From 8a173635bcd85ad7375d996fb39efeaddd8c611d Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 2 May 2014 12:50:23 -0400 Subject: Tweak the changelog formatter a little. (I had a bad clone of Knuth's algorithm sitting around in an old code repository of mine. I added orphan detection and smarter hyphenation; it seems to give marginally better results than we had before.) --- scripts/maint/format_changelog.py | 139 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 5 deletions(-) diff --git a/scripts/maint/format_changelog.py b/scripts/maint/format_changelog.py index 6997d958a..35044b318 100755 --- a/scripts/maint/format_changelog.py +++ b/scripts/maint/format_changelog.py @@ -12,7 +12,135 @@ import os import re import sys -import textwrap + +# ============================== +# Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping +# algorithm, with totally ad hoc parameters! +# +# We're trying to minimize: +# The total of the cubes of ragged space on underflowed intermediate lines, +# PLUS +# 100 * the fourth power of overflowed characters +# PLUS +# .1 * a bit more than the cube of ragged space on the last line. +# +# We use an obvious dynamic programming algorithm to sorta approximate this. +# It's not coded right or optimally, but it's fast enough for changelogs +# +# (Code found in an old directory of mine, lightly cleaned. -NM) + +NO_HYPHENATE=set(""" +pf-divert +""".split()) + +LASTLINE_UNDERFLOW_EXPONENT = 1 +LASTLINE_UNDERFLOW_PENALTY = 1 + +UNDERFLOW_EXPONENT = 3 +UNDERFLOW_PENALTY = 1 + +OVERFLOW_EXPONENT = 4 +OVERFLOW_PENALTY = 2000 + +ORPHAN_PENALTY = 10000 + +def generate_wrapping(words, divisions): + lines = [] + last = 0 + for i in divisions: + w = words[last:i] + last = i + line = " ".join(w).replace("\xff ","-").replace("\xff","-") + lines.append(line) + return lines + +def wrapping_quality(words, divisions, width1, width2): + total = 0.0 + + lines = generate_wrapping(words, divisions) + for line in lines: + length = len(line) + if line is lines[0]: + width = width1 + else: + width = width2 + + if length > width: + total += OVERFLOW_PENALTY * ( + (length - width) ** OVERFLOW_EXPONENT ) + else: + if line is lines[-1]: + e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY) + if " " not in line: + total += ORPHAN_PENALTY + else: + e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY) + + total += p * ((width - length) ** e) + + return total + +def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72): + wrapping_after = [ (0,), ] + + w1 = width - prefix_len1 + w2 = width - prefix_len2 + + for i in range(1, len(words)+1): + best_so_far = None + best_score = 1e300 + for j in range(i): + t = wrapping_after[j] + t1 = t[:-1] + (i,) + t2 = t + (i,) + wq1 = wrapping_quality(words, t1, w1, w2) + wq2 = wrapping_quality(words, t2, w1, w2) + + if wq1 < best_score: + best_so_far = t1 + best_score = wq1 + if wq2 < best_score: + best_so_far = t2 + best_score = wq2 + wrapping_after.append( best_so_far ) + + lines = generate_wrapping(words, wrapping_after[-1]) + + return lines + +def hyphenateable(word): + if re.match(r'^[^\d\-].*-', word): + stripped = re.sub(r'^\W+','',word) + stripped = re.sub(r'\W+$','',word) + return stripped not in NO_HYPHENATE + else: + return False + +def split_paragraph(s): + "Split paragraph into words; tuned for Tor." + + r = [] + for word in s.split(): + if hyphenateable(word): + while "-" in word: + a,word = word.split("-",1) + r.append(a+"\xff") + r.append(word) + return r + +def fill(text, width, initial_indent, subsequent_indent): + words = split_paragraph(text) + lines = wrap_graf(words, len(initial_indent), len(subsequent_indent), + width) + res = [ initial_indent, lines[0], "\n" ] + for line in lines[1:]: + res.append(subsequent_indent) + res.append(line) + res.append("\n") + return "".join(res) + +# ============================== + TP_MAINHEAD = 0 TP_HEADTEXT = 1 @@ -108,10 +236,11 @@ class ChangeLog(object): if indent2 == -1: indent2 = indent1 text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par) - print textwrap.fill(text, width=72, - initial_indent=" "*indent1, - subsequent_indent=" "*indent2, - break_on_hyphens=False) + + sys.stdout.write(fill(text, + width=72, + initial_indent=" "*indent1, + subsequent_indent=" "*indent2)) def dump(self): print self.mainhead -- cgit v1.2.3