1 files changed, 298 insertions, 0 deletions
diff --git a/scripts/maint/format_changelog.py b/scripts/maint/format_changelog.py
new file mode 100755
index 000000000..86f5c5039
--- /dev/null
+++ b/scripts/maint/format_changelog.py
@@ -0,0 +1,298 @@
+#!/usr/bin/python
+# Copyright (c) 2014, The Tor Project, Inc.
+# See LICENSE for licensing information
+#
+# This script reformats a section of the changelog to wrap everything to
+# the right width and put blank lines in the right places.  Eventually,
+# it might include a linter.
+#
+# To run it, change to the directory that holds the ChangeLog file; the
+# script rewrites the first release section of that file in place.
+
+import os
+import re
+import sys
+
+# ==============================
+# Oh, look!  It's a cruddy approximation to Knuth's elegant text wrapping
+# algorithm, with totally ad hoc parameters!
+#
+# We're trying to minimize:
+#    The total of the cubes of ragged space on underflowed intermediate lines,
+#  PLUS
+#    OVERFLOW_PENALTY * the fourth power of overflowed characters
+#  PLUS
+#    the ragged space on the last line (ORPHAN_PENALTY more if it has no space)
+#  PLUS
+#    OPENPAREN_PENALTY for each line that starts with (
+#
+# We use an obvious dynamic programming algorithm to sorta approximate this.
+# It's not coded right or optimally, but it's fast enough for changelogs
+#
+# (Code found in an old directory of mine, lightly cleaned. -NM)
+
+NO_HYPHENATE=set("""
+pf-divert
+""".split())  # words that hyphenateable() looks up before allowing a split
+
+LASTLINE_UNDERFLOW_EXPONENT = 1  # unused room on the last line: p * room**e
+LASTLINE_UNDERFLOW_PENALTY = 1
+
+UNDERFLOW_EXPONENT = 3  # unused room on any other line: p * room**e
+UNDERFLOW_PENALTY = 1
+
+OVERFLOW_EXPONENT = 4  # characters past the width: p * excess**e
+OVERFLOW_PENALTY = 2000
+
+ORPHAN_PENALTY = 10000  # added when the last line contains no space
+
+OPENPAREN_PENALTY = 200  # added for each line that begins with "("
+
+def generate_wrapping(words, divisions):
+    # Cut `words` at each index in `divisions` and join every slice into one
+    # output line; the "\xff" hyphenation marker is turned back into "-".
+    out, start = [], 0
+    for end in divisions:
+        text = " ".join(words[start:end])
+        out.append(text.replace("\xff ","-").replace("\xff","-"))
+        start = end
+    return out
+
+def wrapping_quality(words, divisions, width1, width2):
+    # Score one candidate wrapping of `words`; lower is better.  `divisions`
+    # holds the word index that ends each line, `width1` is the room on the
+    # first line and `width2` the room on every later line.
+    total = 0.0
+    lines = generate_wrapping(words, divisions)
+    last_idx = len(lines) - 1
+    # Go by position, not by `is`: equal lines may be one and the same
+    # string object, which made identity tests pick the wrong line.
+    for idx, line in enumerate(lines):
+        length = len(line)
+        width = width1 if idx == 0 else width2
+        if line[0:1] == '(':
+            total += OPENPAREN_PENALTY
+
+        if length > width:
+            total += OVERFLOW_PENALTY * (
+                (length - width) ** OVERFLOW_EXPONENT )
+        elif idx == last_idx:
+            if " " not in line:
+                total += ORPHAN_PENALTY
+            total += LASTLINE_UNDERFLOW_PENALTY * (
+                (width - length) ** LASTLINE_UNDERFLOW_EXPONENT )
+        else:
+            total += UNDERFLOW_PENALTY * (
+                (width - length) ** UNDERFLOW_EXPONENT )
+
+    return total
+
+def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
+    # Choose line breaks for `words` by dynamic programming and return the
+    # wrapped lines.  The prefix lengths are taken off `width` to get the
+    # room on the first line and on the following lines respectively.
+    w_first = width - prefix_len1
+    w_rest = width - prefix_len2
+
+    # best[n] is the cheapest tuple of division points found for the first
+    # n words; with no words there is just the single division at 0.
+    best = [ (0,) ]
+
+    for end in range(1, len(words)+1):
+        winner = None
+        winner_score = 1e300
+        for prev in best[:end]:
+            # Either stretch the last line of `prev` out to `end`, or keep
+            # `prev` whole and open a new line that stops at `end`.
+            stretched = prev[:-1] + (end,)
+            appended = prev + (end,)
+            for cand in (stretched, appended):
+                score = wrapping_quality(words, cand, w_first, w_rest)
+                if score < winner_score:
+                    winner = cand
+                    winner_score = score
+        best.append(winner)
+
+    return generate_wrapping(words, best[-1])
+
+def hyphenateable(word):
+    # True if `word` may be split at its hyphens: a "-" must be reachable
+    # without passing a digit, and the bare word must not be in NO_HYPHENATE.
+    if not re.match(r'^[^\d\-]\D*-', word):
+        return False
+    stripped = re.sub(r'\W+$','',re.sub(r'^\W+','',word))  # trim both ends
+    return stripped not in NO_HYPHENATE
+
+def split_paragraph(s):
+    """Break paragraph `s` into wrappable words; tuned for Tor.
+
+    A hyphenateable word is cut at every "-", and each piece but the last
+    gets a trailing 0xff character marking where a line may break."""
+    tokens = []
+    for chunk in s.split():
+        pieces = chunk.split("-") if hyphenateable(chunk) else [chunk]
+        tokens.extend(piece + "\xff" for piece in pieces[:-1])
+        tokens.append(pieces[-1])
+    return tokens
+
+def fill(text, width, initial_indent, subsequent_indent):
+    # Wrap `text` to `width` columns and return it as one string in which
+    # every line ends with "\n"; the first line starts with `initial_indent`
+    # and all the others with `subsequent_indent`.
+    wrapped = wrap_graf(split_paragraph(text),
+                        len(initial_indent), len(subsequent_indent), width)
+    first, rest = wrapped[0], wrapped[1:]
+    tail = "".join(subsequent_indent + ln + "\n" for ln in rest)
+    return initial_indent + first + "\n" + tail
+
+# ==============================
+
+
+TP_MAINHEAD = 0   # header line that starts with a capital letter
+TP_HEADTEXT = 1   # any other non-blank header line
+TP_BLANK = 2      # empty or whitespace-only line
+TP_SECHEAD = 3    # section heading: starts with "  o "
+TP_ITEMFIRST = 4  # first line of an item: starts with "    -"
+TP_ITEMBODY = 5   # further line of an item: six spaces, then text
+TP_END = 6        # a "Changes in" line met in the body; parsing stops there
+
+def head_parser(line):
+    # Classify a line of the changelog header: a leading capital letter
+    # means the main heading, "  o " opens a section, whitespace-only lines
+    # are blank, and anything else counts as header text.
+    if re.match(r'^[A-Z]', line):
+        return TP_MAINHEAD
+    if re.match(r'^  o ', line):
+        return TP_SECHEAD
+    return TP_BLANK if re.match(r'^\s*$', line) else TP_HEADTEXT
+
+def body_parser(line):
+    # Classify a line of the changelog body by its indentation and leading
+    # marker.  The patterns are tried in order; a line that matches none of
+    # them is reported and None is returned.
+    patterns = ((r'^  o ', TP_SECHEAD),
+                (r'^    -', TP_ITEMFIRST),
+                (r'^      \S', TP_ITEMBODY),
+                (r'^\s*$', TP_BLANK),
+                (r'^Changes in', TP_END))
+    for regex, kind in patterns:
+        if re.match(regex, line):
+            return kind
+    print "Weird line %r"%line
+
+class ChangeLog(object):
+    # One release's changelog: main heading, intro text and its sections.
+    def __init__(self):
+        self.mainhead = None    # the main heading line (set only once)
+        self.headtext = []      # intro paragraphs, each a list of lines
+        self.curgraf = None     # paragraph that new text lines extend
+        self.sections = []      # one [lineno, heading, items] per section
+        self.cursection = None  # the newest entry of self.sections
+        self.lineno = 0         # count of lines passed to addLine
+
+    def addLine(self, tp, line):
+        self.lineno += 1
+        # File `line` according to `tp`, the TP_* code the parser gave it.
+        if tp == TP_MAINHEAD:
+            assert not self.mainhead
+            self.mainhead = line
+
+        elif tp == TP_HEADTEXT:
+            if self.curgraf is None:
+                self.curgraf = []
+                self.headtext.append(self.curgraf)
+            self.curgraf.append(line)
+        # A blank line ends the paragraph; the next text line starts one.
+        elif tp == TP_BLANK:
+            self.curgraf = None
+
+        elif tp == TP_SECHEAD:
+            self.cursection = [ self.lineno, line, [] ]
+            self.sections.append(self.cursection)
+        # An item is (lineno, grafs); its first paragraph starts here.
+        elif tp == TP_ITEMFIRST:
+            item = ( self.lineno, [ [line] ])
+            self.curgraf = item[1][0]
+            self.cursection[2].append(item)
+
+        elif tp == TP_ITEMBODY:
+            if self.curgraf is None:
+                self.curgraf = []
+                self.cursection[2][-1][1].append(self.curgraf)
+            self.curgraf.append(line)
+        # Anything else, e.g. None for a line body_parser did not recognize.
+        else:
+            raise AssertionError("unexpected line type %r"%(tp,))
+
+    def lint_head(self, line, head):  # warns on stderr; returns nothing
+        m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
+        if not m:
+            print >>sys.stderr, "Weird header format on line %s"%line
+
+    def lint_item(self, line, grafs, head_type):  # placeholder, no checks
+        pass
+
+    def lint(self):
+        self.head_lines = {}
+        for sec_line, sec_head, items in self.sections:
+            head_type = self.lint_head(sec_line, sec_head)  # currently None
+            for item_line, grafs in items:
+                self.lint_item(item_line, grafs, head_type)
+
+    def dumpGraf(self,par,indent1,indent2=-1):
+        # Write paragraph `par` (a list of lines) to stdout, re-wrapped to 72
+        # columns; indent2 == -1 means "indent later lines like the first".
+        if indent2 == -1:
+            indent2 = indent1
+        text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
+        sys.stdout.write(fill(text, width=72, initial_indent=" "*indent1,
+                              subsequent_indent=" "*indent2))
+
+    def dump(self):
+        print self.mainhead
+        for par in self.headtext:
+            self.dumpGraf(par, 2)
+            print
+        for _,head,items in self.sections:
+            if not head.endswith(':'):
+                print >>sys.stderr, "adding : to %r"%head
+                head = head + ":"
+            print head
+            for _,grafs in items:
+                self.dumpGraf(grafs[0],4,6)  # first paragraph holds the "-"
+                for par in grafs[1:]:
+                    print
+                    self.dumpGraf(par,6,6)
+            print
+        print
+
+CL = ChangeLog()
+parser = head_parser
+nextline = None  # the "Changes in" line that stopped parsing, if any
+
+sys.stdin = open('ChangeLog', 'r')
+
+for line in sys.stdin:
+    line = line.rstrip()
+    tp = parser(line)
+
+    if tp == TP_SECHEAD:
+        parser = body_parser
+    elif tp == TP_END:
+        nextline = line
+        break
+
+    CL.addLine(tp,line)
+
+CL.lint()
+
+sys.stdout = open('ChangeLog.new', 'w')
+CL.dump()
+if nextline is not None:
+    print nextline
+for line in sys.stdin:
+    sys.stdout.write(line)
+sys.stdout.close()  # make sure everything is on disk before the move
+sys.stdout = sys.__stdout__
+os.rename('ChangeLog.new', 'ChangeLog')