Diffstat (limited to 'scripts')
-rw-r--r--  scripts/README                            58
-rwxr-xr-x  scripts/codegen/gen_linux_syscalls.pl     37
-rwxr-xr-x  scripts/codegen/gen_server_ciphers.py    115
-rw-r--r--  scripts/codegen/get_mozilla_ciphers.py   210
-rwxr-xr-x  scripts/maint/check_config_macros.pl      20
-rwxr-xr-x  scripts/maint/format_changelog.py        148
-rwxr-xr-x  scripts/maint/sortChanges.py              40
7 files changed, 622 insertions, 6 deletions
diff --git a/scripts/README b/scripts/README
new file mode 100644
index 000000000..70c763923
--- /dev/null
+++ b/scripts/README
@@ -0,0 +1,58 @@
+The scripts directory holds tools for use in building, generating, testing,
+and maintaining the Tor source code. It is mainly for use by developers.
+
+Code maintenance scripts
+------------------------
+
+maint/checkLogs.pl -- Verify that Tor log statements are unique.
+
+maint/check_config_macros.pl -- Look for autoconf tests whose results are
+never used.
+
+maint/checkOptionDocs.pl -- Make sure that Tor options are documented in the
+manpage, and that the manpage only documents real Tor options.
+
+maint/checkSpaces.pl -- Style checker for the Tor source code. Mainly checks
+whitespace.
+
+maint/findMergedChanges.pl -- Find a set of changes/* files that have been
+merged into an upstream version.
+
+maint/format_changelog.py -- Flow the changelog into the proper format.
+
+maint/redox.py -- Find places that should have DOCDOC comments to indicate a
+need for doxygen comments, and put those comments there.
+
+maint/updateVersions.pl -- Update the version number in the .nsi and Windows
+orconfig.h files.
+
+
+Testing scripts
+---------------
+
+test/cov-blame -- Mash up the results of gcov with git blame. Mainly useful
+to find out who has been writing untested code.
+
+test/cov-diff -- Compare two directories of gcov files to identify changed
+lines without coverage.
+
+test/coverage -- Generates a directory full of gcov files. You need to use
+this script instead of calling gcov directly because of our confusingly named
+object files.
+
+test/scan-build.sh -- Example script for invoking clang's scan-build
+static analysis tools.
+
+
+Code generation scripts
+-----------------------
+
+codegen/gen_linux_syscalls.pl -- Generate a table mapping Linux syscall
+numbers to their names.
+
+codegen/gen_server_ciphers.py -- Generate a sorted list of TLS ciphersuites
+for servers to choose from.
+
+codegen/get_mozilla_ciphers.py -- Generate a list of TLS ciphersuites for
+clients to use in order to look like Firefox.
+
diff --git a/scripts/codegen/gen_linux_syscalls.pl b/scripts/codegen/gen_linux_syscalls.pl
new file mode 100755
index 000000000..f985bad6c
--- /dev/null
+++ b/scripts/codegen/gen_linux_syscalls.pl
@@ -0,0 +1,37 @@
+#!/usr/bin/perl -w
+
+use strict;
+my %syscalls = ();
+
+while (<>) {
+  if (/^#define (__NR_\w+) /) {
+    $syscalls{$1} = 1;
+  }
+}
+
+print <<EOL;
+/* Automatically generated with
+     gen_linux_syscalls.pl /usr/include/asm/unistd*.h
+   Do not edit.
+ */
+static const struct {
+  int syscall_num; const char *syscall_name;
+} SYSCALLS_BY_NUMBER[] = {
+EOL
+
+for my $k (sort keys %syscalls) {
+  my $name = $k;
+  $name =~ s/^__NR_//;
+  print <<EOL;
+#ifdef $k
+  { $k, "$name" },
+#endif
+EOL
+
+}
+
+print <<EOL;
+  {0, NULL}
+};
+
+EOL
diff --git a/scripts/codegen/gen_server_ciphers.py b/scripts/codegen/gen_server_ciphers.py
new file mode 100755
index 000000000..97ed9d046
--- /dev/null
+++ b/scripts/codegen/gen_server_ciphers.py
@@ -0,0 +1,115 @@
+#!/usr/bin/python
+# Copyright 2014, The Tor Project, Inc
+# See LICENSE for licensing information
+
+# This script parses openssl headers to find ciphersuite names, determines
+# which ones we should be willing to use as a server, and sorts them according
+# to preference rules.
+#
+# Run it on all the files in your openssl include directory.
+
+import re
+import sys
+
+EPHEMERAL_INDICATORS = [ "_EDH_", "_DHE_", "_ECDHE_" ]
+BAD_STUFF = [ "_DES_40_", "MD5", "_RC4_", "_DES_64_",
+ "_SEED_", "_CAMELLIA_", "_NULL" ]
+
+# these never get #ifdeffed.
+MANDATORY = [
+ "TLS1_TXT_DHE_RSA_WITH_AES_256_SHA",
+ "TLS1_TXT_DHE_RSA_WITH_AES_128_SHA",
+ "SSL3_TXT_EDH_RSA_DES_192_CBC3_SHA",
+]
+
+def find_ciphers(filename):
+ with open(filename) as f:
+ for line in f:
+ m = re.search(r'(?:SSL3|TLS1)_TXT_\w+', line)
+ if m:
+ yield m.group(0)
+
+def usable_cipher(ciph):
+ ephemeral = False
+ for e in EPHEMERAL_INDICATORS:
+ if e in ciph:
+ ephemeral = True
+ if not ephemeral:
+ return False
+
+ if "_RSA_" not in ciph:
+ return False
+
+ for b in BAD_STUFF:
+ if b in ciph:
+ return False
+ return True
+
+# All fields we sort on, in order of priority.
+FIELDS = [ 'cipher', 'fwsec', 'mode', 'digest', 'bitlength' ]
+# Map from sorted fields to recognized value in descending order of goodness
+FIELD_VALS = { 'cipher' : [ 'AES', 'DES'],
+ 'fwsec' : [ 'ECDHE', 'DHE' ],
+ 'mode' : [ 'GCM', 'CBC' ],
+ 'digest' : [ 'SHA384', 'SHA256', 'SHA' ],
+ 'bitlength' : [ '256', '128', '192' ],
+}
+
+class Ciphersuite(object):
+ def __init__(self, name, fwsec, cipher, bitlength, mode, digest):
+ self.name = name
+ self.fwsec = fwsec
+ self.cipher = cipher
+ self.bitlength = bitlength
+ self.mode = mode
+ self.digest = digest
+
+ for f in FIELDS:
+ assert(getattr(self, f) in FIELD_VALS[f])
+
+ def sort_key(self):
+ return tuple(FIELD_VALS[f].index(getattr(self,f)) for f in FIELDS)
+
+
+def parse_cipher(ciph):
+ m = re.match('(?:TLS1|SSL3)_TXT_(EDH|DHE|ECDHE)_RSA(?:_WITH)?_(AES|DES)_(256|128|192)(|_CBC|_CBC3|_GCM)_(SHA|SHA256|SHA384)$', ciph)
+
+ if not m:
+ print "/* Couldn't parse %s ! */"%ciph
+ return None
+
+ fwsec, cipher, bits, mode, digest = m.groups()
+ if fwsec == 'EDH':
+ fwsec = 'DHE'
+
+ if mode in [ '_CBC3', '_CBC', '' ]:
+ mode = 'CBC'
+ elif mode == '_GCM':
+ mode = 'GCM'
+
+ return Ciphersuite(ciph, fwsec, cipher, bits, mode, digest)
+
+ALL_CIPHERS = []
+
+for fname in sys.argv[1:]:
+ ALL_CIPHERS += (parse_cipher(c)
+ for c in find_ciphers(fname)
+ if usable_cipher(c) )
+
+ALL_CIPHERS.sort(key=Ciphersuite.sort_key)
+
+for c in ALL_CIPHERS:
+ if c is ALL_CIPHERS[-1]:
+ colon = ';'
+ else:
+ colon = ' ":"'
+
+ if c.name in MANDATORY:
+ print " /* Required */"
+ print ' %s%s'%(c.name,colon)
+ else:
+ print "#ifdef %s"%c.name
+ print ' %s%s'%(c.name,colon)
+ print "#endif"
+
+
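As an illustration of the preference rules encoded in FIELDS/FIELD_VALS above, here is a standalone sketch (not taken from the patch; the dictionaries are copied from the script and the two suites are hypothetical examples). An ECDHE/GCM/SHA384/256-bit suite sorts ahead of a DHE/CBC/SHA/128-bit one because every field maps to a lower index:

    # Standalone sketch of the sort-key idea used by gen_server_ciphers.py.
    FIELDS = ['cipher', 'fwsec', 'mode', 'digest', 'bitlength']
    FIELD_VALS = {'cipher': ['AES', 'DES'],
                  'fwsec': ['ECDHE', 'DHE'],
                  'mode': ['GCM', 'CBC'],
                  'digest': ['SHA384', 'SHA256', 'SHA'],
                  'bitlength': ['256', '128', '192']}

    def sort_key(suite):
        # Lower index means more preferred, so tuples sort best-first.
        return tuple(FIELD_VALS[f].index(suite[f]) for f in FIELDS)

    a = {'cipher': 'AES', 'fwsec': 'ECDHE', 'mode': 'GCM',
         'digest': 'SHA384', 'bitlength': '256'}
    b = {'cipher': 'AES', 'fwsec': 'DHE', 'mode': 'CBC',
         'digest': 'SHA', 'bitlength': '128'}
    assert sort_key(a) == (0, 0, 0, 0, 0)
    assert sort_key(a) < sort_key(b)
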
diff --git a/scripts/codegen/get_mozilla_ciphers.py b/scripts/codegen/get_mozilla_ciphers.py
new file mode 100644
index 000000000..0636eb365
--- /dev/null
+++ b/scripts/codegen/get_mozilla_ciphers.py
@@ -0,0 +1,210 @@
+#!/usr/bin/python
+# coding=utf-8
+# Copyright 2011, The Tor Project, Inc
+# original version by Arturo Filastò
+# See LICENSE for licensing information
+
+# This script parses Firefox and OpenSSL sources, and uses this information
+# to generate a ciphers.inc file.
+#
+# It takes two arguments: the location of a firefox source directory, and the
+# location of an openssl source directory.
+
+import os
+import re
+import sys
+
+if len(sys.argv) != 3:
+    print >>sys.stderr, "Syntax: get_mozilla_ciphers.py <firefox-source-dir> <openssl-source-dir>"
+    sys.exit(1)
+
+ff_root = sys.argv[1]
+ossl_root = sys.argv[2]
+
+def ff(s):
+    return os.path.join(ff_root, s)
+def ossl(s):
+    return os.path.join(ossl_root, s)
+
+#####
+# Read the C++ file to learn which preference name goes with which cipher:
+# build "ciphers", a map from the name used in the JavaScript prefs to the
+# NSS cipher macro name.
+fileA = open(ff('security/manager/ssl/src/nsNSSComponent.cpp'),'r')
+
+# The input format is a file containing exactly one section of the form:
+#   static const CipherPref sCipherPrefs[] = {
+#       { "pref.name", MACRO_NAME, true/false }, // comment
+#       ...
+#       { nullptr, 0 }
+#   };
+# where an entry's name and macro may be split across two lines.
+
+inCipherSection = False
+cipherLines = []
+for line in fileA:
+    if line.startswith('static const CipherPref sCipherPrefs[]'):
+        # Get the starting boundary of the Cipher Preferences
+        inCipherSection = True
+    elif inCipherSection:
+        line = line.strip()
+        if line.startswith('{ nullptr, 0}'):
+            # At the ending boundary of the Cipher Prefs
+            break
+        else:
+            cipherLines.append(line)
+fileA.close()
+
+# Parse the lines and put them into a dict
+ciphers = {}
+cipher_pref = {}
+key_pending = None
+for line in cipherLines:
+    m = re.search(r'^{\s*\"([^\"]+)\",\s*(\S+)\s*(?:,\s*(true|false))?\s*}', line)
+    if m:
+        assert not key_pending
+        key,value,enabled = m.groups()
+        if enabled == 'true':
+            ciphers[key] = value
+            cipher_pref[value] = key
+        continue
+    m = re.search(r'^{\s*\"([^\"]+)\",', line)
+    if m:
+        assert not key_pending
+        key_pending = m.group(1)
+        continue
+    m = re.search(r'^\s*(\S+)(?:,\s*(true|false))?\s*}', line)
+    if m:
+        assert key_pending
+        key = key_pending
+        value,enabled = m.groups()
+        key_pending = None
+        if enabled == 'true':
+            ciphers[key] = value
+            cipher_pref[value] = key
+
+####
+# Now find the correct order for the ciphers
+fileC = open(ff('security/nss/lib/ssl/ssl3con.c'), 'r')
+firefox_ciphers = []
+inEnum = False
+for line in fileC:
+    if not inEnum:
+        if "ssl3CipherSuiteCfg cipherSuites[" in line:
+            inEnum = True
+        continue
+
+    if line.startswith("};"):
+        break
+
+    m = re.match(r'^\s*\{\s*([A-Z_0-9]+),', line)
+    if m:
+        firefox_ciphers.append(m.group(1))
+
+fileC.close()
+
+#####
+# Read the JS file to understand what ciphers are enabled. The format is
+#   pref("name", true/false);
+# Build a map enabled_ciphers from javascript name to "true" or "false",
+# and an (unordered!) list of the macro names for those ciphers that are
+# enabled.
+fileB = open(ff('netwerk/base/public/security-prefs.js'), 'r')
+
+enabled_ciphers = {}
+for line in fileB:
+    m = re.match(r'pref\(\"([^\"]+)\"\s*,\s*(\S*)\s*\)', line)
+    if not m:
+        continue
+    key, val = m.groups()
+    if key.startswith("security.ssl3"):
+        enabled_ciphers[key] = val
+fileB.close()
+
+used_ciphers = []
+for k, v in enabled_ciphers.items():
+    if v == "true":
+        used_ciphers.append(ciphers[k])
+
+#oSSLinclude = ('/usr/include/openssl/ssl3.h', '/usr/include/openssl/ssl.h',
+#               '/usr/include/openssl/ssl2.h', '/usr/include/openssl/ssl23.h',
+#               '/usr/include/openssl/tls1.h')
+oSSLinclude = ('ssl/ssl3.h', 'ssl/ssl.h',
+               'ssl/ssl2.h', 'ssl/ssl23.h',
+               'ssl/tls1.h')
+
+#####
+# This reads the hex codes for the ciphers that are used by Firefox.
+# sslProtoD is set to a map from macro name to macro value in sslproto.h;
+# cipher_codes is set to an (unordered!) list of these hex values.
+sslProto = open(ff('security/nss/lib/ssl/sslproto.h'), 'r')
+sslProtoD = {}
+
+for line in sslProto:
+    m = re.match(r'#define\s+(\S+)\s+(\S+)', line)
+    if m:
+        key, value = m.groups()
+        sslProtoD[key] = value
+sslProto.close()
+
+cipher_codes = []
+for x in used_ciphers:
+    cipher_codes.append(sslProtoD[x].lower())
+
+####
+# Now read through all the openssl include files, and try to find the openssl
+# macro names for those cipher codes.
+openssl_macro_by_hex = {}
+all_openssl_macros = {}
+for fl in oSSLinclude:
+    fp = open(ossl(fl), 'r')
+    for line in fp.readlines():
+        m = re.match(r'#define\s+(\S+)\s+(\S+)', line)
+        if m:
+            value,key = m.groups()
+            if key.startswith('0x') and "_CK_" in value:
+                key = key.replace('0x0300','0x').lower()
+                #print "%s %s" % (key, value)
+                openssl_macro_by_hex[key] = value
+            all_openssl_macros[value]=key
+    fp.close()
+
+# Now generate the output.
+print """\
+/* This is an include file used to define the list of ciphers clients should
+ * advertise. Before including it, you should define the CIPHER and XCIPHER
+ * macros.
+ *
+ * This file was automatically generated by get_mozilla_ciphers.py.
+ */"""
+# Go in order by the order in CipherPrefs
+for firefox_macro in firefox_ciphers:
+
+    try:
+        js_cipher_name = cipher_pref[firefox_macro]
+    except KeyError:
+        # This one has no javascript preference.
+        continue
+
+    # The cipher needs to be enabled in security-prefs.js
+    if enabled_ciphers.get(js_cipher_name, 'false') != 'true':
+        continue
+
+    hexval = sslProtoD[firefox_macro].lower()
+
+    try:
+        openssl_macro = openssl_macro_by_hex[hexval.lower()]
+        openssl_macro = openssl_macro.replace("_CK_", "_TXT_")
+        if openssl_macro not in all_openssl_macros:
+            raise KeyError()
+        format = {'hex':hexval, 'macro':openssl_macro, 'note':""}
+    except KeyError:
+        # openssl doesn't have a macro for this.
+        format = {'hex':hexval, 'macro':firefox_macro,
+                  'note':"/* No openssl macro found for "+hexval+" */\n"}
+
+    res = """\
+%(note)s#ifdef %(macro)s
+ CIPHER(%(hex)s, %(macro)s)
+#else
+ XCIPHER(%(hex)s, %(macro)s)
+#endif""" % format
+    print res
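The hex normalization in the script above relies on OpenSSL's *_CK_* macros encoding the 16-bit TLS ciphersuite number with a leading 0x0300, while NSS's sslproto.h uses the bare 16-bit value; stripping the prefix lets the two sets of headers be joined on that number. A minimal sketch (the constant shown is an illustrative example, not taken from the script):

    # OpenSSL: TLS1_CK_RSA_WITH_AES_128_SHA is 0x0300002F; NSS's sslproto.h
    # defines the same suite as 0x002F. Dropping the 0x0300 prefix and
    # lowercasing makes the two keys comparable.
    def normalize_openssl_code(hexstr):
        return hexstr.replace('0x0300', '0x').lower()

    assert normalize_openssl_code('0x0300002F') == '0x002f'
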
diff --git a/scripts/maint/check_config_macros.pl b/scripts/maint/check_config_macros.pl
new file mode 100755
index 000000000..bcde2becc
--- /dev/null
+++ b/scripts/maint/check_config_macros.pl
@@ -0,0 +1,20 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my @macros = ();
+
+open(F, '<', 'orconfig.h.in') or die "Couldn't open orconfig.h.in: $!";
+while (<F>) {
+    if (/^#undef +([A-Za-z0-9_]*)/) {
+        push @macros, $1;
+    }
+}
+close F;
+
+for my $m (@macros) {
+    my $s = `git grep '$m' src`;
+    if ($s eq '') {
+        print "Unused: $m\n";
+    }
+}
diff --git a/scripts/maint/format_changelog.py b/scripts/maint/format_changelog.py
index 6997d958a..86f5c5039 100755
--- a/scripts/maint/format_changelog.py
+++ b/scripts/maint/format_changelog.py
@@ -12,7 +12,142 @@
 import os
 import re
 import sys
-import textwrap
+
+# ==============================
+# Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
+# algorithm, with totally ad hoc parameters!
+#
+# We're trying to minimize:
+#    UNDERFLOW_PENALTY * the cube of the ragged space on each underflowed
+#      intermediate line,
+#  PLUS
+#    OVERFLOW_PENALTY * the fourth power of the number of overflowed
+#      characters on any line,
+#  PLUS
+#    LASTLINE_UNDERFLOW_PENALTY * the ragged space on the last line
+#      (plus ORPHAN_PENALTY if that line is a single word),
+#  PLUS
+#    OPENPAREN_PENALTY for each line that starts with (
+#
+# We use an obvious dynamic programming algorithm to sorta approximate this.
+# It's not coded right or optimally, but it's fast enough for changelogs.
+#
+# (Code found in an old directory of mine, lightly cleaned. -NM)
+
+NO_HYPHENATE=set("""
+pf-divert
+""".split())
+
+LASTLINE_UNDERFLOW_EXPONENT = 1
+LASTLINE_UNDERFLOW_PENALTY = 1
+
+UNDERFLOW_EXPONENT = 3
+UNDERFLOW_PENALTY = 1
+
+OVERFLOW_EXPONENT = 4
+OVERFLOW_PENALTY = 2000
+
+ORPHAN_PENALTY = 10000
+
+OPENPAREN_PENALTY = 200
+
+def generate_wrapping(words, divisions):
+    lines = []
+    last = 0
+    for i in divisions:
+        w = words[last:i]
+        last = i
+        line = " ".join(w).replace("\xff ","-").replace("\xff","-")
+        lines.append(line)
+    return lines
+
+def wrapping_quality(words, divisions, width1, width2):
+    total = 0.0
+
+    lines = generate_wrapping(words, divisions)
+    for line in lines:
+        length = len(line)
+        if line is lines[0]:
+            width = width1
+        else:
+            width = width2
+
+        if line[0:1] == '(':
+            total += OPENPAREN_PENALTY
+
+        if length > width:
+            total += OVERFLOW_PENALTY * (
+                (length - width) ** OVERFLOW_EXPONENT )
+        else:
+            if line is lines[-1]:
+                e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
+                if " " not in line:
+                    total += ORPHAN_PENALTY
+            else:
+                e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
+
+            total += p * ((width - length) ** e)
+
+    return total
+
+def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
+    wrapping_after = [ (0,), ]
+
+    w1 = width - prefix_len1
+    w2 = width - prefix_len2
+
+    for i in range(1, len(words)+1):
+        best_so_far = None
+        best_score = 1e300
+        for j in range(i):
+            t = wrapping_after[j]
+            t1 = t[:-1] + (i,)
+            t2 = t + (i,)
+            wq1 = wrapping_quality(words, t1, w1, w2)
+            wq2 = wrapping_quality(words, t2, w1, w2)
+
+            if wq1 < best_score:
+                best_so_far = t1
+                best_score = wq1
+            if wq2 < best_score:
+                best_so_far = t2
+                best_score = wq2
+        wrapping_after.append( best_so_far )
+
+    lines = generate_wrapping(words, wrapping_after[-1])
+
+    return lines
+
+def hyphenateable(word):
+    if re.match(r'^[^\d\-]\D*-', word):
+        stripped = re.sub(r'^\W+','',word)
+        stripped = re.sub(r'\W+$','',stripped)
+        return stripped not in NO_HYPHENATE
+    else:
+        return False
+
+def split_paragraph(s):
+    "Split paragraph into words; tuned for Tor."
+
+    r = []
+    for word in s.split():
+        if hyphenateable(word):
+            while "-" in word:
+                a,word = word.split("-",1)
+                r.append(a+"\xff")
+        r.append(word)
+    return r
+
+def fill(text, width, initial_indent, subsequent_indent):
+    words = split_paragraph(text)
+    lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
+                      width)
+    res = [ initial_indent, lines[0], "\n" ]
+    for line in lines[1:]:
+        res.append(subsequent_indent)
+        res.append(line)
+        res.append("\n")
+    return "".join(res)
+
+# ==============================
+
 TP_MAINHEAD = 0
 TP_HEADTEXT = 1
@@ -83,7 +218,7 @@ class ChangeLog(object):
         elif tp == TP_ITEMBODY:
             if self.curgraf is None:
                 self.curgraf = []
-                self.cursection[2][1][-1].append(self.curgraf)
+                self.cursection[2][-1][1].append(self.curgraf)
             self.curgraf.append(line)
         else:
@@ -108,10 +243,11 @@ class ChangeLog(object):
         if indent2 == -1:
             indent2 = indent1
         text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
-        print textwrap.fill(text, width=72,
-                            initial_indent=" "*indent1,
-                            subsequent_indent=" "*indent2,
-                            break_on_hyphens=False)
+
+        sys.stdout.write(fill(text,
+                              width=72,
+                              initial_indent=" "*indent1,
+                              subsequent_indent=" "*indent2))
 
     def dump(self):
         print self.mainhead
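The cost model that wrap_graf() minimizes, described in the comment block added above, can be seen in isolation in the following standalone sketch. It scores one fixed candidate wrapping using the same constants the patch defines, but drops the orphan and open-paren penalties for brevity:

    # Score a fixed wrapping under the simplified cost model (sketch only).
    OVERFLOW_PENALTY, OVERFLOW_EXPONENT = 2000, 4
    UNDERFLOW_PENALTY, UNDERFLOW_EXPONENT = 1, 3

    def line_cost(line, width, is_last):
        if len(line) > width:
            # Overflow is punished very hard.
            return OVERFLOW_PENALTY * (len(line) - width) ** OVERFLOW_EXPONENT
        if is_last:
            # Ragged space on the final line is only mildly penalized.
            return width - len(line)
        return UNDERFLOW_PENALTY * (width - len(line)) ** UNDERFLOW_EXPONENT

    lines = ["This is a wrapped", "changelog entry."]
    total = sum(line_cost(l, 20, l is lines[-1]) for l in lines)
    print(total)  # 27 + 4 = 31: both lines fit comfortably under width 20
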
diff --git a/scripts/maint/sortChanges.py b/scripts/maint/sortChanges.py
new file mode 100755
index 000000000..f70490bad
--- /dev/null
+++ b/scripts/maint/sortChanges.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python
+
+import re
+import sys
+
+def fetch(fn):
+    with open(fn) as f:
+        s = f.read()
+        s = "%s\n" % s.rstrip()
+        return s
+
+def score(s):
+    m = re.match(r'^ +o (.*)', s)
+    if not m:
+        print >>sys.stderr, "Can't score %r"%s
+        # Unscorable entries sort last instead of crashing on m.group() below.
+        return (100, s.lower(), s)
+    lw = m.group(1).lower()
+    if lw.startswith("major feature"):
+        score = 0
+    elif lw.startswith("major bug"):
+        score = 1
+    elif lw.startswith("major"):
+        score = 2
+    elif lw.startswith("minor feature"):
+        score = 10
+    elif lw.startswith("minor bug"):
+        score = 11
+    elif lw.startswith("minor"):
+        score = 12
+    else:
+        score = 100
+
+    return (score, lw, s)
+
+
+changes = [ score(fetch(fn)) for fn in sys.argv[1:] if not fn.endswith('~') ]
+
+changes.sort()
+
+for _, _, s in changes:
+ print s
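
The score() buckets above order entries as: Major features, Major bugfixes, other Major items, Minor features, Minor bugfixes, other Minor items, and finally everything else. A standalone sketch of the resulting ordering (the section headers here are made-up examples, not real changes files):

    import re

    rank = {"major feature": 0, "major bug": 1, "major": 2,
            "minor feature": 10, "minor bug": 11, "minor": 12}

    def bucket(header):
        lw = re.match(r'^ +o (.*)', header).group(1).lower()
        matches = [v for k, v in rank.items() if lw.startswith(k)]
        return min(matches) if matches else 100

    headers = [
        "  o Minor bugfixes (logging):",
        "  o Major features (directory system):",
        "  o Documentation fixes:",
        "  o Major bugfixes (crash):",
    ]
    for h in sorted(headers, key=bucket):
        print(h)
    # -> Major features, Major bugfixes, Minor bugfixes, Documentation fixes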