3 files changed, 89 insertions, 52 deletions
diff --git a/patchwork/hasher.py b/patchwork/hasher.py
new file mode 100644
index 0000000..d9bb6c2
--- /dev/null
+++ b/patchwork/hasher.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#
+# Patchwork - automated patch tracking system
+# Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Patchwork; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Hash generation for diffs."""
+
+import hashlib
+import re
+import sys
+
+HUNK_RE = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
+FILENAME_RE = re.compile(r'^(---|\+\+\+) (\S+)')
+
+
+def hash_diff(diff):
+    """Generate a hash from a diff."""
+
+    # normalise spaces
+    diff = diff.replace('\r', '')
+    diff = diff.strip() + '\n'
+
+    prefixes = ['-', '+', ' ']
+    hashed = hashlib.sha1()
+
+    for line in diff.split('\n'):
+        if len(line) <= 0:
+            continue
+
+        hunk_match = HUNK_RE.match(line)
+        filename_match = FILENAME_RE.match(line)
+
+        if filename_match:
+            # normalise -p1 top-directories
+            if filename_match.group(1) == '---':
+                filename = 'a/'
+            else:
+                filename = 'b/'
+            filename += '/'.join(filename_match.group(2).split('/')[1:])
+
+            line = filename_match.group(1) + ' ' + filename
+        elif hunk_match:
+            # remove line numbers, but leave line counts
+            def fn(x):
+                if not x:
+                    return 1
+                return int(x)
+            line_nos = list(map(fn, hunk_match.groups()))
+            line = '@@ -%d +%d @@' % tuple(line_nos)
+        elif line[0] in prefixes:
+            # if we have a +, - or context line, leave as-is
+            pass
+        else:
+            # other lines are ignored
+            continue
+
+        hashed.update((line + '\n').encode('utf-8'))
+
+    return hashed.hexdigest()
+
+
+def main(args):
+    """Hash a diff provided by stdin.
+
+    This is required by scripts found in /tools
+    """
+    print(hash_diff('\n'.join(sys.stdin.readlines())))
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/patchwork/models.py b/patchwork/models.py
index 15a2936..cff9587 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -22,7 +22,6 @@ from __future__ import absolute_import
 
 from collections import Counter, OrderedDict
 import datetime
-import hashlib
 import random
 import re
 
@@ -35,6 +34,7 @@ from django.utils.encoding import python_2_unicode_compatible
 from django.utils.functional import cached_property
 
 from patchwork.fields import HashField
+from patchwork.hasher import hash_diff
 
 
 @python_2_unicode_compatible
@@ -366,54 +366,6 @@ class Patch(SeriesMixin, Submission):
 
         return counts
 
-    @staticmethod
-    def hash_diff(diff):
-        """Generate a hash from a diff."""
-        hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
-        filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
-
-        # normalise spaces
-        diff = diff.replace('\r', '')
-        diff = diff.strip() + '\n'
-
-        prefixes = ['-', '+', ' ']
-        hash = hashlib.sha1()
-
-        for line in diff.split('\n'):
-            if len(line) <= 0:
-                continue
-
-            hunk_match = hunk_re.match(line)
-            filename_match = filename_re.match(line)
-
-            if filename_match:
-                # normalise -p1 top-directories
-                if filename_match.group(1) == '---':
-                    filename = 'a/'
-                else:
-                    filename = 'b/'
-                filename += '/'.join(filename_match.group(2).split('/')[1:])
-
-                line = filename_match.group(1) + ' ' + filename
-            elif hunk_match:
-                # remove line numbers, but leave line counts
-                def fn(x):
-                    if not x:
-                        return 1
-                    return int(x)
-                line_nos = list(map(fn, hunk_match.groups()))
-                line = '@@ -%d +%d @@' % tuple(line_nos)
-            elif line[0] in prefixes:
-                # if we have a +, - or context line, leave as-is
-                pass
-            else:
-                # other lines are ignored
-                continue
-
-            hash.update((line + '\n').encode('utf-8'))
-
-        return hash
-
     def _set_tag(self, tag, count):
         if count == 0:
             self.patchtag_set.filter(tag=tag).delete()
@@ -441,7 +393,7 @@ class Patch(SeriesMixin, Submission):
             self.state = get_default_initial_patch_state()
 
         if self.hash is None and self.diff is not None:
-            self.hash = self.hash_diff(self.diff).hexdigest()
+            self.hash = hash_diff(self.diff)
 
         super(Patch, self).save(**kwargs)
 
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 9d1b79e..16cc53c 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-#
 # Patchwork - automated patch tracking system
 # Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
 #