From f216e4b5861066eeab6be778a5be196991192653 Mon Sep 17 00:00:00 2001
From: Stephen Finucane
Date: Wed, 30 Nov 2016 19:00:25 +0000
Subject: hasher: Create hasher module

This exposes the hashing functionality of Patchwork without requiring
Django or similar dependencies.

Signed-off-by: Stephen Finucane
Tested-by: Tom Rini
---
Review notes (text between the "---" line and the first "diff --git"
line is commentary and is not part of the commit message):

 * hash_diff() moves from a staticmethod on Patch to a module-level
   function in patchwork/hasher.py. Its return value changes: the
   removed method returned the hashlib object, the new function returns
   the hex digest string. Patch.save() is updated to match in this
   patch. NOTE(review): any other caller still doing
   Patch.hash_diff(...).hexdigest() is not visible here - TODO confirm.
 * Apart from that, the new function body is the removed method with
   hunk_re/filename_re hoisted to the module constants
   HUNK_RE/FILENAME_RE and the local "hash" renamed to "hashed".
 * main() joins sys.stdin.readlines() with '\n', which doubles every
   newline; hash_diff() skips empty lines, so the digest is unaffected.
 * main() accepts "args" but does not use it, and returns None, so
   sys.exit() receives None.
 * NOTE(review): the line structure of this copy (indentation and blank
   context lines) was restored from the hunk line counts; verify it
   against the upstream commit before applying.

 patchwork/hasher.py | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 patchwork/models.py | 52 ++------------------------------
 patchwork/parser.py |  2 --
 3 files changed, 89 insertions(+), 52 deletions(-)
 create mode 100644 patchwork/hasher.py

diff --git a/patchwork/hasher.py b/patchwork/hasher.py
new file mode 100644
index 0000000..d9bb6c2
--- /dev/null
+++ b/patchwork/hasher.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#
+# Patchwork - automated patch tracking system
+# Copyright (C) 2008 Jeremy Kerr
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Patchwork; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Hash generation for diffs."""
+
+import hashlib
+import re
+import sys
+
+HUNK_RE = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
+FILENAME_RE = re.compile(r'^(---|\+\+\+) (\S+)')
+
+
+def hash_diff(diff):
+    """Generate a hash from a diff."""
+
+    # normalise spaces
+    diff = diff.replace('\r', '')
+    diff = diff.strip() + '\n'
+
+    prefixes = ['-', '+', ' ']
+    hashed = hashlib.sha1()
+
+    for line in diff.split('\n'):
+        if len(line) <= 0:
+            continue
+
+        hunk_match = HUNK_RE.match(line)
+        filename_match = FILENAME_RE.match(line)
+
+        if filename_match:
+            # normalise -p1 top-directories
+            if filename_match.group(1) == '---':
+                filename = 'a/'
+            else:
+                filename = 'b/'
+            filename += '/'.join(filename_match.group(2).split('/')[1:])
+
+            line = filename_match.group(1) + ' ' + filename
+        elif hunk_match:
+            # remove line numbers, but leave line counts
+            def fn(x):
+                if not x:
+                    return 1
+                return int(x)
+            line_nos = list(map(fn, hunk_match.groups()))
+            line = '@@ -%d +%d @@' % tuple(line_nos)
+        elif line[0] in prefixes:
+            # if we have a +, - or context line, leave as-is
+            pass
+        else:
+            # other lines are ignored
+            continue
+
+        hashed.update((line + '\n').encode('utf-8'))
+
+    return hashed.hexdigest()
+
+
+def main(args):
+    """Hash a diff provided by stdin.
+
+    This is required by scripts found in /tools
+    """
+    print(hash_diff('\n'.join(sys.stdin.readlines())))
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/patchwork/models.py b/patchwork/models.py
index 15a2936..cff9587 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -22,7 +22,6 @@ from __future__ import absolute_import
 
 from collections import Counter, OrderedDict
 import datetime
-import hashlib
 import random
 import re
 
@@ -35,6 +34,7 @@ from django.utils.encoding import python_2_unicode_compatible
 from django.utils.functional import cached_property
 
 from patchwork.fields import HashField
+from patchwork.hasher import hash_diff
 
 
 @python_2_unicode_compatible
@@ -366,54 +366,6 @@ class Patch(SeriesMixin, Submission):
 
         return counts
 
-    @staticmethod
-    def hash_diff(diff):
-        """Generate a hash from a diff."""
-        hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
-        filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
-
-        # normalise spaces
-        diff = diff.replace('\r', '')
-        diff = diff.strip() + '\n'
-
-        prefixes = ['-', '+', ' ']
-        hash = hashlib.sha1()
-
-        for line in diff.split('\n'):
-            if len(line) <= 0:
-                continue
-
-            hunk_match = hunk_re.match(line)
-            filename_match = filename_re.match(line)
-
-            if filename_match:
-                # normalise -p1 top-directories
-                if filename_match.group(1) == '---':
-                    filename = 'a/'
-                else:
-                    filename = 'b/'
-                filename += '/'.join(filename_match.group(2).split('/')[1:])
-
-                line = filename_match.group(1) + ' ' + filename
-            elif hunk_match:
-                # remove line numbers, but leave line counts
-                def fn(x):
-                    if not x:
-                        return 1
-                    return int(x)
-                line_nos = list(map(fn, hunk_match.groups()))
-                line = '@@ -%d +%d @@' % tuple(line_nos)
-            elif line[0] in prefixes:
-                # if we have a +, - or context line, leave as-is
-                pass
-            else:
-                # other lines are ignored
-                continue
-
-            hash.update((line + '\n').encode('utf-8'))
-
-        return hash
-
     def _set_tag(self, tag, count):
         if count == 0:
             self.patchtag_set.filter(tag=tag).delete()
@@ -441,7 +393,7 @@ class Patch(SeriesMixin, Submission):
             self.state = get_default_initial_patch_state()
 
         if self.hash is None and self.diff is not None:
-            self.hash = self.hash_diff(self.diff).hexdigest()
+            self.hash = hash_diff(self.diff)
 
         super(Patch, self).save(**kwargs)
 
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 9d1b79e..16cc53c 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-#
 # Patchwork - automated patch tracking system
 # Copyright (C) 2008 Jeremy Kerr
 #
-- 
cgit v1.2.3