diff options
| -rw-r--r-- | patchwork/migrations/0039_unique_series_references.py | 121 | ||||
| -rw-r--r-- | patchwork/models.py | 6 | ||||
| -rw-r--r-- | patchwork/parser.py | 121 | ||||
| -rw-r--r-- | patchwork/tests/test_parser.py | 9 | ||||
| -rw-r--r-- | patchwork/tests/utils.py | 6 |
5 files changed, 198 insertions, 65 deletions
diff --git a/patchwork/migrations/0039_unique_series_references.py b/patchwork/migrations/0039_unique_series_references.py new file mode 100644 index 0000000..99b10fc --- /dev/null +++ b/patchwork/migrations/0039_unique_series_references.py @@ -0,0 +1,121 @@ +from django.db import connection, migrations, models +from django.db.models import Count +import django.db.models.deletion + + +def merge_duplicate_series(apps, schema_editor): + SeriesReference = apps.get_model('patchwork', 'SeriesReference') + Patch = apps.get_model('patchwork', 'Patch') + + msgid_seriesrefs = {} + + # find all SeriesReference that share a msgid but point to different series + # and decide which of the series is going to be the authoritative one + msgid_counts = ( + SeriesReference.objects.values('msgid') + .annotate(count=Count('msgid')) + .filter(count__gt=1) + ) + for msgid_count in msgid_counts: + msgid = msgid_count['msgid'] + chosen_ref = None + for series_ref in SeriesReference.objects.filter(msgid=msgid): + if series_ref.series.cover_letter: + if chosen_ref: + # I don't think this can happen, but explode if it does + raise Exception( + "Looks like you've got two or more series that share " + "some patches but do not share a cover letter. Unable " + "to auto-resolve." + ) + + # if a series has a cover letter, that's the one we'll group + # everything under + chosen_ref = series_ref + + if not chosen_ref: + # if none of the series have cover letters, simply use the last + # one (hint: this relies on Python's weird scoping for for loops + # where 'series_ref' is still accessible outside the loop) + chosen_ref = series_ref + + msgid_seriesrefs[msgid] = chosen_ref + + # reassign any patches referring to non-authoritative series to point to + # the authoritative one, and delete the other series; we do this separately + # to allow us a chance to raise the exception above if necessary + for msgid, chosen_ref in msgid_seriesrefs.items(): + for series_ref in SeriesReference.objects.filter(msgid=msgid): + if series_ref == chosen_ref: + continue + + # update the patches to point to our chosen series instead, on the + # assumption that all other metadata is correct + for patch in Patch.objects.filter(series=series_ref.series): + patch.series = chosen_ref.series + patch.save() + + # delete the other series (which will delete the series ref) + series_ref.series.delete() + + +def copy_project_field(apps, schema_editor): + if connection.vendor == 'postgresql': + schema_editor.execute( + """ + UPDATE patchwork_seriesreference + SET project_id = patchwork_series.project_id + FROM patchwork_series + WHERE patchwork_seriesreference.series_id = patchwork_series.id + """ + ) + elif connection.vendor == 'mysql': + schema_editor.execute( + """ + UPDATE patchwork_seriesreference, patchwork_series + SET patchwork_seriesreference.project_id = patchwork_series.project_id + WHERE patchwork_seriesreference.series_id = patchwork_series.id + """ # noqa + ) + else: + SeriesReference = apps.get_model('patchwork', 'SeriesReference') + + for series_ref in SeriesReference.objects.all().select_related( + 'series' + ): + series_ref.project = series_ref.series.project + series_ref.save() + + +class Migration(migrations.Migration): + + dependencies = [('patchwork', '0038_state_slug')] + + operations = [ + migrations.RunPython( + merge_duplicate_series, migrations.RunPython.noop, atomic=False + ), + migrations.AddField( + model_name='seriesreference', + name='project', + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + to='patchwork.Project', + ), + ), + migrations.AlterUniqueTogether( + name='seriesreference', unique_together={('project', 'msgid')} + ), + migrations.RunPython( + copy_project_field, migrations.RunPython.noop, atomic=False + ), + migrations.AlterField( + model_name='seriesreference', + name='project', + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to='patchwork.Project', + ), + ), + ] diff --git a/patchwork/models.py b/patchwork/models.py index d95eb0d..be71d40 100644 --- a/patchwork/models.py +++ b/patchwork/models.py @@ -79,7 +79,8 @@ class Project(models.Model): webscm_url = models.CharField(max_length=2000, blank=True) list_archive_url = models.CharField(max_length=2000, blank=True) list_archive_url_format = models.CharField( - max_length=2000, blank=True, + max_length=2000, + blank=True, help_text="URL format for the list archive's Message-ID redirector. " "{} will be replaced by the Message-ID.") commit_url_format = models.CharField( @@ -785,6 +786,7 @@ class SeriesReference(models.Model): required to handle the case whereby one or more patches are received before the cover letter. """ + project = models.ForeignKey(Project, on_delete=models.CASCADE) series = models.ForeignKey(Series, related_name='references', related_query_name='reference', on_delete=models.CASCADE) @@ -794,7 +796,7 @@ class SeriesReference(models.Model): return self.msgid class Meta: - unique_together = [('series', 'msgid')] + unique_together = [('project', 'msgid')] class Bundle(models.Model): diff --git a/patchwork/parser.py b/patchwork/parser.py index c424729..c0084cc 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -16,6 +16,7 @@ import re from django.contrib.auth.models import User from django.db.utils import IntegrityError +from django.db import transaction from django.utils import six from patchwork.models import Comment @@ -256,16 +257,9 @@ def _find_series_by_references(project, mail): for ref in refs: try: return SeriesReference.objects.get( - msgid=ref[:255], series__project=project).series + msgid=ref[:255], project=project).series except SeriesReference.DoesNotExist: continue - except SeriesReference.MultipleObjectsReturned: - # FIXME: Open bug: this can happen when we're processing - # messages in parallel. Pick the first and log. - logger.error("Multiple SeriesReferences for %s in project %s!" % - (ref[:255], project.name)) - return SeriesReference.objects.filter( - msgid=ref[:255], series__project=project).first().series def _find_series_by_markers(project, mail, author): @@ -1092,47 +1086,65 @@ def parse_mail(mail, list_id=None): except IntegrityError: raise DuplicateMailError(msgid=msgid) - # if we don't have a series marker, we will never have an existing - # series to match against. - series = None - if n: - series = find_series(project, mail, author) - else: - x = n = 1 + for attempt in range(1, 11): # arbitrary retry count + try: + with transaction.atomic(): + # if we don't have a series marker, we will never have an + # existing series to match against. + series = None + if n: + series = find_series(project, mail, author) + else: + x = n = 1 - # We will create a new series if: - # - there is no existing series to assign this patch to, or - # - there is an existing series, but it already has a patch with this - # number in it - if not series or Patch.objects.filter(series=series, number=x).count(): - series = Series.objects.create( - project=project, - date=date, - submitter=author, - version=version, - total=n) + # We will create a new series if: + # - there is no existing series to assign this patch to, or + # - there is an existing series, but it already has a patch + # with this number in it + if not series or Patch.objects.filter( + series=series, number=x).count(): + series = Series.objects.create( + project=project, + date=date, + submitter=author, + version=version, + total=n) - # NOTE(stephenfin) We must save references for series. We - # do this to handle the case where a later patch is - # received first. Without storing references, it would not - # be possible to identify the relationship between patches - # as the earlier patch does not reference the later one. - for ref in refs + [msgid]: - ref = ref[:255] - # we don't want duplicates - try: - # we could have a ref to a previous series. (For - # example, a series sent in reply to another - # series.) That should not create a series ref - # for this series, so check for the msg-id only, - # not the msg-id/series pair. - SeriesReference.objects.get(msgid=ref, - series__project=project) - except SeriesReference.DoesNotExist: - SeriesReference.objects.create(series=series, msgid=ref) - except SeriesReference.MultipleObjectsReturned: - logger.error("Multiple SeriesReferences for %s" - " in project %s!" % (ref, project.name)) + # NOTE(stephenfin) We must save references for series. + # We do this to handle the case where a later patch is + # received first. Without storing references, it would + # not be possible to identify the relationship between + # patches as the earlier patch does not reference the + # later one. + for ref in refs + [msgid]: + ref = ref[:255] + # we don't want duplicates + try: + # we could have a ref to a previous series. + # (For example, a series sent in reply to + # another series.) That should not create a + # series ref for this series, so check for the + # msg-id only, not the msg-id/series pair. + SeriesReference.objects.get( + msgid=ref, project=project) + except SeriesReference.DoesNotExist: + SeriesReference.objects.create( + msgid=ref, project=project, series=series) + break + except IntegrityError: + # we lost the race so go again + logger.warning('Conflict while saving series. This is ' + 'probably because multiple patches belonging ' + 'to the same series have been received at ' + 'once. Trying again (attempt %02d/10)', + attempt) + else: + # we failed to save the series so return the series-less patch + logger.warning('Failed to save series. Your patch with message ID ' + '%s has been saved but this should not happen. ' + 'Please report this as a bug and include logs', + msgid) + return patch # add to a series if we have found one, and we have a numbered # patch. Don't add unnumbered patches (for example diffs sent @@ -1170,14 +1182,9 @@ def parse_mail(mail, list_id=None): # message try: series = SeriesReference.objects.get( - msgid=msgid, series__project=project).series + msgid=msgid, project=project).series except SeriesReference.DoesNotExist: series = None - except SeriesReference.MultipleObjectsReturned: - logger.error("Multiple SeriesReferences for %s" - " in project %s!" % (msgid, project.name)) - series = SeriesReference.objects.filter( - msgid=msgid, series__project=project).first().series if not series: series = Series.objects.create( @@ -1190,12 +1197,8 @@ def parse_mail(mail, list_id=None): # we don't save the in-reply-to or references fields # for a cover letter, as they can't refer to the same # series - try: - SeriesReference.objects.get_or_create(series=series, - msgid=msgid) - except SeriesReference.MultipleObjectsReturned: - logger.error("Multiple SeriesReferences for %s" - " in project %s!" % (msgid, project.name)) + SeriesReference.objects.create( + msgid=msgid, project=project, series=series) try: cover_letter = CoverLetter.objects.create( diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py index 0bf7158..0edbd87 100644 --- a/patchwork/tests/test_parser.py +++ b/patchwork/tests/test_parser.py @@ -421,17 +421,20 @@ class SeriesCorrelationTest(TestCase): msgids = [make_msgid()] project = create_project() series_v1 = create_series(project=project) - create_series_reference(msgid=msgids[0], series=series_v1) + create_series_reference(msgid=msgids[0], series=series_v1, + project=project) # ...and three patches for i in range(3): msgids.append(make_msgid()) - create_series_reference(msgid=msgids[-1], series=series_v1) + create_series_reference(msgid=msgids[-1], series=series_v1, + project=project) # now create a new series with "cover letter" msgids.append(make_msgid()) series_v2 = create_series(project=project) - ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2) + ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2, + project=project) # ...and the "first patch" of this new series msgid = make_msgid() diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py index 91bcbe1..5e6a1b1 100644 --- a/patchwork/tests/utils.py +++ b/patchwork/tests/utils.py @@ -283,8 +283,12 @@ def create_series(**kwargs): def create_series_reference(**kwargs): """Create 'SeriesReference' object.""" + project = kwargs.pop('project', create_project()) + series = kwargs.pop('series', create_series(project=project)) + values = { - 'series': create_series() if 'series' not in kwargs else None, + 'series': series, + 'project': project, 'msgid': make_msgid(), } values.update(**kwargs) |