aboutsummaryrefslogtreecommitdiff
path: root/sqitch/deploy/fix_duplicated_licenses.sql
blob: 33820ca7e9c2e2c30152d3b8b2f473af3273508c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
-- Deploy guix-data-service:fix_duplicated_licenses to pg

BEGIN;

SET CONSTRAINTS ALL DEFERRED;

-- Remove unique constraint from license_sets

ALTER TABLE license_sets DROP CONSTRAINT license_sets_pkey;

-- Change all license sets to refer to canonical licenses

UPDATE license_sets AS master SET license_ids = ARRAY(
  SELECT new_licenses_2.id FROM (
    SELECT a.elem AS id, a.nr AS index
    FROM license_sets, unnest(license_sets.license_ids)
    WITH ORDINALITY a(elem, nr)
    WHERE id = master.id
  ) AS old_licenses
  INNER JOIN (
    SELECT licenses.id AS old_id, new_licenses.*
    FROM licenses INNER JOIN (
      SELECT MIN(id) AS id, name, uri, comment
      FROM licenses
      GROUP BY name, uri, comment
      ORDER BY name
    ) AS new_licenses
    ON licenses.name = new_licenses.name AND
      (licenses.uri = new_licenses.uri OR (
        licenses.uri IS NULL AND new_licenses.uri IS NULL
      )) AND (
        licenses.comment = new_licenses.comment OR (
      licenses.comment IS NULL AND new_licenses.comment IS NULL
      ))
  ) AS new_licenses_2
  ON old_licenses.id = new_licenses_2.old_id
  ORDER BY old_licenses.index);

-- Remove unique constraint from package_metadata

ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_unique_fields;

-- Update package_metadata to refer to canonical license_sets

WITH data AS (
  SELECT MIN(id) AS id, ARRAY_AGG(id) AS old_ids
  FROM license_sets
  GROUP BY license_ids
)
UPDATE package_metadata AS master
SET license_set_id = data.id
FROM data
WHERE license_set_id = ANY(data.old_ids);

-- Remove unique constraint from packages

ALTER TABLE packages DROP CONSTRAINT packages_pkey;

-- Update packages to refer to canonical package_metadata entries

WITH data AS (
  SELECT MIN(package_metadata.id) AS id, ARRAY_AGG(package_metadata.id) AS old_ids
  FROM package_metadata
  GROUP BY package_metadata.synopsis, package_metadata.description,
  package_metadata.home_page, package_metadata.location_id,
  package_metadata.license_set_id
  HAVING COUNT(package_metadata.id) > 1
)
UPDATE packages SET package_metadata_id = data.id
FROM data
WHERE package_metadata_id = ANY(data.old_ids);

-- Remove unique constraint from package_derivations

ALTER TABLE package_derivations DROP CONSTRAINT package_derivations_pkey;

-- Update package_derivations to refer to canonical packages entries

WITH data AS (
  SELECT unnest(old_ids) AS old, id FROM (
    SELECT MIN(packages.id) AS id, ARRAY_AGG(packages.id) AS old_ids
    FROM packages
    GROUP BY name, version, package_metadata_id
    HAVING COUNT(id) > 1
  ) AS d2
)
UPDATE package_derivations SET package_id = data.id
FROM data
WHERE package_id = data.old;

-- Update guix_revision_package_derivations to refer to canonical
-- package_derivations entries

WITH data AS (
  SELECT unnest(old_ids) AS old, id FROM (
    SELECT MIN(package_derivations.id) AS id, ARRAY_AGG(package_derivations.id) AS old_ids
    FROM package_derivations
    GROUP BY package_id, derivation_id, system, target
    HAVING COUNT(id) > 1
  ) AS d2
)
UPDATE guix_revision_package_derivations SET package_derivation_id = data.id
FROM data
WHERE package_derivation_id = data.old;

-- Drop the foreign key constraint as an attempt to speed up deleting from
-- package_derivations.

ALTER TABLE guix_revision_package_derivations
  DROP CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey;

-- Delete non-canonical package_dervations entries

DELETE FROM package_derivations AS pd WHERE id NOT IN (
  SELECT MIN(id)
  FROM package_derivations
  GROUP BY (
    package_id,
    derivation_id,
    system,
    target
  )
);

-- Reinstate the deleted constraint

ALTER TABLE guix_revision_package_derivations
  ADD CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey
  FOREIGN KEY (package_derivation_id) REFERENCES package_derivations(id);

-- Delete non-canonical packages entries

DELETE FROM packages AS p WHERE id NOT IN (
  SELECT MIN(id)
  FROM packages
  GROUP BY (name, version, package_metadata_id)
);

-- Add referential constraints

ALTER TABLE package_derivations
  ADD CONSTRAINT package_derivations_package_id_fkey
  FOREIGN KEY (package_id) REFERENCES packages (id);

ALTER TABLE package_derivations
  ADD CONSTRAINT package_derivations_derivation_id_fkey
  FOREIGN KEY (derivation_id) REFERENCES derivations (id);

-- Delete non-canonical package_metadata entries

ALTER TABLE packages DROP CONSTRAINT package_metadata_id;

DELETE FROM package_metadata AS pm WHERE id NOT IN (
  SELECT MIN(id)
  FROM package_metadata
  GROUP BY (synopsis, description, home_page, location_id, license_set_id)
);

ALTER TABLE packages ADD CONSTRAINT package_metadata_id
  FOREIGN KEY (package_metadata_id) REFERENCES package_metadata(id);

-- Delete non-canonical license_sets entries

ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_license_set_id_fkey;

DELETE FROM license_sets AS ls WHERE id NOT IN (
  SELECT MIN(id)
  FROM license_sets
  GROUP BY license_ids
);

ALTER TABLE package_metadata ADD CONSTRAINT package_metadata_license_set_id_fkey
  FOREIGN KEY (license_set_id) REFERENCES license_sets(id);

-- Delete non-canonical licenses entries

DELETE FROM licenses AS l WHERE id NOT IN (
  SELECT MIN(id)
  FROM licenses
  GROUP BY (name, uri, comment)
);

-- Restore unique constraints

CREATE UNIQUE INDEX ON licenses (name)
  WHERE uri IS NULL AND comment IS NULL;
CREATE UNIQUE INDEX ON licenses (name, uri)
  WHERE uri IS NOT NULL AND comment IS NULL;
CREATE UNIQUE INDEX ON licenses (name, comment)
  WHERE uri IS NULL AND comment IS NOT NULL;
CREATE UNIQUE INDEX ON licenses (name, uri, comment)
  WHERE uri IS NOT NULL AND comment IS NOT NULL;

ALTER TABLE license_sets ADD PRIMARY KEY (license_ids);

ALTER TABLE package_metadata ALTER synopsis SET NOT NULL;
ALTER TABLE package_metadata ALTER description SET NOT NULL;

CREATE UNIQUE INDEX ON package_metadata (
  synopsis,
  description,
  coalesce(location_id, -1),
  coalesce(license_set_id, -1)
) WHERE home_page IS NULL;

CREATE UNIQUE INDEX ON package_metadata (
  synopsis,
  description,
  home_page,
  coalesce(location_id, -1),
  coalesce(license_set_id, -1)
) WHERE home_page IS NOT NULL;

ALTER TABLE packages ADD PRIMARY KEY (name, version, package_metadata_id);

ALTER TABLE package_derivations ADD PRIMARY KEY (package_id, derivation_id, system, target);

COMMIT;