1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
|
-- Deploy guix-data-service:fix_duplicated_licenses to pg
BEGIN;
SET CONSTRAINTS ALL DEFERRED;
-- Remove unique constraint from license_sets
ALTER TABLE license_sets DROP CONSTRAINT license_sets_pkey;
-- Change all license sets to refer to canonical licenses
UPDATE license_sets AS master SET license_ids = ARRAY(
SELECT new_licenses_2.id FROM (
SELECT a.elem AS id, a.nr AS index
FROM license_sets, unnest(license_sets.license_ids)
WITH ORDINALITY a(elem, nr)
WHERE id = master.id
) AS old_licenses
INNER JOIN (
SELECT licenses.id AS old_id, new_licenses.*
FROM licenses INNER JOIN (
SELECT MIN(id) AS id, name, uri, comment
FROM licenses
GROUP BY name, uri, comment
ORDER BY name
) AS new_licenses
ON licenses.name = new_licenses.name AND
(licenses.uri = new_licenses.uri OR (
licenses.uri IS NULL AND new_licenses.uri IS NULL
)) AND (
licenses.comment = new_licenses.comment OR (
licenses.comment IS NULL AND new_licenses.comment IS NULL
))
) AS new_licenses_2
ON old_licenses.id = new_licenses_2.old_id
ORDER BY old_licenses.index);
-- Remove unique constraint from package_metadata
ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_unique_fields;
-- Update package_metadata to refer to canonical license_sets
WITH data AS (
SELECT MIN(id) AS id, ARRAY_AGG(id) AS old_ids
FROM license_sets
GROUP BY license_ids
)
UPDATE package_metadata AS master
SET license_set_id = data.id
FROM data
WHERE license_set_id = ANY(data.old_ids);
-- Remove unique constraint from packages
ALTER TABLE packages DROP CONSTRAINT packages_pkey;
-- Update packages to refer to canonical package_metadata entries
WITH data AS (
SELECT MIN(package_metadata.id) AS id, ARRAY_AGG(package_metadata.id) AS old_ids
FROM package_metadata
GROUP BY package_metadata.synopsis, package_metadata.description,
package_metadata.home_page, package_metadata.location_id,
package_metadata.license_set_id
HAVING COUNT(package_metadata.id) > 1
)
UPDATE packages SET package_metadata_id = data.id
FROM data
WHERE package_metadata_id = ANY(data.old_ids);
-- Remove unique constraint from package_derivations
ALTER TABLE package_derivations DROP CONSTRAINT package_derivations_pkey;
-- Update package_derivations to refer to canonical packages entries
WITH data AS (
SELECT unnest(old_ids) AS old, id FROM (
SELECT MIN(packages.id) AS id, ARRAY_AGG(packages.id) AS old_ids
FROM packages
GROUP BY name, version, package_metadata_id
HAVING COUNT(id) > 1
) AS d2
)
UPDATE package_derivations SET package_id = data.id
FROM data
WHERE package_id = data.old;
-- Update guix_revision_package_derivations to refer to canonical
-- package_derivations entries
WITH data AS (
SELECT unnest(old_ids) AS old, id FROM (
SELECT MIN(package_derivations.id) AS id, ARRAY_AGG(package_derivations.id) AS old_ids
FROM package_derivations
GROUP BY package_id, derivation_id, system, target
HAVING COUNT(id) > 1
) AS d2
)
UPDATE guix_revision_package_derivations SET package_derivation_id = data.id
FROM data
WHERE package_derivation_id = data.old;
-- Drop the foreign key constraint as an attempt to speed up deleting from
-- package_derivations.
ALTER TABLE guix_revision_package_derivations
DROP CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey;
-- Delete non-canonical package_dervations entries
DELETE FROM package_derivations AS pd WHERE id NOT IN (
SELECT MIN(id)
FROM package_derivations
GROUP BY (
package_id,
derivation_id,
system,
target
)
);
-- Reinstate the deleted constraint
ALTER TABLE guix_revision_package_derivations
ADD CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey
FOREIGN KEY (package_derivation_id) REFERENCES package_derivations(id);
-- Delete non-canonical packages entries
DELETE FROM packages AS p WHERE id NOT IN (
SELECT MIN(id)
FROM packages
GROUP BY (name, version, package_metadata_id)
);
-- Add referential constraints
ALTER TABLE package_derivations
ADD CONSTRAINT package_derivations_package_id_fkey
FOREIGN KEY (package_id) REFERENCES packages (id);
ALTER TABLE package_derivations
ADD CONSTRAINT package_derivations_derivation_id_fkey
FOREIGN KEY (derivation_id) REFERENCES derivations (id);
-- Delete non-canonical package_metadata entries
ALTER TABLE packages DROP CONSTRAINT package_metadata_id;
DELETE FROM package_metadata AS pm WHERE id NOT IN (
SELECT MIN(id)
FROM package_metadata
GROUP BY (synopsis, description, home_page, location_id, license_set_id)
);
ALTER TABLE packages ADD CONSTRAINT package_metadata_id
FOREIGN KEY (package_metadata_id) REFERENCES package_metadata(id);
-- Delete non-canonical license_sets entries
ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_license_set_id_fkey;
DELETE FROM license_sets AS ls WHERE id NOT IN (
SELECT MIN(id)
FROM license_sets
GROUP BY license_ids
);
ALTER TABLE package_metadata ADD CONSTRAINT package_metadata_license_set_id_fkey
FOREIGN KEY (license_set_id) REFERENCES license_sets(id);
-- Delete non-canonical licenses entries
DELETE FROM licenses AS l WHERE id NOT IN (
SELECT MIN(id)
FROM licenses
GROUP BY (name, uri, comment)
);
-- Restore unique constraints
CREATE UNIQUE INDEX ON licenses (name)
WHERE uri IS NULL AND comment IS NULL;
CREATE UNIQUE INDEX ON licenses (name, uri)
WHERE uri IS NOT NULL AND comment IS NULL;
CREATE UNIQUE INDEX ON licenses (name, comment)
WHERE uri IS NULL AND comment IS NOT NULL;
CREATE UNIQUE INDEX ON licenses (name, uri, comment)
WHERE uri IS NOT NULL AND comment IS NOT NULL;
ALTER TABLE license_sets ADD PRIMARY KEY (license_ids);
ALTER TABLE package_metadata ALTER synopsis SET NOT NULL;
ALTER TABLE package_metadata ALTER description SET NOT NULL;
CREATE UNIQUE INDEX ON package_metadata (
synopsis,
description,
coalesce(location_id, -1),
coalesce(license_set_id, -1)
) WHERE home_page IS NULL;
CREATE UNIQUE INDEX ON package_metadata (
synopsis,
description,
home_page,
coalesce(location_id, -1),
coalesce(license_set_id, -1)
) WHERE home_page IS NOT NULL;
ALTER TABLE packages ADD PRIMARY KEY (name, version, package_metadata_id);
ALTER TABLE package_derivations ADD PRIMARY KEY (package_id, derivation_id, system, target);
COMMIT;
|