diff options
Diffstat (limited to 'sqitch/deploy')
-rw-r--r-- | sqitch/deploy/fix_duplicated_licenses.sql | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/sqitch/deploy/fix_duplicated_licenses.sql b/sqitch/deploy/fix_duplicated_licenses.sql new file mode 100644 index 0000000..33820ca --- /dev/null +++ b/sqitch/deploy/fix_duplicated_licenses.sql @@ -0,0 +1,218 @@ +-- Deploy guix-data-service:fix_duplicated_licenses to pg + +BEGIN; + +SET CONSTRAINTS ALL DEFERRED; + +-- Remove unique constraint from license_sets + +ALTER TABLE license_sets DROP CONSTRAINT license_sets_pkey; + +-- Change all license sets to refer to canonical licenses + +UPDATE license_sets AS master SET license_ids = ARRAY( + SELECT new_licenses_2.id FROM ( + SELECT a.elem AS id, a.nr AS index + FROM license_sets, unnest(license_sets.license_ids) + WITH ORDINALITY a(elem, nr) + WHERE id = master.id + ) AS old_licenses + INNER JOIN ( + SELECT licenses.id AS old_id, new_licenses.* + FROM licenses INNER JOIN ( + SELECT MIN(id) AS id, name, uri, comment + FROM licenses + GROUP BY name, uri, comment + ORDER BY name + ) AS new_licenses + ON licenses.name = new_licenses.name AND + (licenses.uri = new_licenses.uri OR ( + licenses.uri IS NULL AND new_licenses.uri IS NULL + )) AND ( + licenses.comment = new_licenses.comment OR ( + licenses.comment IS NULL AND new_licenses.comment IS NULL + )) + ) AS new_licenses_2 + ON old_licenses.id = new_licenses_2.old_id + ORDER BY old_licenses.index); + +-- Remove unique constraint from package_metadata + +ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_unique_fields; + +-- Update package_metadata to refer to canonical license_sets + +WITH data AS ( + SELECT MIN(id) AS id, ARRAY_AGG(id) AS old_ids + FROM license_sets + GROUP BY license_ids +) +UPDATE package_metadata AS master +SET license_set_id = data.id +FROM data +WHERE license_set_id = ANY(data.old_ids); + +-- Remove unique constraint from packages + +ALTER TABLE packages DROP CONSTRAINT packages_pkey; + +-- Update packages to refer to canonical package_metadata entries + +WITH data AS ( + SELECT MIN(package_metadata.id) AS id, ARRAY_AGG(package_metadata.id) AS old_ids + FROM package_metadata + GROUP BY package_metadata.synopsis, package_metadata.description, + package_metadata.home_page, package_metadata.location_id, + package_metadata.license_set_id + HAVING COUNT(package_metadata.id) > 1 +) +UPDATE packages SET package_metadata_id = data.id +FROM data +WHERE package_metadata_id = ANY(data.old_ids); + +-- Remove unique constraint from package_derivations + +ALTER TABLE package_derivations DROP CONSTRAINT package_derivations_pkey; + +-- Update package_derivations to refer to canonical packages entries + +WITH data AS ( + SELECT unnest(old_ids) AS old, id FROM ( + SELECT MIN(packages.id) AS id, ARRAY_AGG(packages.id) AS old_ids + FROM packages + GROUP BY name, version, package_metadata_id + HAVING COUNT(id) > 1 + ) AS d2 +) +UPDATE package_derivations SET package_id = data.id +FROM data +WHERE package_id = data.old; + +-- Update guix_revision_package_derivations to refer to canonical +-- package_derivations entries + +WITH data AS ( + SELECT unnest(old_ids) AS old, id FROM ( + SELECT MIN(package_derivations.id) AS id, ARRAY_AGG(package_derivations.id) AS old_ids + FROM package_derivations + GROUP BY package_id, derivation_id, system, target + HAVING COUNT(id) > 1 + ) AS d2 +) +UPDATE guix_revision_package_derivations SET package_derivation_id = data.id +FROM data +WHERE package_derivation_id = data.old; + +-- Drop the foreign key constraint as an attempt to speed up deleting from +-- package_derivations. + +ALTER TABLE guix_revision_package_derivations + DROP CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey; + +-- Delete non-canonical package_dervations entries + +DELETE FROM package_derivations AS pd WHERE id NOT IN ( + SELECT MIN(id) + FROM package_derivations + GROUP BY ( + package_id, + derivation_id, + system, + target + ) +); + +-- Reinstate the deleted constraint + +ALTER TABLE guix_revision_package_derivations + ADD CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey + FOREIGN KEY (package_derivation_id) REFERENCES package_derivations(id); + +-- Delete non-canonical packages entries + +DELETE FROM packages AS p WHERE id NOT IN ( + SELECT MIN(id) + FROM packages + GROUP BY (name, version, package_metadata_id) +); + +-- Add referential constraints + +ALTER TABLE package_derivations + ADD CONSTRAINT package_derivations_package_id_fkey + FOREIGN KEY (package_id) REFERENCES packages (id); + +ALTER TABLE package_derivations + ADD CONSTRAINT package_derivations_derivation_id_fkey + FOREIGN KEY (derivation_id) REFERENCES derivations (id); + +-- Delete non-canonical package_metadata entries + +ALTER TABLE packages DROP CONSTRAINT package_metadata_id; + +DELETE FROM package_metadata AS pm WHERE id NOT IN ( + SELECT MIN(id) + FROM package_metadata + GROUP BY (synopsis, description, home_page, location_id, license_set_id) +); + +ALTER TABLE packages ADD CONSTRAINT package_metadata_id + FOREIGN KEY (package_metadata_id) REFERENCES package_metadata(id); + +-- Delete non-canonical license_sets entries + +ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_license_set_id_fkey; + +DELETE FROM license_sets AS ls WHERE id NOT IN ( + SELECT MIN(id) + FROM license_sets + GROUP BY license_ids +); + +ALTER TABLE package_metadata ADD CONSTRAINT package_metadata_license_set_id_fkey + FOREIGN KEY (license_set_id) REFERENCES license_sets(id); + +-- Delete non-canonical licenses entries + +DELETE FROM licenses AS l WHERE id NOT IN ( + SELECT MIN(id) + FROM licenses + GROUP BY (name, uri, comment) +); + +-- Restore unique constraints + +CREATE UNIQUE INDEX ON licenses (name) + WHERE uri IS NULL AND comment IS NULL; +CREATE UNIQUE INDEX ON licenses (name, uri) + WHERE uri IS NOT NULL AND comment IS NULL; +CREATE UNIQUE INDEX ON licenses (name, comment) + WHERE uri IS NULL AND comment IS NOT NULL; +CREATE UNIQUE INDEX ON licenses (name, uri, comment) + WHERE uri IS NOT NULL AND comment IS NOT NULL; + +ALTER TABLE license_sets ADD PRIMARY KEY (license_ids); + +ALTER TABLE package_metadata ALTER synopsis SET NOT NULL; +ALTER TABLE package_metadata ALTER description SET NOT NULL; + +CREATE UNIQUE INDEX ON package_metadata ( + synopsis, + description, + coalesce(location_id, -1), + coalesce(license_set_id, -1) +) WHERE home_page IS NULL; + +CREATE UNIQUE INDEX ON package_metadata ( + synopsis, + description, + home_page, + coalesce(location_id, -1), + coalesce(license_set_id, -1) +) WHERE home_page IS NOT NULL; + +ALTER TABLE packages ADD PRIMARY KEY (name, version, package_metadata_id); + +ALTER TABLE package_derivations ADD PRIMARY KEY (package_id, derivation_id, system, target); + +COMMIT; |