diff options
-rw-r--r-- | guix-data-service/model/license.scm | 57 | ||||
-rw-r--r-- | guix-data-service/model/package-metadata.scm | 22 | ||||
-rw-r--r-- | guix-data-service/model/utils.scm | 8 | ||||
-rw-r--r-- | sqitch/deploy/fix_duplicated_licenses.sql | 218 | ||||
-rw-r--r-- | sqitch/revert/fix_duplicated_licenses.sql | 7 | ||||
-rw-r--r-- | sqitch/sqitch.plan | 1 | ||||
-rw-r--r-- | sqitch/verify/fix_duplicated_licenses.sql | 7 | ||||
-rw-r--r-- | tests/model-license-set.scm | 6 | ||||
-rw-r--r-- | tests/model-license.scm | 7 | ||||
-rw-r--r-- | tests/model-package-metadata.scm | 18 |
10 files changed, 313 insertions, 38 deletions
diff --git a/guix-data-service/model/license.scm b/guix-data-service/model/license.scm index 0b28343..73df612 100644 --- a/guix-data-service/model/license.scm +++ b/guix-data-service/model/license.scm @@ -43,28 +43,6 @@ (inferior-eval '(use-modules (guix licenses)) inf) (inferior-eval (proc packages) inf)) -(define (select-licenses license-values) - (string-append - "SELECT id, licenses.name, licenses.uri, licenses.comment " - "FROM licenses " - "JOIN (VALUES " - (string-join - (map (lambda (values) - (string-append - "(" - (string-join - (map value->quoted-string-or-null - values) - ", ") - ")")) - license-values) - ", ") - ") AS vals (name, uri, comment) " - "ON " - "licenses.name = vals.name AND " - "licenses.uri = vals.uri AND " - "licenses.comment = vals.comment")) - (define (insert-licenses values) (string-append "INSERT INTO licenses " @@ -91,15 +69,39 @@ (map number->string (sort (map string->number ids) <))) + (define (non-string-to-false lst) + (map (lambda (value) + (if (string? value) + value + #f)) + lst)) + + (define (empty-string-to-false lst) + ;; TODO squee returns empty strings for null values, which will probably + ;; cause problems + (map (lambda (value) + (if (string? value) + (if (string-null? value) + #f + value) + value)) + lst)) + (let* ((unique-license-tuples (filter (lambda (license-tuple) (not (null? license-tuple))) (delete-duplicates - (concatenate license-data)))) + (map + (lambda (lst) + (non-string-to-false + (empty-string-to-false lst))) + (concatenate license-data))))) (existing-license-entries (exec-query->vhash conn - (select-licenses unique-license-tuples) - cdr + "SELECT id, name, uri, comment FROM licenses" + (lambda (vals) + (non-string-to-false + (empty-string-to-false (cdr vals)))) first)) ;; id (missing-license-entries (delete-duplicates @@ -128,5 +130,8 @@ (begin (error "missing license entry" license-values))))) - license-value-lists))) + (map (lambda (lst) + (non-string-to-false + (empty-string-to-false lst))) + license-value-lists)))) license-data))) diff --git a/guix-data-service/model/package-metadata.scm b/guix-data-service/model/package-metadata.scm index 37a8da8..178d783 100644 --- a/guix-data-service/model/package-metadata.scm +++ b/guix-data-service/model/package-metadata.scm @@ -52,8 +52,14 @@ "ON " (string-join (map (lambda (field) - (string-append - "package_metadata." field " = vals." field)) + (if (member field '("home_page" "location_id" + "license_set_id")) + (string-append + "(package_metadata." field " = vals." field " OR " + "(package_metadata." field " IS NULL AND" + " vals." field " IS NULL))") + (string-append + "package_metadata." field " = vals." field))) fields) " AND "))) @@ -129,7 +135,8 @@ WHERE packages.id IN ( (map (lambda (package license-set-id) (list (inferior-package-synopsis package) (inferior-package-description package) - (inferior-package-home-page package) + (non-empty-string-or-false + (inferior-package-home-page package)) (location->location-id conn (inferior-package-location package)) @@ -140,7 +147,14 @@ WHERE packages.id IN ( (let* ((existing-package-metadata-entries (exec-query->vhash conn (select-package-metadata package-metadata) - cdr + (match-lambda + ((id synopsis description home-page + location-id license-set-id) + (list synopsis + description + (non-empty-string-or-false home-page) + location-id + license-set-id))) first)) ;; id)) (missing-package-metadata-entries (delete-duplicates diff --git a/guix-data-service/model/utils.scm b/guix-data-service/model/utils.scm index 1ece31a..0ce9fea 100644 --- a/guix-data-service/model/utils.scm +++ b/guix-data-service/model/utils.scm @@ -5,6 +5,7 @@ #:use-module (squee) #:export (quote-string value->quoted-string-or-null + non-empty-string-or-false exec-query->vhash two-lists->vhash deduplicate-strings @@ -18,6 +19,13 @@ (string-append "$STR$" value "$STR$") "NULL")) +(define (non-empty-string-or-false s) + (if (string? s) + (if (string-null? s) + #f + s) + #f)) + (define (exec-query->vhash conn query field-function value-function) (fold (lambda (row result) (vhash-cons (field-function row) diff --git a/sqitch/deploy/fix_duplicated_licenses.sql b/sqitch/deploy/fix_duplicated_licenses.sql new file mode 100644 index 0000000..33820ca --- /dev/null +++ b/sqitch/deploy/fix_duplicated_licenses.sql @@ -0,0 +1,218 @@ +-- Deploy guix-data-service:fix_duplicated_licenses to pg + +BEGIN; + +SET CONSTRAINTS ALL DEFERRED; + +-- Remove unique constraint from license_sets + +ALTER TABLE license_sets DROP CONSTRAINT license_sets_pkey; + +-- Change all license sets to refer to canonical licenses + +UPDATE license_sets AS master SET license_ids = ARRAY( + SELECT new_licenses_2.id FROM ( + SELECT a.elem AS id, a.nr AS index + FROM license_sets, unnest(license_sets.license_ids) + WITH ORDINALITY a(elem, nr) + WHERE id = master.id + ) AS old_licenses + INNER JOIN ( + SELECT licenses.id AS old_id, new_licenses.* + FROM licenses INNER JOIN ( + SELECT MIN(id) AS id, name, uri, comment + FROM licenses + GROUP BY name, uri, comment + ORDER BY name + ) AS new_licenses + ON licenses.name = new_licenses.name AND + (licenses.uri = new_licenses.uri OR ( + licenses.uri IS NULL AND new_licenses.uri IS NULL + )) AND ( + licenses.comment = new_licenses.comment OR ( + licenses.comment IS NULL AND new_licenses.comment IS NULL + )) + ) AS new_licenses_2 + ON old_licenses.id = new_licenses_2.old_id + ORDER BY old_licenses.index); + +-- Remove unique constraint from package_metadata + +ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_unique_fields; + +-- Update package_metadata to refer to canonical license_sets + +WITH data AS ( + SELECT MIN(id) AS id, ARRAY_AGG(id) AS old_ids + FROM license_sets + GROUP BY license_ids +) +UPDATE package_metadata AS master +SET license_set_id = data.id +FROM data +WHERE license_set_id = ANY(data.old_ids); + +-- Remove unique constraint from packages + +ALTER TABLE packages DROP CONSTRAINT packages_pkey; + +-- Update packages to refer to canonical package_metadata entries + +WITH data AS ( + SELECT MIN(package_metadata.id) AS id, ARRAY_AGG(package_metadata.id) AS old_ids + FROM package_metadata + GROUP BY package_metadata.synopsis, package_metadata.description, + package_metadata.home_page, package_metadata.location_id, + package_metadata.license_set_id + HAVING COUNT(package_metadata.id) > 1 +) +UPDATE packages SET package_metadata_id = data.id +FROM data +WHERE package_metadata_id = ANY(data.old_ids); + +-- Remove unique constraint from package_derivations + +ALTER TABLE package_derivations DROP CONSTRAINT package_derivations_pkey; + +-- Update package_derivations to refer to canonical packages entries + +WITH data AS ( + SELECT unnest(old_ids) AS old, id FROM ( + SELECT MIN(packages.id) AS id, ARRAY_AGG(packages.id) AS old_ids + FROM packages + GROUP BY name, version, package_metadata_id + HAVING COUNT(id) > 1 + ) AS d2 +) +UPDATE package_derivations SET package_id = data.id +FROM data +WHERE package_id = data.old; + +-- Update guix_revision_package_derivations to refer to canonical +-- package_derivations entries + +WITH data AS ( + SELECT unnest(old_ids) AS old, id FROM ( + SELECT MIN(package_derivations.id) AS id, ARRAY_AGG(package_derivations.id) AS old_ids + FROM package_derivations + GROUP BY package_id, derivation_id, system, target + HAVING COUNT(id) > 1 + ) AS d2 +) +UPDATE guix_revision_package_derivations SET package_derivation_id = data.id +FROM data +WHERE package_derivation_id = data.old; + +-- Drop the foreign key constraint as an attempt to speed up deleting from +-- package_derivations. + +ALTER TABLE guix_revision_package_derivations + DROP CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey; + +-- Delete non-canonical package_dervations entries + +DELETE FROM package_derivations AS pd WHERE id NOT IN ( + SELECT MIN(id) + FROM package_derivations + GROUP BY ( + package_id, + derivation_id, + system, + target + ) +); + +-- Reinstate the deleted constraint + +ALTER TABLE guix_revision_package_derivations + ADD CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey + FOREIGN KEY (package_derivation_id) REFERENCES package_derivations(id); + +-- Delete non-canonical packages entries + +DELETE FROM packages AS p WHERE id NOT IN ( + SELECT MIN(id) + FROM packages + GROUP BY (name, version, package_metadata_id) +); + +-- Add referential constraints + +ALTER TABLE package_derivations + ADD CONSTRAINT package_derivations_package_id_fkey + FOREIGN KEY (package_id) REFERENCES packages (id); + +ALTER TABLE package_derivations + ADD CONSTRAINT package_derivations_derivation_id_fkey + FOREIGN KEY (derivation_id) REFERENCES derivations (id); + +-- Delete non-canonical package_metadata entries + +ALTER TABLE packages DROP CONSTRAINT package_metadata_id; + +DELETE FROM package_metadata AS pm WHERE id NOT IN ( + SELECT MIN(id) + FROM package_metadata + GROUP BY (synopsis, description, home_page, location_id, license_set_id) +); + +ALTER TABLE packages ADD CONSTRAINT package_metadata_id + FOREIGN KEY (package_metadata_id) REFERENCES package_metadata(id); + +-- Delete non-canonical license_sets entries + +ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_license_set_id_fkey; + +DELETE FROM license_sets AS ls WHERE id NOT IN ( + SELECT MIN(id) + FROM license_sets + GROUP BY license_ids +); + +ALTER TABLE package_metadata ADD CONSTRAINT package_metadata_license_set_id_fkey + FOREIGN KEY (license_set_id) REFERENCES license_sets(id); + +-- Delete non-canonical licenses entries + +DELETE FROM licenses AS l WHERE id NOT IN ( + SELECT MIN(id) + FROM licenses + GROUP BY (name, uri, comment) +); + +-- Restore unique constraints + +CREATE UNIQUE INDEX ON licenses (name) + WHERE uri IS NULL AND comment IS NULL; +CREATE UNIQUE INDEX ON licenses (name, uri) + WHERE uri IS NOT NULL AND comment IS NULL; +CREATE UNIQUE INDEX ON licenses (name, comment) + WHERE uri IS NULL AND comment IS NOT NULL; +CREATE UNIQUE INDEX ON licenses (name, uri, comment) + WHERE uri IS NOT NULL AND comment IS NOT NULL; + +ALTER TABLE license_sets ADD PRIMARY KEY (license_ids); + +ALTER TABLE package_metadata ALTER synopsis SET NOT NULL; +ALTER TABLE package_metadata ALTER description SET NOT NULL; + +CREATE UNIQUE INDEX ON package_metadata ( + synopsis, + description, + coalesce(location_id, -1), + coalesce(license_set_id, -1) +) WHERE home_page IS NULL; + +CREATE UNIQUE INDEX ON package_metadata ( + synopsis, + description, + home_page, + coalesce(location_id, -1), + coalesce(license_set_id, -1) +) WHERE home_page IS NOT NULL; + +ALTER TABLE packages ADD PRIMARY KEY (name, version, package_metadata_id); + +ALTER TABLE package_derivations ADD PRIMARY KEY (package_id, derivation_id, system, target); + +COMMIT; diff --git a/sqitch/revert/fix_duplicated_licenses.sql b/sqitch/revert/fix_duplicated_licenses.sql new file mode 100644 index 0000000..cec3d71 --- /dev/null +++ b/sqitch/revert/fix_duplicated_licenses.sql @@ -0,0 +1,7 @@ +-- Revert guix-data-service:fix_duplicated_licenses from pg + +BEGIN; + +-- XXX Add DDLs here. + +COMMIT; diff --git a/sqitch/sqitch.plan b/sqitch/sqitch.plan index 7a37548..bd513cc 100644 --- a/sqitch/sqitch.plan +++ b/sqitch/sqitch.plan @@ -15,3 +15,4 @@ dates_to_load_new_guix_revision_jobs 2019-06-02T07:39:49Z Christopher Baines <ma load_new_guix_revision_job_events 2019-06-02T15:44:41Z Christopher Baines <mail@cbaines.net> # Add new table for guix_revision_job_events load_new_guix_revision_job_logs 2019-06-21T14:33:09Z chris <chris@phact> # Add load_new_guix_revision_job_logs change_load_new_guix_revision_job_logs_contents_to_be_nullable 2019-07-07T20:10:54Z Christopher Baines <mail@cbaines.net> # Change the contents field in the load_new_guix_revision_job_logs table\nto be nullable.\n\nwith '#' will # be ignored, and an empty message aborts the add. #\nChange to add: # #\nchange_load_new_guix_revision_job_logs_contents_to_be_nullable #\nsqitch/deploy/change_load_new_guix_revision_job_logs_contents_to_be_nullable.sql\nsqitch/revert/change_load_new_guix_revision_job_logs_contents_to_be_nullable.sql\nsqitch/verify/change_load_new_guix_revision_job_logs_contents_to_be_nullable.sql +fix_duplicated_licenses 2019-07-30T05:48:17Z Christopher Baines <mail@cbaines.net> # Fix duplicated licenses, and add constraints diff --git a/sqitch/verify/fix_duplicated_licenses.sql b/sqitch/verify/fix_duplicated_licenses.sql new file mode 100644 index 0000000..47379eb --- /dev/null +++ b/sqitch/verify/fix_duplicated_licenses.sql @@ -0,0 +1,7 @@ +-- Verify guix-data-service:fix_duplicated_licenses on pg + +BEGIN; + +-- XXX Add verifications here. + +ROLLBACK; diff --git a/tests/model-license-set.scm b/tests/model-license-set.scm index 9cd949d..c39171f 100644 --- a/tests/model-license-set.scm +++ b/tests/model-license-set.scm @@ -19,10 +19,10 @@ "https://example.com/why-license-1")) (("License 1" "https://gnu.org/licenses/test-1.html" - "https://example.com/why-license-1") + #f) ("License 2" - "https://gnu.org/licenses/test-2.html" - "https://example.com/why-license-2"))))) + #f + #f))))) (with-postgresql-connection "test-model-license-set" diff --git a/tests/model-license.scm b/tests/model-license.scm index 7888117..8f4b0c3 100644 --- a/tests/model-license.scm +++ b/tests/model-license.scm @@ -17,10 +17,13 @@ "https://example.com/why-license-1")) (("License 1" "https://gnu.org/licenses/test-1.html" - "https://example.com/why-license-1") + #f) ("License 2" "https://gnu.org/licenses/test-2.html" - "https://example.com/why-license-2"))))) + #f) + ("License 3" + #f + #f))))) (with-postgresql-connection "test-model-license" diff --git a/tests/model-package-metadata.scm b/tests/model-package-metadata.scm index ea0cdbe..015a0b2 100644 --- a/tests/model-package-metadata.scm +++ b/tests/model-package-metadata.scm @@ -18,6 +18,15 @@ (home-page "https://example.com") (location (location "file.scm" 5 0)))) +(define mock-inferior-package-foo-2 + (mock-inferior-package + (name "foo") + (version "2") + (synopsis "Foo") + (description "Foo description") + (home-page #f) + (location #f))) + (define (test-license-set-ids conn) (mock ((guix-data-service model license) @@ -46,7 +55,8 @@ (match (inferior-packages->package-metadata-ids conn - (list mock-inferior-package-foo) + (list mock-inferior-package-foo + mock-inferior-package-foo-2) (test-license-set-ids conn)) ((x) (string? x)))) #:always-rollback? #t)) @@ -57,11 +67,13 @@ (test-equal "inferior-packages->package-metadata-ids" (inferior-packages->package-metadata-ids conn - (list mock-inferior-package-foo) + (list mock-inferior-package-foo + mock-inferior-package-foo-2) (test-license-set-ids conn)) (inferior-packages->package-metadata-ids conn - (list mock-inferior-package-foo) + (list mock-inferior-package-foo + mock-inferior-package-foo-2) (test-license-set-ids conn))) #:always-rollback? #t)))))) |