aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--guix-data-service/model/license.scm57
-rw-r--r--guix-data-service/model/package-metadata.scm22
-rw-r--r--guix-data-service/model/utils.scm8
-rw-r--r--sqitch/deploy/fix_duplicated_licenses.sql218
-rw-r--r--sqitch/revert/fix_duplicated_licenses.sql7
-rw-r--r--sqitch/sqitch.plan1
-rw-r--r--sqitch/verify/fix_duplicated_licenses.sql7
-rw-r--r--tests/model-license-set.scm6
-rw-r--r--tests/model-license.scm7
-rw-r--r--tests/model-package-metadata.scm18
10 files changed, 313 insertions, 38 deletions
diff --git a/guix-data-service/model/license.scm b/guix-data-service/model/license.scm
index 0b28343..73df612 100644
--- a/guix-data-service/model/license.scm
+++ b/guix-data-service/model/license.scm
@@ -43,28 +43,6 @@
(inferior-eval '(use-modules (guix licenses)) inf)
(inferior-eval (proc packages) inf))
-(define (select-licenses license-values)
- (string-append
- "SELECT id, licenses.name, licenses.uri, licenses.comment "
- "FROM licenses "
- "JOIN (VALUES "
- (string-join
- (map (lambda (values)
- (string-append
- "("
- (string-join
- (map value->quoted-string-or-null
- values)
- ", ")
- ")"))
- license-values)
- ", ")
- ") AS vals (name, uri, comment) "
- "ON "
- "licenses.name = vals.name AND "
- "licenses.uri = vals.uri AND "
- "licenses.comment = vals.comment"))
-
(define (insert-licenses values)
(string-append
"INSERT INTO licenses "
@@ -91,15 +69,39 @@
(map number->string
(sort (map string->number ids) <)))
+ (define (non-string-to-false lst)
+ (map (lambda (value)
+ (if (string? value)
+ value
+ #f))
+ lst))
+
+ (define (empty-string-to-false lst)
+ ;; TODO squee returns empty strings for null values, which will probably
+ ;; cause problems
+ (map (lambda (value)
+ (if (string? value)
+ (if (string-null? value)
+ #f
+ value)
+ value))
+ lst))
+
(let* ((unique-license-tuples
(filter (lambda (license-tuple)
(not (null? license-tuple)))
(delete-duplicates
- (concatenate license-data))))
+ (map
+ (lambda (lst)
+ (non-string-to-false
+ (empty-string-to-false lst)))
+ (concatenate license-data)))))
(existing-license-entries
(exec-query->vhash conn
- (select-licenses unique-license-tuples)
- cdr
+ "SELECT id, name, uri, comment FROM licenses"
+ (lambda (vals)
+ (non-string-to-false
+ (empty-string-to-false (cdr vals))))
first)) ;; id
(missing-license-entries
(delete-duplicates
@@ -128,5 +130,8 @@
(begin
(error "missing license entry"
license-values)))))
- license-value-lists)))
+ (map (lambda (lst)
+ (non-string-to-false
+ (empty-string-to-false lst)))
+ license-value-lists))))
license-data)))
diff --git a/guix-data-service/model/package-metadata.scm b/guix-data-service/model/package-metadata.scm
index 37a8da8..178d783 100644
--- a/guix-data-service/model/package-metadata.scm
+++ b/guix-data-service/model/package-metadata.scm
@@ -52,8 +52,14 @@
"ON "
(string-join
(map (lambda (field)
- (string-append
- "package_metadata." field " = vals." field))
+ (if (member field '("home_page" "location_id"
+ "license_set_id"))
+ (string-append
+ "(package_metadata." field " = vals." field " OR "
+ "(package_metadata." field " IS NULL AND"
+ " vals." field " IS NULL))")
+ (string-append
+ "package_metadata." field " = vals." field)))
fields)
" AND ")))
@@ -129,7 +135,8 @@ WHERE packages.id IN (
(map (lambda (package license-set-id)
(list (inferior-package-synopsis package)
(inferior-package-description package)
- (inferior-package-home-page package)
+ (non-empty-string-or-false
+ (inferior-package-home-page package))
(location->location-id
conn
(inferior-package-location package))
@@ -140,7 +147,14 @@ WHERE packages.id IN (
(let* ((existing-package-metadata-entries
(exec-query->vhash conn
(select-package-metadata package-metadata)
- cdr
+ (match-lambda
+ ((id synopsis description home-page
+ location-id license-set-id)
+ (list synopsis
+ description
+ (non-empty-string-or-false home-page)
+ location-id
+ license-set-id)))
first)) ;; id))
(missing-package-metadata-entries
(delete-duplicates
diff --git a/guix-data-service/model/utils.scm b/guix-data-service/model/utils.scm
index 1ece31a..0ce9fea 100644
--- a/guix-data-service/model/utils.scm
+++ b/guix-data-service/model/utils.scm
@@ -5,6 +5,7 @@
#:use-module (squee)
#:export (quote-string
value->quoted-string-or-null
+ non-empty-string-or-false
exec-query->vhash
two-lists->vhash
deduplicate-strings
@@ -18,6 +19,13 @@
(string-append "$STR$" value "$STR$")
"NULL"))
+(define (non-empty-string-or-false s)
+ (if (string? s)
+ (if (string-null? s)
+ #f
+ s)
+ #f))
+
(define (exec-query->vhash conn query field-function value-function)
(fold (lambda (row result)
(vhash-cons (field-function row)
diff --git a/sqitch/deploy/fix_duplicated_licenses.sql b/sqitch/deploy/fix_duplicated_licenses.sql
new file mode 100644
index 0000000..33820ca
--- /dev/null
+++ b/sqitch/deploy/fix_duplicated_licenses.sql
@@ -0,0 +1,218 @@
+-- Deploy guix-data-service:fix_duplicated_licenses to pg
+
+BEGIN;
+
+SET CONSTRAINTS ALL DEFERRED;
+
+-- Remove unique constraint from license_sets
+
+ALTER TABLE license_sets DROP CONSTRAINT license_sets_pkey;
+
+-- Change all license sets to refer to canonical licenses
+
+UPDATE license_sets AS master SET license_ids = ARRAY(
+ SELECT new_licenses_2.id FROM (
+ SELECT a.elem AS id, a.nr AS index
+ FROM license_sets, unnest(license_sets.license_ids)
+ WITH ORDINALITY a(elem, nr)
+ WHERE id = master.id
+ ) AS old_licenses
+ INNER JOIN (
+ SELECT licenses.id AS old_id, new_licenses.*
+ FROM licenses INNER JOIN (
+ SELECT MIN(id) AS id, name, uri, comment
+ FROM licenses
+ GROUP BY name, uri, comment
+ ORDER BY name
+ ) AS new_licenses
+ ON licenses.name = new_licenses.name AND
+ (licenses.uri = new_licenses.uri OR (
+ licenses.uri IS NULL AND new_licenses.uri IS NULL
+ )) AND (
+ licenses.comment = new_licenses.comment OR (
+ licenses.comment IS NULL AND new_licenses.comment IS NULL
+ ))
+ ) AS new_licenses_2
+ ON old_licenses.id = new_licenses_2.old_id
+ ORDER BY old_licenses.index);
+
+-- Remove unique constraint from package_metadata
+
+ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_unique_fields;
+
+-- Update package_metadata to refer to canonical license_sets
+
+WITH data AS (
+ SELECT MIN(id) AS id, ARRAY_AGG(id) AS old_ids
+ FROM license_sets
+ GROUP BY license_ids
+)
+UPDATE package_metadata AS master
+SET license_set_id = data.id
+FROM data
+WHERE license_set_id = ANY(data.old_ids);
+
+-- Remove unique constraint from packages
+
+ALTER TABLE packages DROP CONSTRAINT packages_pkey;
+
+-- Update packages to refer to canonical package_metadata entries
+
+WITH data AS (
+ SELECT MIN(package_metadata.id) AS id, ARRAY_AGG(package_metadata.id) AS old_ids
+ FROM package_metadata
+ GROUP BY package_metadata.synopsis, package_metadata.description,
+ package_metadata.home_page, package_metadata.location_id,
+ package_metadata.license_set_id
+ HAVING COUNT(package_metadata.id) > 1
+)
+UPDATE packages SET package_metadata_id = data.id
+FROM data
+WHERE package_metadata_id = ANY(data.old_ids);
+
+-- Remove unique constraint from package_derivations
+
+ALTER TABLE package_derivations DROP CONSTRAINT package_derivations_pkey;
+
+-- Update package_derivations to refer to canonical packages entries
+
+WITH data AS (
+ SELECT unnest(old_ids) AS old, id FROM (
+ SELECT MIN(packages.id) AS id, ARRAY_AGG(packages.id) AS old_ids
+ FROM packages
+ GROUP BY name, version, package_metadata_id
+ HAVING COUNT(id) > 1
+ ) AS d2
+)
+UPDATE package_derivations SET package_id = data.id
+FROM data
+WHERE package_id = data.old;
+
+-- Update guix_revision_package_derivations to refer to canonical
+-- package_derivations entries
+
+WITH data AS (
+ SELECT unnest(old_ids) AS old, id FROM (
+ SELECT MIN(package_derivations.id) AS id, ARRAY_AGG(package_derivations.id) AS old_ids
+ FROM package_derivations
+ GROUP BY package_id, derivation_id, system, target
+ HAVING COUNT(id) > 1
+ ) AS d2
+)
+UPDATE guix_revision_package_derivations SET package_derivation_id = data.id
+FROM data
+WHERE package_derivation_id = data.old;
+
+-- Drop the foreign key constraint as an attempt to speed up deleting from
+-- package_derivations.
+
+ALTER TABLE guix_revision_package_derivations
+ DROP CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey;
+
+-- Delete non-canonical package_dervations entries
+
+DELETE FROM package_derivations AS pd WHERE id NOT IN (
+ SELECT MIN(id)
+ FROM package_derivations
+ GROUP BY (
+ package_id,
+ derivation_id,
+ system,
+ target
+ )
+);
+
+-- Reinstate the deleted constraint
+
+ALTER TABLE guix_revision_package_derivations
+ ADD CONSTRAINT guix_revision_package_derivations_package_derivation_id_fkey
+ FOREIGN KEY (package_derivation_id) REFERENCES package_derivations(id);
+
+-- Delete non-canonical packages entries
+
+DELETE FROM packages AS p WHERE id NOT IN (
+ SELECT MIN(id)
+ FROM packages
+ GROUP BY (name, version, package_metadata_id)
+);
+
+-- Add referential constraints
+
+ALTER TABLE package_derivations
+ ADD CONSTRAINT package_derivations_package_id_fkey
+ FOREIGN KEY (package_id) REFERENCES packages (id);
+
+ALTER TABLE package_derivations
+ ADD CONSTRAINT package_derivations_derivation_id_fkey
+ FOREIGN KEY (derivation_id) REFERENCES derivations (id);
+
+-- Delete non-canonical package_metadata entries
+
+ALTER TABLE packages DROP CONSTRAINT package_metadata_id;
+
+DELETE FROM package_metadata AS pm WHERE id NOT IN (
+ SELECT MIN(id)
+ FROM package_metadata
+ GROUP BY (synopsis, description, home_page, location_id, license_set_id)
+);
+
+ALTER TABLE packages ADD CONSTRAINT package_metadata_id
+ FOREIGN KEY (package_metadata_id) REFERENCES package_metadata(id);
+
+-- Delete non-canonical license_sets entries
+
+ALTER TABLE package_metadata DROP CONSTRAINT package_metadata_license_set_id_fkey;
+
+DELETE FROM license_sets AS ls WHERE id NOT IN (
+ SELECT MIN(id)
+ FROM license_sets
+ GROUP BY license_ids
+);
+
+ALTER TABLE package_metadata ADD CONSTRAINT package_metadata_license_set_id_fkey
+ FOREIGN KEY (license_set_id) REFERENCES license_sets(id);
+
+-- Delete non-canonical licenses entries
+
+DELETE FROM licenses AS l WHERE id NOT IN (
+ SELECT MIN(id)
+ FROM licenses
+ GROUP BY (name, uri, comment)
+);
+
+-- Restore unique constraints
+
+CREATE UNIQUE INDEX ON licenses (name)
+ WHERE uri IS NULL AND comment IS NULL;
+CREATE UNIQUE INDEX ON licenses (name, uri)
+ WHERE uri IS NOT NULL AND comment IS NULL;
+CREATE UNIQUE INDEX ON licenses (name, comment)
+ WHERE uri IS NULL AND comment IS NOT NULL;
+CREATE UNIQUE INDEX ON licenses (name, uri, comment)
+ WHERE uri IS NOT NULL AND comment IS NOT NULL;
+
+ALTER TABLE license_sets ADD PRIMARY KEY (license_ids);
+
+ALTER TABLE package_metadata ALTER synopsis SET NOT NULL;
+ALTER TABLE package_metadata ALTER description SET NOT NULL;
+
+CREATE UNIQUE INDEX ON package_metadata (
+ synopsis,
+ description,
+ coalesce(location_id, -1),
+ coalesce(license_set_id, -1)
+) WHERE home_page IS NULL;
+
+CREATE UNIQUE INDEX ON package_metadata (
+ synopsis,
+ description,
+ home_page,
+ coalesce(location_id, -1),
+ coalesce(license_set_id, -1)
+) WHERE home_page IS NOT NULL;
+
+ALTER TABLE packages ADD PRIMARY KEY (name, version, package_metadata_id);
+
+ALTER TABLE package_derivations ADD PRIMARY KEY (package_id, derivation_id, system, target);
+
+COMMIT;
diff --git a/sqitch/revert/fix_duplicated_licenses.sql b/sqitch/revert/fix_duplicated_licenses.sql
new file mode 100644
index 0000000..cec3d71
--- /dev/null
+++ b/sqitch/revert/fix_duplicated_licenses.sql
@@ -0,0 +1,7 @@
+-- Revert guix-data-service:fix_duplicated_licenses from pg
+
+BEGIN;
+
+-- XXX Add DDLs here.
+
+COMMIT;
diff --git a/sqitch/sqitch.plan b/sqitch/sqitch.plan
index 7a37548..bd513cc 100644
--- a/sqitch/sqitch.plan
+++ b/sqitch/sqitch.plan
@@ -15,3 +15,4 @@ dates_to_load_new_guix_revision_jobs 2019-06-02T07:39:49Z Christopher Baines <ma
load_new_guix_revision_job_events 2019-06-02T15:44:41Z Christopher Baines <mail@cbaines.net> # Add new table for guix_revision_job_events
load_new_guix_revision_job_logs 2019-06-21T14:33:09Z chris <chris@phact> # Add load_new_guix_revision_job_logs
change_load_new_guix_revision_job_logs_contents_to_be_nullable 2019-07-07T20:10:54Z Christopher Baines <mail@cbaines.net> # Change the contents field in the load_new_guix_revision_job_logs table\nto be nullable.\n\nwith '#' will # be ignored, and an empty message aborts the add. #\nChange to add: # #\nchange_load_new_guix_revision_job_logs_contents_to_be_nullable #\nsqitch/deploy/change_load_new_guix_revision_job_logs_contents_to_be_nullable.sql\nsqitch/revert/change_load_new_guix_revision_job_logs_contents_to_be_nullable.sql\nsqitch/verify/change_load_new_guix_revision_job_logs_contents_to_be_nullable.sql
+fix_duplicated_licenses 2019-07-30T05:48:17Z Christopher Baines <mail@cbaines.net> # Fix duplicated licenses, and add constraints
diff --git a/sqitch/verify/fix_duplicated_licenses.sql b/sqitch/verify/fix_duplicated_licenses.sql
new file mode 100644
index 0000000..47379eb
--- /dev/null
+++ b/sqitch/verify/fix_duplicated_licenses.sql
@@ -0,0 +1,7 @@
+-- Verify guix-data-service:fix_duplicated_licenses on pg
+
+BEGIN;
+
+-- XXX Add verifications here.
+
+ROLLBACK;
diff --git a/tests/model-license-set.scm b/tests/model-license-set.scm
index 9cd949d..c39171f 100644
--- a/tests/model-license-set.scm
+++ b/tests/model-license-set.scm
@@ -19,10 +19,10 @@
"https://example.com/why-license-1"))
(("License 1"
"https://gnu.org/licenses/test-1.html"
- "https://example.com/why-license-1")
+ #f)
("License 2"
- "https://gnu.org/licenses/test-2.html"
- "https://example.com/why-license-2")))))
+ #f
+ #f)))))
(with-postgresql-connection
"test-model-license-set"
diff --git a/tests/model-license.scm b/tests/model-license.scm
index 7888117..8f4b0c3 100644
--- a/tests/model-license.scm
+++ b/tests/model-license.scm
@@ -17,10 +17,13 @@
"https://example.com/why-license-1"))
(("License 1"
"https://gnu.org/licenses/test-1.html"
- "https://example.com/why-license-1")
+ #f)
("License 2"
"https://gnu.org/licenses/test-2.html"
- "https://example.com/why-license-2")))))
+ #f)
+ ("License 3"
+ #f
+ #f)))))
(with-postgresql-connection
"test-model-license"
diff --git a/tests/model-package-metadata.scm b/tests/model-package-metadata.scm
index ea0cdbe..015a0b2 100644
--- a/tests/model-package-metadata.scm
+++ b/tests/model-package-metadata.scm
@@ -18,6 +18,15 @@
(home-page "https://example.com")
(location (location "file.scm" 5 0))))
+(define mock-inferior-package-foo-2
+ (mock-inferior-package
+ (name "foo")
+ (version "2")
+ (synopsis "Foo")
+ (description "Foo description")
+ (home-page #f)
+ (location #f)))
+
(define (test-license-set-ids conn)
(mock
((guix-data-service model license)
@@ -46,7 +55,8 @@
(match
(inferior-packages->package-metadata-ids
conn
- (list mock-inferior-package-foo)
+ (list mock-inferior-package-foo
+ mock-inferior-package-foo-2)
(test-license-set-ids conn))
((x) (string? x))))
#:always-rollback? #t))
@@ -57,11 +67,13 @@
(test-equal "inferior-packages->package-metadata-ids"
(inferior-packages->package-metadata-ids
conn
- (list mock-inferior-package-foo)
+ (list mock-inferior-package-foo
+ mock-inferior-package-foo-2)
(test-license-set-ids conn))
(inferior-packages->package-metadata-ids
conn
- (list mock-inferior-package-foo)
+ (list mock-inferior-package-foo
+ mock-inferior-package-foo-2)
(test-license-set-ids conn)))
#:always-rollback? #t))))))