From 5ed98343d78ea2f0e51e3d53a1d0807d01c5cab7 Mon Sep 17 00:00:00 2001 From: Christopher Baines Date: Thu, 5 Dec 2024 20:56:23 +0000 Subject: Rework loading revision data These changes were motivated by switching to a mechanism of loading data that isn't dependent on the big advisory lock that prevents more than one revision from being processed at a time. Since INSERT ... RETURNING id; is used, this can block if another transaction inserts the same data, and then cause an error when that transaction commits. The solution is to use ON CONFLICT DO NOTHING, but you have to handle the case when the INSERT doesn't return an id since the other transaction has inserted it. This commit rewrites insert-missing-data-and-return-all-ids to do as described above, as well as being more efficient in how existing data is detected and to use more vectors. Other utilities for inserting data are added as well. --- guix-data-service/jobs/load-new-guix-revision.scm | 277 +++++---- guix-data-service/model/build.scm | 27 +- guix-data-service/model/channel-news.scm | 117 ++-- guix-data-service/model/derivation.scm | 132 ++-- guix-data-service/model/git-branch.scm | 16 +- guix-data-service/model/guix-revision.scm | 5 +- guix-data-service/model/license-set.scm | 88 +-- guix-data-service/model/license.scm | 36 +- guix-data-service/model/lint-checker.scm | 57 +- guix-data-service/model/lint-warning-message.scm | 53 +- guix-data-service/model/lint-warning.scm | 32 +- guix-data-service/model/location.scm | 15 +- guix-data-service/model/nar.scm | 105 ++-- .../package-derivation-by-guix-revision-range.scm | 4 +- guix-data-service/model/package-derivation.scm | 5 +- guix-data-service/model/package-metadata.scm | 203 ++----- guix-data-service/model/package.scm | 17 - guix-data-service/model/system.scm | 21 +- guix-data-service/model/utils.scm | 663 +++++++++++---------- tests/model-license-set.scm | 18 +- tests/model-license.scm | 24 +- tests/model-lint-checker.scm | 37 +- tests/model-lint-warning-message.scm | 12 +- tests/model-package-metadata.scm | 23 +- tests/model-package.scm | 50 +- 25 files changed, 881 insertions(+), 1156 deletions(-) diff --git a/guix-data-service/jobs/load-new-guix-revision.scm b/guix-data-service/jobs/load-new-guix-revision.scm index 1cad403..3941380 100644 --- a/guix-data-service/jobs/load-new-guix-revision.scm +++ b/guix-data-service/jobs/load-new-guix-revision.scm @@ -846,95 +846,84 @@ conn (inferior-packages->license-id-lists conn - ;; TODO Don't needlessly convert - (vector->list - (assq-ref inferior-packages-data 'license-data)))))) + (assq-ref inferior-packages-data 'license-data))))) (all-package-metadata-ids new-package-metadata-ids (with-time-logging "inserting package metadata entries" (inferior-packages->package-metadata-ids conn - ;; TODO Don't needlessly convert - (vector->list - (assq-ref inferior-packages-data 'metadata)) + (assq-ref inferior-packages-data 'metadata) package-license-set-ids))) (replacement-package-ids (vector-map (lambda (_ package-index-or-false) (if package-index-or-false - (first + (vector-ref (inferior-packages->package-ids conn - (list (list (vector-ref names package-index-or-false) - (vector-ref versions package-index-or-false) - (list-ref all-package-metadata-ids - package-index-or-false) - (cons "integer" NULL))))) + (vector + (list (vector-ref names package-index-or-false) + (vector-ref versions package-index-or-false) + (vector-ref all-package-metadata-ids + package-index-or-false) + (cons "integer" NULL)))) + 0) (cons "integer" NULL))) (assq-ref inferior-packages-data 'replacements)))) - (unless (null? new-package-metadata-ids) + (unless (= 0 (vector-length new-package-metadata-ids)) (with-time-logging "inserting package metadata tsvector entries" (insert-package-metadata-tsvector-entries conn new-package-metadata-ids))) (with-time-logging "getting package-ids (without replacements)" - (list->vector - (inferior-packages->package-ids - conn - ;; TODO Do this more efficiently - (zip (vector->list names) - (vector->list versions) - all-package-metadata-ids - (vector->list replacement-package-ids))))))) + (inferior-packages->package-ids + conn + ;; Similar to zip, but generating a vector of lists + (vector-map (lambda (index . vals) vals) + names + versions + all-package-metadata-ids + replacement-package-ids))))) (define (insert-lint-warnings conn package-ids lint-checker-ids lint-warnings-data) - (lint-warnings-data->lint-warning-ids - conn - (append-map! + (concatenate! + (filter-map (lambda (lint-checker-id warnings-per-package) (if warnings-per-package (vector-fold (lambda (_ result package-id warnings) - (append! - result - (map - (match-lambda - ((location-data messages-by-locale) - (let ((location-id - (location->location-id - conn - (apply location location-data))) - (lint-warning-message-set-id - (lint-warning-message-data->lint-warning-message-set-id - conn - messages-by-locale))) - (list lint-checker-id - package-id - location-id - lint-warning-message-set-id)))) - (fold (lambda (location-and-messages result) - ;; TODO Sort to delete duplicates, rather than use member - (if (member location-and-messages result) - (begin - (apply - simple-format - (current-error-port) - "warning: skipping duplicate lint warning ~A ~A\n" - location-and-messages) - result) - (append! result - (list location-and-messages)))) - '() - warnings)))) + (if (null? warnings) + result + (cons + (lint-warnings-data->lint-warning-ids + conn + (list->vector + (map + (match-lambda + ((location-data messages-by-locale) + (let ((location-id + (location->location-id + conn + (apply location location-data))) + (lint-warning-message-set-id + (lint-warning-message-data->lint-warning-message-set-id + conn + messages-by-locale))) + (list lint-checker-id + package-id + location-id + lint-warning-message-set-id)))) + warnings))) + result))) '() package-ids warnings-per-package) - '())) - lint-checker-ids + #f)) + (vector->list lint-checker-ids) lint-warnings-data))) (define (update-derivation-ids-hash-table! conn @@ -977,30 +966,25 @@ #:key (log-tag "unspecified")) (define (insert-into-derivations conn drvs) - (string-append - "INSERT INTO derivations " - "(file_name, builder, args, env_vars, system_id) VALUES " - (string-join - (map (match-lambda - (($ outputs inputs sources - system builder args env-vars file-name) - (simple-format - #f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')" - file-name - builder - (string-join (map quote-string args) ",") - (string-join (map (match-lambda - ((key . value) - (string-append - "['" key '"', $$" - value "$$ ]"))) - env-vars) - ",") - (system->system-id conn system)))) - drvs) - ",") - " RETURNING id" - ";")) + (insert-missing-data-and-return-all-ids + conn + "derivations" + '(file_name builder args env_vars system_id) + (vector-map (match-lambda* + ((_ ($ outputs inputs sources + system builder args env-vars file-name)) + (list file-name + builder + (cons "varchar[]" + (list->vector args)) + (cons "varchar[][]" + (list->vector + (map (match-lambda + ((key . value) + (vector key value))) + env-vars))) + (system->system-id conn system)))) + drvs))) (define (insert-derivations) (with-resource-from-pool postgresql-connection-pool conn @@ -1021,45 +1005,41 @@ (let ((derivations ;; Do this while holding the PostgreSQL connection to ;; avoid conflicts with other fibers - (delete-duplicates - (filter-map (lambda (derivation) - (if (hash-ref derivation-ids-hash-table - (derivation-file-name - derivation)) - #f - derivation)) - unfiltered-derivations)))) - (if (null? derivations) - (values '() '()) + (list->vector + (delete-duplicates + (filter-map (lambda (derivation) + (if (hash-ref derivation-ids-hash-table + (derivation-file-name + derivation)) + #f + derivation)) + unfiltered-derivations))))) + (if (= 0 (vector-length derivations)) + (values #() #()) (begin (simple-format (current-error-port) "insert-missing-derivations: inserting ~A derivations (~A)\n" - (length derivations) + (vector-length derivations) log-tag) (let ((derivation-ids - (append-map! - (lambda (chunk) - (map (lambda (result) - (string->number (car result))) - (exec-query conn (insert-into-derivations conn chunk)))) - (chunk derivations 500)))) + (insert-into-derivations conn derivations))) ;; Do this while holding the connection so that other ;; fibers don't also try inserting the same derivations (with-time-logging (string-append "insert-missing-derivations: updating hash table (" log-tag ")") - (for-each (lambda (derivation derivation-id) - (hash-set! derivation-ids-hash-table - (derivation-file-name derivation) - derivation-id)) + (vector-for-each (lambda (_ derivation derivation-id) + (hash-set! derivation-ids-hash-table + (derivation-file-name derivation) + derivation-id)) derivations derivation-ids)) (simple-format (current-error-port) "insert-missing-derivations: finished inserting ~A derivations (~A)\n" - (length derivations) + (vector-length derivations) log-tag) (values derivations @@ -1137,10 +1117,10 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" hash compressed-nar-bytevector uncompressed-size)))))))) - sources-ids + (vector->list sources-ids) sources))))) - derivation-ids - derivations))) + (vector->list derivation-ids) + (vector->list derivations)))) (let ((derivations derivation-ids @@ -1154,12 +1134,13 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" (string-append "insert-missing-derivations: inserting outputs (" log-tag ")") (with-resource-from-pool postgresql-connection-pool conn - (for-each (lambda (derivation-id derivation) - (insert-derivation-outputs conn - derivation-id - (derivation-outputs derivation))) - derivation-ids - derivations))) + (vector-for-each + (lambda (_ derivation-id derivation) + (insert-derivation-outputs conn + derivation-id + (derivation-outputs derivation))) + derivation-ids + derivations))) (with-time-logging (string-append @@ -1169,7 +1150,7 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" (map derivation-input-derivation (append-map derivation-inputs - derivations)))) + (vector->list derivations))))) (unless (null? input-derivations) ;; Ensure all the input derivations exist (for-each @@ -1182,12 +1163,14 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" #:log-tag log-tag)) (chunk! input-derivations 1000)))))) - (string-append "insert-missing-derivations: done parallel (" log-tag ")") + (simple-format + (current-error-port) + "debug: insert-missing-derivations: done parallel (~A)\n" log-tag) (with-resource-from-pool postgresql-connection-pool conn (with-time-logging (simple-format #f "insert-missing-derivations: inserting inputs for ~A derivations (~A)" - (length derivations) + (vector-length derivations) log-tag) (insert-derivation-inputs conn derivation-ids @@ -1913,25 +1896,28 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" (define inferior-lint-checkers-data (call-with-inferior (lambda (inferior inferior-store) - (inferior-lint-checkers inferior)))) + (list->vector + (inferior-lint-checkers inferior))))) (when inferior-lint-checkers-data (letpar& ((lint-checker-ids (with-resource-from-pool postgresql-connection-pool conn (lint-checkers->lint-checker-ids conn - (map (match-lambda - ((name descriptions-by-locale network-dependent) - (list - name - network-dependent - ;; Uses of sort may cause problems - (call-with-worker-thread - utility-thread-channel - (lambda () - (lint-checker-description-data->lint-checker-description-set-id - conn descriptions-by-locale)))))) - inferior-lint-checkers-data)))) + (vector-map + (match-lambda* + ((_ (name descriptions-by-locale network-dependent)) + (list + name + network-dependent + ;; Uses of sort may cause problems + (call-with-worker-thread + utility-thread-channel + (lambda () + (lint-checker-description-data->lint-checker-description-set-id + conn + descriptions-by-locale)))))) + inferior-lint-checkers-data)))) (lint-warnings-data (fibers-map (match-lambda @@ -1946,7 +1932,8 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" (inferior-lint-warnings inferior inferior-store checker-name))))))) - inferior-lint-checkers-data))) + (vector->list + inferior-lint-checkers-data)))) (let ((package-ids (fibers-force package-ids-promise))) (with-resource-from-pool postgresql-connection-pool conn @@ -1955,20 +1942,21 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" (fibers-force guix-revision-id-promise) lint-checker-ids) - (let ((lint-warning-ids - (insert-lint-warnings + (let ((lint-warning-id-vectors + (with-time-logging "inserting lint warnings" + (insert-lint-warnings + conn + package-ids + lint-checker-ids + lint-warnings-data)))) + (with-time-logging "inserting guix revision lint warnings" + (for-each + (lambda (lint-warning-ids) + (insert-guix-revision-lint-warnings conn - package-ids - lint-checker-ids - lint-warnings-data))) - (chunk-for-each! - (lambda (lint-warning-ids-chunk) - (insert-guix-revision-lint-warnings - conn - (fibers-force guix-revision-id-promise) - lint-warning-ids-chunk)) - 5000 - lint-warning-ids))))))) + (fibers-force guix-revision-id-promise) + lint-warning-ids)) + lint-warning-id-vectors)))))))) (define (extract-and-store-package-derivations) (define packages-count @@ -2113,7 +2101,8 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" guix-revision-id package-derivation-ids-chunk))) 2000 - package-derivation-ids))) + ;; TODO Chunk more efficiently + (vector->list package-derivation-ids)))) 'finished) @@ -2386,7 +2375,7 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" (with-time-logging "updating builds.derivation_output_details_set_id" (update-builds-derivation-output-details-set-id conn - (string->number (fibers-force guix-revision-id-promise))))) + (fibers-force guix-revision-id-promise)))) (begin (simple-format #t "Failed to generate store item for ~A\n" commit) diff --git a/guix-data-service/model/build.scm b/guix-data-service/model/build.scm index 3822459..acf745a 100644 --- a/guix-data-service/model/build.scm +++ b/guix-data-service/model/build.scm @@ -510,19 +510,20 @@ WHERE derivations.file_name = $1" derivation-output-details-lists build-server-build-ids) (let ((build-ids - (insert-missing-data-and-return-all-ids - conn - "builds" - '(build_server_id derivation_file_name build_server_build_id) - (map (lambda (derivation-file-name build-server-build-id) - (list build-server-id - derivation-file-name - (if (string? build-server-build-id) - build-server-build-id - '()))) - derivation-file-names - build-server-build-ids) - #:delete-duplicates? #t))) + (vector->list + (insert-missing-data-and-return-all-ids + conn + "builds" + '(build_server_id derivation_file_name build_server_build_id) + (list->vector + (map (lambda (derivation-file-name build-server-build-id) + (list build-server-id + derivation-file-name + (if (string? build-server-build-id) + build-server-build-id + '()))) + derivation-file-names + build-server-build-ids)))))) (for-each (lambda (build-id derivation-output-details) diff --git a/guix-data-service/model/channel-news.scm b/guix-data-service/model/channel-news.scm index 6bc6842..4bb5625 100644 --- a/guix-data-service/model/channel-news.scm +++ b/guix-data-service/model/channel-news.scm @@ -76,62 +76,21 @@ SELECT channel_news_entries.commit, conn "channel_news_entry_text" '(lang text) - (map (match-lambda - ((lang . text) - (list lang text))) - text))) + (list->vector + (map (match-lambda + ((lang . text) + (list lang text))) + text)))) (define (insert-channel-news-entry conn commit tag) - (match (exec-query - conn - (string-append - "INSERT INTO channel_news_entries (commit, tag) VALUES (" - (value->quoted-string-or-null commit) - "," - (value->quoted-string-or-null tag) - ") RETURNING id")) - (((id)) - (string->number id)))) + (insert-and-return-id + conn + "channel_news_entries" + '(commit tag) + (list (or commit NULL) + (or tag NULL)))) (define (insert-channel-news-entries conn channel-news-entries) - (define select-channel-news-entries - " -SELECT channel_news_entries.id, - channel_news_entries.commit, - channel_news_entries.tag, - ( - SELECT ARRAY_AGG( - channel_news_entry_titles.channel_news_entry_text_id - ORDER BY channel_news_entry_titles.channel_news_entry_text_id - ) - FROM channel_news_entry_titles - WHERE channel_news_entry_id = channel_news_entries.id - ) AS title_text, - ( - SELECT ARRAY_AGG( - channel_news_entry_bodies.channel_news_entry_text_id - ORDER BY channel_news_entry_bodies.channel_news_entry_text_id - ) - FROM channel_news_entry_bodies - WHERE channel_news_entry_id = channel_news_entries.id - ) AS body_text -FROM channel_news_entries -ORDER BY id") - - (define existing - (exec-query->vhash conn - select-channel-news-entries - (match-lambda - ((_ commit tag title-ids body-ids) - (list commit - tag - (map string->number - (parse-postgresql-array-string title-ids)) - (map string->number - (parse-postgresql-array-string body-ids))))) - (lambda (result) - (string->number (first result))))) - (map (lambda (entry) (let ((commit (channel-news-entry-commit entry)) @@ -145,36 +104,29 @@ ORDER BY id") conn (channel-news-entry-body entry)) <))) - (or (and=> (vhash-assoc (list (or commit '()) - (or tag '()) - title-ids - body-ids) - existing) - (match-lambda - ((value . key) - key))) - (let ((channel-news-entry-id - (insert-channel-news-entry conn commit tag))) - (for-each - (lambda (table ids) - (exec-query - conn - (string-append - "INSERT INTO " table - " VALUES " - (string-join - (map (lambda (id) - (simple-format #f "(~A, ~A)" - channel-news-entry-id - id)) - ids) - ", ")))) - '("channel_news_entry_titles" - "channel_news_entry_bodies") - (list title-ids - body-ids)) + (let ((channel-news-entry-id + (insert-channel-news-entry conn commit tag))) + (for-each + (lambda (table ids) + (exec-query + conn + (string-append + "INSERT INTO " table + " VALUES " + (string-join + (map (lambda (id) + (simple-format #f "(~A, ~A)" + channel-news-entry-id + id)) + (vector->list ids)) + ", ") + " ON CONFLICT DO NOTHING"))) + '("channel_news_entry_titles" + "channel_news_entry_bodies") + (list title-ids + body-ids)) - channel-news-entry-id)))) + channel-news-entry-id))) channel-news-entries)) (define (insert-channel-news-entries-for-guix-revision @@ -194,5 +146,6 @@ ORDER BY id") (simple-format #f "(~A,~A,~A)" guix-revision-id id index)) channel-news-entry-ids (iota (length channel-news-entries))) - ", "))))) + ", ") + " ON CONFLICT DO NOTHING")))) #t) diff --git a/guix-data-service/model/derivation.scm b/guix-data-service/model/derivation.scm index 547c682..32de464 100644 --- a/guix-data-service/model/derivation.scm +++ b/guix-data-service/model/derivation.scm @@ -982,76 +982,30 @@ LOCK TABLE ONLY derivation_output_details conn "derivation_output_details" '(path hash_algorithm hash recursive) - (map (lambda (details) - (list (assq-ref details 'path) - (or (non-empty-string-or-false - (assq-ref details 'hash_algorithm)) - NULL) - (or (non-empty-string-or-false - (assq-ref details 'hash)) - NULL) - (assq-ref details 'recursive))) - derivation-output-details))) + (list->vector + (map (lambda (details) + (list (assq-ref details 'path) + (or (non-empty-string-or-false + (assq-ref details 'hash_algorithm)) + NULL) + (or (non-empty-string-or-false + (assq-ref details 'hash)) + NULL) + (assq-ref details 'recursive))) + derivation-output-details)))) (define (derivation-output-details-ids->derivation-output-details-set-id conn derivation-output-details-ids) - (define sorted-derivation-output-details-ids - (sort derivation-output-details-ids <)) - - (define (select-derivation-output-details-sets-id) - (match (exec-query - conn - (string-append - " -SELECT id -FROM derivation_output_details_sets -WHERE derivation_output_details_ids = ARRAY[" - (string-join (map number->string - sorted-derivation-output-details-ids) - ",") - "]")) - (((id)) - (string->number id)) - (_ #f))) - - (define (insert-into-derivation-output-details-sets) - (match (exec-query - conn - (string-append - " -INSERT INTO derivation_output_details_sets (derivation_output_details_ids) -VALUES (ARRAY[" - (string-join (map number->string - sorted-derivation-output-details-ids) - ",") - "]) -RETURNING id")) - (((id)) - (string->number id)))) - - (or (select-derivation-output-details-sets-id) - (insert-into-derivation-output-details-sets))) + (insert-and-return-id + conn + "derivation_output_details_sets" + '(derivation_output_details_ids) + (list (sort derivation-output-details-ids <)))) (define (insert-derivation-outputs conn derivation-id names-and-derivation-outputs) - (define (insert-into-derivation-outputs output-names - derivation-output-details-ids) - (string-append "INSERT INTO derivation_outputs " - "(derivation_id, name, derivation_output_details_id) VALUES " - (string-join - (map (lambda (output-name derivation-output-details-id) - (simple-format - #f "(~A, '~A', ~A)" - derivation-id - output-name - derivation-output-details-id)) - output-names - derivation-output-details-ids) - ",") - ";")) - (define (insert-into-derivations-by-output-details-set derivation_output_details_set_id) (exec-query @@ -1059,13 +1013,16 @@ RETURNING id")) " INSERT INTO derivations_by_output_details_set (derivation_id, derivation_output_details_set_id) -VALUES ($1, $2)" +VALUES ($1, $2) +ON CONFLICT DO NOTHING" (list (number->string derivation-id) (number->string derivation_output_details_set_id)))) - (let* ((derivation-outputs (map cdr names-and-derivation-outputs)) - (derivation-output-paths (map derivation-output-path - derivation-outputs)) +(let* ((derivation-outputs + (map cdr names-and-derivation-outputs)) + (derivation-output-paths + (map derivation-output-path + derivation-outputs)) (derivation-output-names (map car names-and-derivation-outputs)) @@ -1083,10 +1040,17 @@ VALUES ($1, $2)" (recursive . ,recursive?)))) derivation-outputs)))) - (exec-query conn - (insert-into-derivation-outputs - derivation-output-names - derivation-output-details-ids)) + (insert-missing-data + conn + "derivation_outputs" + '(derivation_id name derivation_output_details_id) + (list->vector + (map (lambda (output-name derivation-output-details-id) + (list derivation-id + output-name + derivation-output-details-id)) + derivation-output-names + (vector->list derivation-output-details-ids)))) (insert-into-derivations-by-output-details-set (derivation-output-details-ids->derivation-output-details-set-id @@ -1434,8 +1398,8 @@ WHERE derivation_source_files.store_path = $1" "', '" sub-derivation "')")) sub-derivations)))) (derivation-inputs derivation))) - derivation-ids - derivations))) + (vector->list derivation-ids) + (vector->list derivations)))) (chunk-for-each! (lambda (query-parts-chunk) @@ -1452,18 +1416,11 @@ INNER JOIN derivations ON derivations.file_name = vals.file_name INNER JOIN derivation_outputs ON derivation_outputs.derivation_id = derivations.id - AND vals.output_name = derivation_outputs.name"))) + AND vals.output_name = derivation_outputs.name +ON CONFLICT DO NOTHING"))) 1000 query-parts))) -(define (select-from-derivation-source-files store-paths) - (string-append - "SELECT id, store_path FROM derivation_source_files " - "WHERE store_path IN (" - (string-join (map quote-string store-paths) - ",") - ");")) - (define (insert-derivation-sources conn derivation-id sources) (define (insert-into-derivation-sources derivation-source-file-ids) (string-append @@ -1473,16 +1430,17 @@ INNER JOIN derivation_outputs (map (lambda (derivation-source-file-id) (simple-format #f "(~A, ~A)" derivation-id derivation-source-file-id)) - derivation-source-file-ids) + (vector->list derivation-source-file-ids)) ",") - ";")) + "ON CONFLICT DO NOTHING;")) (let ((sources-ids (insert-missing-data-and-return-all-ids conn "derivation_source_files" '(store_path) - (map list sources)))) + (list->vector + (map list sources))))) (exec-query conn (insert-into-derivation-sources sources-ids)) @@ -1501,7 +1459,8 @@ INSERT INTO derivation_source_file_nars ( hash, uncompressed_size, data -) VALUES ($1, $2, $3, $4, $5, $6)" +) VALUES ($1, $2, $3, $4, $5, $6) +ON CONFLICT DO NOTHING" (list (number->string id) "lzip" "sha256" @@ -1520,7 +1479,8 @@ INSERT INTO derivation_source_file_nars ( hash, uncompressed_size, data -) VALUES ($1, $2, $3, $4, $5, $6)" +) VALUES ($1, $2, $3, $4, $5, $6) +ON CONFLICT DO NOTHING" (list (number->string id) "lzip" "sha256" diff --git a/guix-data-service/model/git-branch.scm b/guix-data-service/model/git-branch.scm index a800e8f..c4b56c0 100644 --- a/guix-data-service/model/git-branch.scm +++ b/guix-data-service/model/git-branch.scm @@ -47,16 +47,12 @@ WHERE git_repository_id = $1 (define (insert-git-branch-entry conn git-repository-id name) - (match (exec-query - conn - " -INSERT INTO git_branches (git_repository_id, name) -VALUES ($1, $2) -RETURNING id" - (list (number->string git-repository-id) - name)) - (((id)) - (string->number id)))) + (insert-and-return-id + conn + "git_branches" + '(git_repository_id name) + (list git-repository-id + name))) (define (git-branches-for-commit conn commit) (define query diff --git a/guix-data-service/model/guix-revision.scm b/guix-data-service/model/guix-revision.scm index 8bb20da..21e20a5 100644 --- a/guix-data-service/model/guix-revision.scm +++ b/guix-data-service/model/guix-revision.scm @@ -56,8 +56,7 @@ FROM guix_revisions WHERE commit = $1 AND git_repository_id = $2" (list commit git-repository-id)) - (((id)) - id) + (((id)) (string->number id)) (() #f))) (define (insert-guix-revision conn git-repository-id commit) @@ -67,7 +66,7 @@ INSERT INTO guix_revisions (git_repository_id, commit) VALUES ($1, $2) RETURNING id") (match (exec-query conn insert (list git-repository-id commit)) - (((id)) id))) + (((id)) (string->number id)))) (define (guix-commit-exists? conn commit) (define query diff --git a/guix-data-service/model/license-set.scm b/guix-data-service/model/license-set.scm index 102d838..fe4272b 100644 --- a/guix-data-service/model/license-set.scm +++ b/guix-data-service/model/license-set.scm @@ -17,6 +17,7 @@ (define-module (guix-data-service model license-set) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-43) #:use-module (ice-9 vlist) #:use-module (squee) #:use-module (guix-data-service utils) @@ -24,83 +25,12 @@ #:use-module (guix-data-service model license) #:export (inferior-packages->license-set-ids)) -(define select-license-sets - " -SELECT id, license_ids -FROM license_sets") - -(define (insert-license-sets license-id-lists) - (string-append - "INSERT INTO license_sets (license_ids) VALUES " - (string-join - (map (lambda (license-ids) - (string-append - "('{" - (string-join - (map number->string - (sort license-ids <)) - ", ") - "}')")) - license-id-lists) - ", ") - " RETURNING id")) - (define (inferior-packages->license-set-ids conn license-id-lists) - (let* ((existing-license-sets - (exec-query->vhash conn - select-license-sets - (lambda (results) - (if (string=? (second results) "{}") - '() - (map - string->number - (string-split - (string-drop-right - (string-drop (second results) 1) - 1) - #\,)))) - (lambda (result) - (string->number (first result))))) ;; id - (missing-license-sets - (delete-duplicates/sort! - ;; Use filter! with list-copy, as filter may return a list that - ;; shares a portion of the input list, and therefore could be at - ;; risk of being modified when deleting duplicates - (filter! (lambda (license-set-license-ids) - (not (vhash-assoc license-set-license-ids - existing-license-sets))) - (list-copy license-id-lists)) - (lambda (full-a full-b) - (let loop ((a full-a) - (b full-b)) - (cond - ((null? a) #f) - ((null? b) #t) - (else - (let ((a1 (car a)) - (b1 (car b))) - (if (= a1 b1) - (loop (cdr a) - (cdr b)) - (< a1 b1))))))))) - (new-license-set-entries - (if (null? missing-license-sets) - '() - (map (lambda (result) - (string->number (first result))) - (exec-query conn - (insert-license-sets missing-license-sets))))) - (new-entries-id-lookup-vhash - (two-lists->vhash missing-license-sets - new-license-set-entries))) - - (map (lambda (license-id-list) - (cdr - (or (vhash-assoc license-id-list - existing-license-sets) - (vhash-assoc license-id-list - new-entries-id-lookup-vhash) - (begin - (error "missing license set entry" - license-id-list))))) - license-id-lists))) + (insert-missing-data-and-return-all-ids + conn + "license_sets" + '(license_ids) + (vector-map + (lambda (_ license-ids) + (list (sort license-ids <))) + license-id-lists))) diff --git a/guix-data-service/model/license.scm b/guix-data-service/model/license.scm index 452a97b..f16634d 100644 --- a/guix-data-service/model/license.scm +++ b/guix-data-service/model/license.scm @@ -17,6 +17,7 @@ (define-module (guix-data-service model license) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-43) #:use-module (ice-9 vlist) #:use-module (ice-9 match) #:use-module (squee) @@ -50,7 +51,7 @@ (current-error-port) "error: unknown license value ~A for package ~A" x package) - '())) + #f)) values)) (x (simple-format @@ -70,18 +71,21 @@ ;; save non string values as NULL NULL)) - (insert-missing-data-and-return-all-ids - conn - "licenses" - `(name uri comment) - (map (lambda (license-tuples) - (map - (match-lambda - ((name uri comment) - (list name - (string-or-null uri) - (string-or-null comment)))) - license-tuples)) - license-data) - #:delete-duplicates? #t - #:sets-of-data? #t)) + (vector-map + (lambda (_ license-tuples) + (if (null? license-tuples) + #() + (insert-missing-data-and-return-all-ids + conn + "licenses" + `(name uri comment) + (list->vector + (filter-map + (match-lambda + ((name uri comment) + (list name + (string-or-null uri) + (string-or-null comment))) + (#f #f)) + license-tuples))))) + license-data)) diff --git a/guix-data-service/model/lint-checker.scm b/guix-data-service/model/lint-checker.scm index be3cfc6..4ee6521 100644 --- a/guix-data-service/model/lint-checker.scm +++ b/guix-data-service/model/lint-checker.scm @@ -17,6 +17,7 @@ (define-module (guix-data-service model lint-checker) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-43) #:use-module (ice-9 match) #:use-module (squee) #:use-module (guix-data-service model utils) @@ -24,8 +25,7 @@ lint-warning-count-by-lint-checker-for-revision insert-guix-revision-lint-checkers lint-checkers-for-revision - lint-checker-description-data->lint-checker-description-set-id - insert-lint-checker-description-set)) + lint-checker-description-data->lint-checker-description-set-id)) (define (lint-checkers->lint-checker-ids conn lint-checkers-data) (insert-missing-data-and-return-all-ids @@ -40,48 +40,23 @@ conn "lint_checker_descriptions" '(locale description) - (map (match-lambda - ((locale . description) - (list locale description))) - descriptions-by-locale))) - -(define (insert-lint-checker-description-set conn lint-description-ids) - (let ((query - (string-append - "INSERT INTO lint_checker_description_sets (description_ids) VALUES " - (string-append - "('{" - (string-join - (map number->string - (sort lint-description-ids <)) - ", ") - "}')") - " RETURNING id"))) - (match (exec-query conn query) - (((id)) id)))) + (list->vector + (map (match-lambda + ((locale . description) + (list locale description))) + descriptions-by-locale)))) (define (lint-checker-description-data->lint-checker-description-set-id conn descriptions-by-locale) - (let* ((lint-checker-description-ids - (lint-checker-description-data->lint-checker-description-ids - conn - descriptions-by-locale)) - (lint-checker-description-set-id - (exec-query - conn - (string-append - "SELECT id FROM lint_checker_description_sets" - " WHERE description_ids = ARRAY[" - (string-join (map number->string - (sort lint-checker-description-ids <)) ", ") - "]")))) - (string->number - (match lint-checker-description-set-id - (((id)) id) - (() - (insert-lint-checker-description-set conn lint-checker-description-ids)))))) - + (insert-and-return-id + conn + "lint_checker_description_sets" + '(description_ids) + (list + (lint-checker-description-data->lint-checker-description-ids + conn + descriptions-by-locale)))) (define (lint-warning-count-by-lint-checker-for-revision conn commit-hash) (define query @@ -125,7 +100,7 @@ ORDER BY count DESC") "(~A, ~A)" lint-checker-id guix-revision-id)) - lint-checker-ids) + (vector->list lint-checker-ids)) ", ")))) (define (lint-checkers-for-revision conn commit-hash) diff --git a/guix-data-service/model/lint-warning-message.scm b/guix-data-service/model/lint-warning-message.scm index 7dd0e28..c44ba8a 100644 --- a/guix-data-service/model/lint-warning-message.scm +++ b/guix-data-service/model/lint-warning-message.scm @@ -16,6 +16,7 @@ ;;; . (define-module (guix-data-service model lint-warning-message) + #:use-module (srfi srfi-43) #:use-module (ice-9 match) #:use-module (squee) #:use-module (guix-data-service database) @@ -30,48 +31,24 @@ conn "lint_warning_messages" '(locale message) - (map (match-lambda - ((locale . message) - (list locale message))) - messages-by-locale))) + (let ((v (list->vector messages-by-locale))) + (vector-map! (lambda (_ data) + (match data + ((locale . message) + (list locale message)))) + v) + v))) (define (lint-warning-message-data->lint-warning-message-set-id conn messages-by-locale) - - (let* ((lint-warning-message-ids - (lint-warning-message-data->lint-warning-message-ids - conn messages-by-locale)) - (select-query - (string-append - "SELECT id FROM lint_warning_message_sets " - "WHERE message_ids = ARRAY[" - (string-join (map number->string - (sort lint-warning-message-ids <)) ", ") - "]"))) - - (string->number - (match (exec-query conn select-query) - (((id)) id) - (() - (match (exec-query - conn - (string-append - " -INSERT INTO lint_warning_message_sets (message_ids) VALUES " - (string-append - "('{" - (string-join - (map number->string - (sort lint-warning-message-ids <)) - ", ") - "}')") " -ON CONFLICT DO NOTHING RETURNING id")) - (((id)) id) - (() - (match (exec-query conn select-query) - (((id)) id))))))))) - + (insert-and-return-id + conn + "lint_warning_message_sets" + '(message_ids) + (list (lint-warning-message-data->lint-warning-message-ids + conn + messages-by-locale)))) (define (lint-warning-message-locales-for-revision conn commit-hash) (exec-query diff --git a/guix-data-service/model/lint-warning.scm b/guix-data-service/model/lint-warning.scm index eff332f..69241f3 100644 --- a/guix-data-service/model/lint-warning.scm +++ b/guix-data-service/model/lint-warning.scm @@ -40,22 +40,22 @@ (define (insert-guix-revision-lint-warnings conn guix-revision-id lint-warning-ids) - (if (null? lint-warning-ids) - '() - (exec-query - conn - (string-append - "INSERT INTO guix_revision_lint_warnings (lint_warning_id, guix_revision_id) " - "VALUES " - (string-join - (map (lambda (lint-warning-id) - (simple-format - #f - "(~A, ~A)" - lint-warning-id - guix-revision-id)) - lint-warning-ids) - ", "))))) + (unless (= 0 (vector-length lint-warning-ids)) + (exec-query + conn + (string-append + "INSERT INTO guix_revision_lint_warnings (lint_warning_id, guix_revision_id) " + "VALUES " + (string-join + (map (lambda (lint-warning-id) + (simple-format + #f + "(~A, ~A)" + lint-warning-id + guix-revision-id)) + (vector->list lint-warning-ids)) + ", ") + " ON CONFLICT DO NOTHING")))) (define* (lint-warnings-for-guix-revision conn commit-hash #:key diff --git a/guix-data-service/model/location.scm b/guix-data-service/model/location.scm index 630251c..123354f 100644 --- a/guix-data-service/model/location.scm +++ b/guix-data-service/model/location.scm @@ -23,11 +23,10 @@ #:export (location->location-id)) (define (location->location-id conn location) - (car - (insert-missing-data-and-return-all-ids - conn - "locations" - '(file line column_number) - (match location - (($ file line column) - (list (list file line column))))))) + (insert-and-return-id + conn + "locations" + '(file line column_number) + (match location + (($ file line column) + (list file line column))))) diff --git a/guix-data-service/model/nar.scm b/guix-data-service/model/nar.scm index 0e3b3e6..d275421 100644 --- a/guix-data-service/model/nar.scm +++ b/guix-data-service/model/nar.scm @@ -57,11 +57,12 @@ narinfos)) (let ((nar-ids - (insert-missing-data-and-return-all-ids - conn - "nars" - '(store_path hash_algorithm hash size system deriver) - data))) + (vector->list + (insert-missing-data-and-return-all-ids + conn + "nars" + '(store_path hash_algorithm hash size system deriver) + (list->vector data))))) (let ((reference-data (concatenate @@ -176,49 +177,45 @@ VALUES ($1, $2)") #\;) ((version host-name signature-data) - (first - (insert-missing-data-and-return-all-ids - conn - "narinfo_signature_data" - '(version host_name data_hash data_hash_algorithm - data_json sig_val_json narinfo_signature_public_key_id - narinfo_body narinfo_signature_line) - (list - (append (list (string->number version) - host-name) - (let* ((data-sexp - (find (match-lambda - ((component data ...) - (if (eq? component 'data) - data - #f)) - (_ #f)) - signature-sexp)) - (hash-sexp - (third data-sexp)) - (hash-algorithm - (second hash-sexp)) - (hash - (third hash-sexp))) - (list - (bytevector->base16-string hash) - hash-algorithm - (cons "jsonb" - (sexp->json-string data-sexp)))) - (let ((sig-val-sexp - (find (match-lambda - ((component data ...) - (if (eq? component 'sig-val) - data - #f)) - (_ #f)) - signature-sexp))) - (list - (cons "jsonb" - (sexp->json-string sig-val-sexp)))) - (list public-key-id - body - signature-line)))))))))))) + (insert-and-return-id + conn + "narinfo_signature_data" + '(version host_name data_hash data_hash_algorithm + data_json sig_val_json narinfo_signature_public_key_id + narinfo_body narinfo_signature_line) + (append (list (string->number version) + host-name) + (let* ((data-sexp + (find (match-lambda + ((component data ...) + (if (eq? component 'data) + data + #f)) + (_ #f)) + signature-sexp)) + (hash-sexp + (third data-sexp)) + (hash-algorithm + (second hash-sexp)) + (hash + (third hash-sexp))) + (list + (bytevector->base16-string hash) + hash-algorithm + (sexp->json-string data-sexp))) + (let ((sig-val-sexp + (find (match-lambda + ((component data ...) + (if (eq? component 'sig-val) + data + #f)) + (_ #f)) + signature-sexp))) + (list + (sexp->json-string sig-val-sexp))) + (list public-key-id + body + signature-line)))))))))) (define (narinfo-signature->public-key-id conn signature) (let* ((public-key-sexp @@ -232,13 +229,11 @@ VALUES ($1, $2)") (public-key-json-string (sexp->json-string public-key-sexp))) - (first - (insert-missing-data-and-return-all-ids - conn - "narinfo_signature_public_keys" - '(sexp_json) - (list (list (cons "jsonb" - public-key-json-string))))))) + (insert-and-return-id + conn + "narinfo_signature_public_keys" + '(sexp_json) + (list public-key-json-string)))) (define (select-package-output-availability-for-revision conn revision-commit) (define query diff --git a/guix-data-service/model/package-derivation-by-guix-revision-range.scm b/guix-data-service/model/package-derivation-by-guix-revision-range.scm index 9ce527e..cc9f864 100644 --- a/guix-data-service/model/package-derivation-by-guix-revision-range.scm +++ b/guix-data-service/model/package-derivation-by-guix-revision-range.scm @@ -48,7 +48,7 @@ WHERE git_branch_id = $1 AND WHERE revision_id = $2 )" (list (number->string git-branch-id) - guix-revision-id))))) + (number->string guix-revision-id)))))) (define (insert-guix-revision-package-derivation-entries conn git-repository-id @@ -185,7 +185,7 @@ PARTITION OF package_derivations_by_guix_revision_range FOR VALUES IN (" query (list git-repository-id (number->string git-branch-id) - guix-revision-id)))) + (number->string guix-revision-id))))) (define (update-package-derivations-table conn git-repository-id diff --git a/guix-data-service/model/package-derivation.scm b/guix-data-service/model/package-derivation.scm index 2008409..fc7dca7 100644 --- a/guix-data-service/model/package-derivation.scm +++ b/guix-data-service/model/package-derivation.scm @@ -49,12 +49,13 @@ derivation-ids)) (if (null? data-4-tuples) - '() + #() (insert-missing-data-and-return-all-ids conn "package_derivations" '(package_id derivation_id system_id target) - data-4-tuples))) + (list->vector + data-4-tuples)))) (define (count-packages-derivations-in-revision conn commit-hash) (define query diff --git a/guix-data-service/model/package-metadata.scm b/guix-data-service/model/package-metadata.scm index 8f5643c..496b920 100644 --- a/guix-data-service/model/package-metadata.scm +++ b/guix-data-service/model/package-metadata.scm @@ -205,27 +205,6 @@ WHERE packages.id IN ( (json-string->scm license-json))))) (exec-query conn query (list revision-commit-hash name version locale)))) -(define (insert-package-metadata metadata-rows) - (string-append "INSERT INTO package_metadata " - "(synopsis, description, home_page, location_id, license_set_id) " - "VALUES " - (string-join - (map (match-lambda - ((synopsis description home_page - location-id license-set-id) - (string-append - "(" - (value->quoted-string-or-null synopsis) "," - (value->quoted-string-or-null description) "," - (value->quoted-string-or-null home_page) "," - location-id "," - license-set-id - ")"))) - metadata-rows) - ",") - " RETURNING id" - ";")) - (define (inferior-packages->translated-package-descriptions-and-synopsis inferior inferior-package) @@ -293,73 +272,12 @@ WHERE packages.id IN ( (prevent-inlining-for-tests inferior-packages->translated-package-descriptions-and-synopsis) -(define (insert-package-synopsis-set conn package-synopsis-ids) - (let ((query - (string-append - "INSERT INTO package_synopsis_sets (synopsis_ids) VALUES " - (string-append - "('{" - (string-join - (map number->string - (sort package-synopsis-ids <)) - ", ") - "}')") - " RETURNING id"))) - (match (exec-query conn query) - (((id)) id)))) - -(define (package-synopsis-data->package-synopsis-set-id - conn package-synopsis-ids) - (let ((package-synopsis-set-id - (exec-query - conn - (string-append - "SELECT id FROM package_synopsis_sets" - " WHERE synopsis_ids = ARRAY[" - (string-join (map number->string - (sort package-synopsis-ids <)) ", ") - "]")))) - (string->number - (match package-synopsis-set-id - (((id)) id) - (() - (insert-package-synopsis-set conn package-synopsis-ids)))))) - -(define (insert-package-description-set conn package-description-ids) - (let ((query - (string-append - "INSERT INTO package_description_sets (description_ids) VALUES " - (string-append - "('{" - (string-join - (map number->string - (sort package-description-ids <)) - ", ") - "}')") - " RETURNING id"))) - (match (exec-query conn query) - (((id)) id)))) - -(define (package-description-data->package-description-set-id - conn package-description-ids) - (let* ((package-description-set-id - (exec-query - conn - (string-append - "SELECT id FROM package_description_sets" - " WHERE description_ids = ARRAY[" - (string-join (map number->string - (sort package-description-ids <)) ", ") - "]")))) - (string->number - (match package-description-set-id - (((id)) id) - (() - (insert-package-description-set conn package-description-ids)))))) - (define (inferior-packages->package-metadata-ids conn package-metadata license-set-ids) + (define package-metadata-list + (vector->list package-metadata)) + (insert-missing-data-and-return-all-ids conn "package_metadata" @@ -369,68 +287,63 @@ WHERE packages.id IN ( package_description_set_id package_synopsis_set_id) - (zip - (map (match-lambda - ((home-page rest ...) - (if (string? home-page) - home-page - NULL))) - package-metadata) - (with-time-logging "preparing location ids" - (map (match-lambda - ((_ location rest ...) - (if location - (location->location-id - conn - location) - NULL))) - package-metadata)) - license-set-ids - (with-time-logging "preparing package description set ids" - (map (lambda (package-description-ids) - (package-description-data->package-description-set-id - conn - package-description-ids)) - (with-time-logging "preparing package description ids" - (insert-missing-data-and-return-all-ids - conn - "package_descriptions" - '(locale description) + (list->vector + (zip + (map (match-lambda + ((home-page rest ...) + (if (string? home-page) + home-page + NULL))) + package-metadata-list) + (with-time-logging "preparing location ids" + (map (match-lambda + ((_ location rest ...) + (if location + (location->location-id + conn + location) + NULL))) + package-metadata-list)) + (vector->list license-set-ids) + (with-time-logging "preparing package description set ids" + (map (lambda (package-description-ids) + (insert-and-return-id + conn + "package_description_sets" + '(description_ids) + (list (sort package-description-ids <)))) + (with-time-logging "preparing package description ids" (map (match-lambda ((_ _ package-description-data _) - (map (match-lambda - ((locale . description) - (list locale description))) - package-description-data))) - package-metadata) - #:delete-duplicates? #t - #:sets-of-data? #t)))) - (with-time-logging "preparing package synopsis set ids" - (map (lambda (package-synopsis-ids) - (package-synopsis-data->package-synopsis-set-id - conn - package-synopsis-ids)) - (insert-missing-data-and-return-all-ids - conn - "package_synopsis" - '(locale synopsis) + (insert-missing-data-and-return-all-ids + conn + "package_descriptions" + '(locale description) + (list->vector + (map (match-lambda + ((locale . description) + (list locale description))) + package-description-data))))) + package-metadata-list)))) + (with-time-logging "preparing package synopsis set ids" + (map (lambda (package-synopsis-ids) + (insert-and-return-id + conn + "package_synopsis_sets" + '(synopsis_ids) + (list (sort package-synopsis-ids <)))) (map (match-lambda ((_ _ _ package-synopsis-data) - (map (match-lambda - ((locale . synopsis) - (list locale synopsis))) - package-synopsis-data))) - package-metadata) - #:delete-duplicates? #t - #:sets-of-data? #t)))) - ;; There can be duplicated entires in package-metadata, for example where - ;; you have one package definition which interits from another, and just - ;; overrides the version and the source, the package_metadata entries for - ;; both definitions will be the same. - #:delete-duplicates? #t - ;; There is so much package metadata that it's worth creating a temporary - ;; table - #:use-temporary-table? #t)) + (insert-missing-data-and-return-all-ids + conn + "package_synopsis" + '(locale synopsis) + (list->vector + (map (match-lambda + ((locale . synopsis) + (list locale synopsis))) + package-synopsis-data))))) + package-metadata-list))))))) (define (package-description-and-synopsis-locale-options-guix-revision conn revision-id) @@ -551,8 +464,8 @@ INNER JOIN ( OR translated_package_descriptions.locale = 'en_US.UTF-8') WHERE package_metadata.id IN (" (string-join - (map number->string - package-metadata-ids) ", ") ")" + (map number->string (vector->list package-metadata-ids)) + ", ") ")" " ORDER BY package_metadata.id, locale, CASE WHEN translated_package_synopsis.locale = diff --git a/guix-data-service/model/package.scm b/guix-data-service/model/package.scm index 8d62ef3..395cbd4 100644 --- a/guix-data-service/model/package.scm +++ b/guix-data-service/model/package.scm @@ -250,23 +250,6 @@ WHERE packages.id IN ( (exec-query conn query (list commit-hash))) -(define (insert-into-package-entries package-entries) - (string-append - " -INSERT INTO packages (name, version, package_metadata_id) VALUES " - (string-join - (map - (match-lambda - ((name version package_metadata_id) - (simple-format #f "('~A', '~A', ~A)" - name - version - package_metadata_id))) - package-entries) - ",") - " -RETURNING id")) - (define (inferior-packages->package-ids conn package-entries) (insert-missing-data-and-return-all-ids conn diff --git a/guix-data-service/model/system.scm b/guix-data-service/model/system.scm index 9939a73..d6efa46 100644 --- a/guix-data-service/model/system.scm +++ b/guix-data-service/model/system.scm @@ -34,17 +34,16 @@ (let ((cached-value (hash-ref system->system-id-cache system))) (or cached-value - (match (insert-missing-data-and-return-all-ids - conn - "systems" - '(system) - `((,system))) - ((id) - (hash-set! system->system-id-cache - system - id) - (set! systems-cache #f) - id))))) + (let ((id (insert-and-return-id + conn + "systems" + '(system) + (list system)))) + (hash-set! system->system-id-cache + system + id) + (set! systems-cache #f) + id)))) (define (lookup-system-id conn system) (let ((cached-value (hash-ref system->system-id-cache diff --git a/guix-data-service/model/utils.scm b/guix-data-service/model/utils.scm index 2f048fb..0c2cb72 100644 --- a/guix-data-service/model/utils.scm +++ b/guix-data-service/model/utils.scm @@ -17,6 +17,7 @@ (define-module (guix-data-service model utils) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-43) #:use-module (ice-9 match) #:use-module (ice-9 vlist) #:use-module (ice-9 receive) @@ -26,14 +27,16 @@ #:export (quote-string value->quoted-string-or-null non-empty-string-or-false - exec-query->vhash - two-lists->vhash parse-postgresql-array-string deduplicate-strings group-list-by-first-n-fields group-to-alist group-to-alist/vector - insert-missing-data-and-return-all-ids)) + insert-missing-data-and-return-all-ids + insert-missing-data + bulk-select + insert-and-return-id + prepare-insert-and-return-id)) (define (quote-string s) (string-append "$STR$" s "$STR$")) @@ -50,22 +53,6 @@ s) #f)) -(define* (exec-query->vhash conn query field-function value-function - #:key (vhash vlist-null)) - (fold (lambda (row result) - (vhash-cons (field-function row) - (value-function row) - result)) - vhash - (exec-query-with-null-handling conn query))) - -(define (two-lists->vhash l1 l2) - (fold (lambda (key value result) - (vhash-cons key value result)) - vlist-null - l1 - l2)) - (define (parse-postgresql-array-string s) (if (string=? s "{}") '() @@ -170,300 +157,368 @@ WHERE table_name = $1" schema-details) (error "error: field-can-be-null?")))))))) +(define value->sql + (match-lambda + ((? string? s) + (string-append "$STR$" s "$STR$")) + ((? NULL?) + "NULL") + ((? symbol? s) + (value->sql (symbol->string s))) + ((? number? n) + (number->string n)) + ((? boolean? b) + (if b "TRUE" "FALSE")) + ((? vector? v) + (string-append + "ARRAY[" (string-join (map value->sql (vector->list v)) ",") "]")) + ((cast . value) + (string-append + (value->sql value) "::" cast)) + (v + (error + (simple-format #f "error: unknown type for value: ~A" v))))) + +(define value->sql-literal + (match-lambda + ((? string? s) s) + ((? NULL?) + "NULL") + ((? symbol? s) (symbol->string s)) + ((? number? n) + (number->string n)) + ((? boolean? b) + (if b "TRUE" "FALSE")) + ((? vector? v) + (string-append + "{" (string-join (map value->sql-literal (vector->list v)) ",") "}")) + ((cast . value) + (string-append + (value->sql-literal value) "::" cast)) + (v + (error + (simple-format #f "error: unknown type for value: ~A" v))))) + +(define* (bulk-select conn + table-name + fields + data + #:key (id-proc string->number)) + (define field-strings + (map symbol->string fields)) + + (define query + (string-append + " +SELECT vals.bulk_select_index, id +FROM " table-name " +JOIN (VALUES " +(string-join + (if (vector? data) + (vector-fold + (lambda (index result field-values) + (cons + (string-append + "(" + (number->string index) ", " + (string-join (map value->sql field-values) ",") + ")") + result)) + '() + data) + (map + (lambda (index field-values) + (string-append + "(" + (number->string index) ", " + (string-join (map value->sql field-values) ",") + ")")) + (iota (length data)) + data)) + ", ") +")\n AS vals (bulk_select_index, " (string-join field-strings ", ") ") " +"ON " +(string-join + (map (lambda (field) + (string-concatenate + `("(" + ,table-name "." ,field " = vals." ,field + ,@(if (field-can-be-null? conn table-name field) + `(" OR (" ,table-name "." ,field " IS NULL AND" + " vals." ,field " IS NULL" + ")") + '()) + ")"))) + field-strings) + " AND\n "))) + + (let ((result (make-vector (if (vector? data) + (vector-length data) + (length data)) + #f))) + (for-each + (match-lambda + ((index id) + (vector-set! result (string->number index) + (id-proc id)))) + (exec-query conn query)) + + result)) + +(define* (bulk-insert + conn + table-name + fields + data + #:key (id-proc string->number)) + (define field-strings + (map symbol->string fields)) + + (define query + (string-append + " +INSERT INTO " table-name " (\n" +(string-join field-strings ",\n") +") VALUES " +(string-join + (map (lambda (field-values) + (string-append + "(" + (string-join + (map (lambda (value) + (value->sql value)) + field-values) + ", ") + ")")) + data) + ", ") +" ON CONFLICT DO NOTHING +RETURNING id")) + + (if (null? data) + #() + (let* ((query-result (exec-query conn query)) + (expected-ids (length data)) + (returned-ids (length query-result))) + (if (= expected-ids returned-ids) + (let ((result + (make-vector returned-ids))) + (fold + (lambda (row index) + (match row + ((id) + (vector-set! result index + (id-proc id)))) + (1+ index)) + 0 + query-result) + result) + ;; Can't match up the ids to the data, so just query for them + (bulk-select conn + table-name + fields + data + #:id-proc id-proc))))) + +(define* (insert-missing-data + conn + table-name + fields + data) + (define field-strings + (map symbol->string fields)) + + (let* ((result + (bulk-select + conn + table-name + fields + data)) + (missing-data-indexes + (vector-fold + (lambda (i missing-data-indexes id-or-f) + (if id-or-f + missing-data-indexes + (cons i missing-data-indexes))) + '() + result))) + + (bulk-insert + conn + table-name + fields + (map (lambda (index) + (vector-ref data index)) + missing-data-indexes)) + + *unspecified*)) + (define* (insert-missing-data-and-return-all-ids conn table-name fields data - #:key - sets-of-data? - delete-duplicates? - use-temporary-table?) + #:key (id-proc string->number)) (define field-strings (map symbol->string fields)) - (define value->sql - (match-lambda - ((? string? s) - (string-append "$STR$" s "$STR$")) - ((? NULL?) - "NULL") - ((? symbol? s) - (string-append "$STR$" - (symbol->string s) - "$STR$")) - ((? number? n) - (number->string n)) - ((? boolean? b) - (if b "TRUE" "FALSE")) - ((cast . value) - (string-append - (value->sql value) "::" cast)) - (v - (error - (simple-format #f "error: unknown type for value: ~A" v))))) - - (define (delete-duplicates* data) - (delete-duplicates/sort! - (list-copy data) - (lambda (full-a full-b) - (let loop ((a full-a) - (b full-b)) - (if (null? a) - #f - (let ((a-val (match (car a) - ((_ . val) val) - ((? symbol? val) (symbol->string val)) - (val val))) - (b-val (match (car b) - ((_ . val) val) - ((? symbol? val) (symbol->string val)) - (val val)))) - (cond - ((NULL? a-val) - (if (NULL? b-val) - (loop (cdr a) (cdr b)) - #t)) - ((NULL? b-val) - #f) - (else - (match a-val - ((? string? v) - (if (string=? a-val b-val) - (loop (cdr a) (cdr b)) - (stringnumber)) + (define field-strings + (map symbol->string fields)) + + (define select (string-append - "SELECT id,\n" - (string-join (map (lambda (field) - (string-append table-name "." field)) - field-strings) - ",\n") - " FROM " table-name - " JOIN (VALUES " - (string-join - (map - (lambda (field-values) - (string-append - "(" - (string-join (map value->sql field-values) ",") - ")")) - data) - ", ") - ")\n AS vals (" (string-join field-strings ", ") ") " - "ON " - (string-join - (map (lambda (field) - (string-concatenate - `("(" - ,table-name "." ,field " = vals." ,field - ,@(if (field-can-be-null? conn table-name field) - `(" OR (" ,table-name "." ,field " IS NULL AND" - " vals." ,field " IS NULL" - ")") - '()) - ")"))) - field-strings) - " AND\n "))) - - (define (temp-table-select-query temp-table-name) + " +SELECT id FROM " table-name " +WHERE " +(string-join + (map (lambda (i field) + (string-append + "(" field " = $" i + (if (field-can-be-null? conn table-name field) + (string-append + " OR (" field " IS NULL AND $" i " IS NULL)") + "") + ")")) + (map number->string + (iota (length fields) 1)) + field-strings) + " AND\n ") +";")) + + (define insert (string-append - "SELECT " table-name ".id, " - (string-join (map (lambda (field) - (string-append table-name "." field)) - field-strings) - ", ") - " FROM " table-name - " INNER JOIN " temp-table-name - " ON " - (string-join - (map (lambda (field) - (string-concatenate - `("(" - ,table-name "." ,field " = " ,temp-table-name "." ,field - ,@(if (field-can-be-null? conn table-name field) - `(" OR (" - ,table-name "." ,field " IS NULL" - " AND " - ,temp-table-name "." ,field " IS NULL" - ")") - '()) - ")"))) - field-strings) - " AND "))) - - (define* (insert-sql missing-data - #:key - (table-name table-name)) + " +INSERT INTO " table-name " (\n" (string-join field-strings ",\n") ") +VALUES (" (string-join + (map (lambda (i) + (simple-format #f "$~A" i)) + (iota (length fields) 1)) + ", ") ") +ON CONFLICT DO NOTHING +RETURNING id;")) + + (let ((sql-field-values + (map value->sql-literal field-vals))) + (id-proc + (match (exec-query + conn + select + sql-field-values) + (((id)) id) + (() + (match (exec-query + conn + insert + sql-field-values) + (((id)) id) + (() + (match (exec-query + conn + select + sql-field-values) + (((id)) id))))))))) + +(define (prepare-insert-and-return-id conn + table-name + fields + types) + (define field-strings + (map symbol->string fields)) + + (define prepared-insert-select (string-append - "INSERT INTO " table-name " (\n" - (string-join field-strings ",\n") - ") VALUES " - (string-join - (map (lambda (field-values) + " +PREPARE " table-name "PreparedInsertSelect + (" (string-join (map symbol->string types) ",") ") AS +SELECT id FROM " table-name " +WHERE " +(string-join + (map (lambda (i field) + (string-append + "(" field " = $" i + (if (field-can-be-null? conn table-name field) (string-append - "(" - (string-join - (map (lambda (value) - (value->sql value)) - field-values) - ", ") - ")")) - missing-data) - ", ") - " ON CONFLICT DO NOTHING")) - - (define (format-json json) - ;; PostgreSQL formats JSON strings differently to guile-json, so use - ;; PostgreSQL to do the formatting - (caar - (exec-query - conn - (string-append - "SELECT $STR$" json "$STR$::jsonb")))) - - (define (normalise-values data) - (map (match-lambda - ((? boolean? b) - (if b "t" "f")) - ((? number? n) - (number->string n)) - ((? NULL? n) n) - ((? symbol? s) - (symbol->string s)) - ((? string? s) - s) - ((cast . value) - (if (string=? cast "jsonb") - (format-json value) - value)) - (unknown - (error (simple-format #f "normalise-values: error: ~A\n" unknown)))) - data)) - - (let* ((flattened-deduplicated-data - (if sets-of-data? - (delete-duplicates* - (concatenate data)) - (if delete-duplicates? - (delete-duplicates* data) - data))) - (existing-entries - (if use-temporary-table? - (let ((temp-table-name - (string-append "temp_" table-name))) - ;; Create a temporary table to store the data - (exec-query - conn - (string-append "CREATE TEMPORARY TABLE " - temp-table-name - " (LIKE " - table-name - " INCLUDING ALL)")) - (exec-query - conn - (string-append - "ANALYZE " temp-table-name)) - - ;; Populate the temporary table - (unless (null? flattened-deduplicated-data) - (with-time-logging (string-append "populating " temp-table-name) - (exec-query conn - (insert-sql flattened-deduplicated-data - #:table-name temp-table-name)))) - ;; Use the temporary table to find the existing values - (let ((result - (with-time-logging - (string-append "querying the " temp-table-name) - (exec-query->vhash - conn - (temp-table-select-query temp-table-name) - cdr - (lambda (result) - (string->number (first result))))))) - - (exec-query conn (string-append "DROP TABLE " temp-table-name)) - result)) - - ;; If not using a temporary table, just do a single SELECT query - (if (null? flattened-deduplicated-data) - '() - (fold - (lambda (data-chunk result) - (exec-query->vhash conn - (select-query data-chunk) - cdr - (lambda (result) - (string->number (first result))) - #:vhash result)) - vlist-null - (chunk flattened-deduplicated-data - 3000))))) - (missing-entries - (let loop ((lst flattened-deduplicated-data) - (result '())) - (if (null? lst) - (if delete-duplicates? - (delete-duplicates* result) - result) - (let ((field-values (car lst))) - (if (vhash-assoc - ;; Normalise at this point, so that the proper value - ;; to insert is carried forward - (normalise-values field-values) - existing-entries) - (loop (cdr lst) - result) - (loop (cdr lst) - (cons field-values result))))))) - (new-entries - (if (null? missing-entries) - '() - (append-map! - (lambda (missing-entries-chunk) - (exec-query conn - (insert-sql missing-entries-chunk)) - - (map (lambda (row) - (string->number (first row))) - (exec-query conn (select-query missing-entries-chunk)))) - (chunk missing-entries 3000)))) - - (new-entries-lookup-vhash - (two-lists->vhash missing-entries - new-entries)) - (all-ids - (if sets-of-data? - (map (lambda (field-value-lists) - ;; Normalise the result at this point, ensuring that the id's - ;; in the set are sorted - (sort - (map (lambda (field-values) - (cdr - (or (vhash-assoc (normalise-values field-values) - existing-entries) - (vhash-assoc field-values - new-entries-lookup-vhash) - (error "missing entry" field-values)))) - field-value-lists) - <)) - data) - (map (lambda (field-values) - (cdr - (or (vhash-assoc (normalise-values field-values) - existing-entries) - (vhash-assoc field-values - new-entries-lookup-vhash) - (error "missing entry" field-values)))) - data)))) - (values all-ids - (delete-duplicates/sort! new-entries <)))) + " OR (" field " IS NULL AND $" i " IS NULL)") + "") + ")")) + (map number->string + (iota (length fields) 1)) + field-strings) + " AND\n ") +";")) + + (define prepared-insert + (string-append + " +PREPARE " table-name "PreparedInsert + (" (string-join (map symbol->string types) ",") ") AS +INSERT INTO " table-name " (\n" (string-join field-strings ",\n") ") +VALUES (" (string-join + (map (lambda (i) + (simple-format #f "$~A" i)) + (iota (length fields) 1)) + ", ") ") +ON CONFLICT DO NOTHING +RETURNING id;")) + + (exec-query conn prepared-insert) + (exec-query conn prepared-insert-select) + + (lambda (conn field-vals) + (match (exec-query + conn + (string-append + " +EXECUTE " table-name "PreparedInsert(" + (string-join (map value->sql field-vals) ", ") + ");")) + (((id)) id) + (() + (match (exec-query + conn + (string-append + " +EXECUTE " table-name "PreparedInsertSelect(" +(string-join (map value->sql field-vals) ", ") +");")) + (((id)) id)))))) diff --git a/tests/model-license-set.scm b/tests/model-license-set.scm index 1b377b7..38a86de 100644 --- a/tests/model-license-set.scm +++ b/tests/model-license-set.scm @@ -9,15 +9,15 @@ (test-begin "test-model-license-set") (define license-data - '((("License 1" - "https://gnu.org/licenses/test-1.html" - "https://example.com/why-license-1")) - (("License 1" - "https://gnu.org/licenses/test-1.html" - #f) - ("License 2" - #f - #f)))) + '#((("License 1" + "https://gnu.org/licenses/test-1.html" + "https://example.com/why-license-1")) + (("License 1" + "https://gnu.org/licenses/test-1.html" + #f) + ("License 2" + #f + #f)))) (with-postgresql-connection "test-model-license-set" diff --git a/tests/model-license.scm b/tests/model-license.scm index 32b5623..e34b4f8 100644 --- a/tests/model-license.scm +++ b/tests/model-license.scm @@ -8,18 +8,18 @@ (test-begin "test-model-license") (define license-data - '((("License 1" - "https://gnu.org/licenses/test-1.html" - "https://example.com/why-license-1")) - (("License 1" - "https://gnu.org/licenses/test-1.html" - #f) - ("License 2" - "https://gnu.org/licenses/test-2.html" - #f) - ("License 3" - #f - #f)))) + '#((("License 1" + "https://gnu.org/licenses/test-1.html" + "https://example.com/why-license-1")) + (("License 1" + "https://gnu.org/licenses/test-1.html" + #f) + ("License 2" + "https://gnu.org/licenses/test-2.html" + #f) + ("License 3" + #f + #f)))) (with-postgresql-connection "test-model-license" diff --git a/tests/model-lint-checker.scm b/tests/model-lint-checker.scm index e6740b1..73ac405 100644 --- a/tests/model-lint-checker.scm +++ b/tests/model-lint-checker.scm @@ -16,32 +16,23 @@ conn (lambda (conn) (define data - `((name-1 #t ,(string->number (insert-lint-checker-description-set - conn '(37)))) - (name-2 #f ,(string->number (insert-lint-checker-description-set - conn '(38)))))) + `#((name-1 + #t + ,(lint-checker-description-data->lint-checker-description-set-id + conn + '(("en_US" . "foo")))) + (name-2 + #f + ,(lint-checker-description-data->lint-checker-description-set-id + conn + '(("en_US" . "bar")))))) (match (lint-checkers->lint-checker-ids conn data) - (((? number? id1) (? number? id2)) - #t))) - #:always-rollback? #t)) - - (test-assert "double insert" - (with-postgresql-transaction - conn - (lambda (conn) - (define data - `((name-1 #t ,(string->number (insert-lint-checker-description-set - conn '(37)))) - (name-2 #f ,(string->number (insert-lint-checker-description-set - conn '(38)))))) - - (match (lint-checkers->lint-checker-ids conn data) - (((? number? id1) (? number? id2)) + (#((? number? id1) (? number? id2)) (match (lint-checkers->lint-checker-ids conn data) - (((? number? second-id1) (? number? second-id2)) - (and (eq? id1 second-id1) - (eq? id2 second-id2))))))) + (#((? number? second-id1) (? number? second-id2)) + (and (= id1 second-id1) + (= id2 second-id2))))))) #:always-rollback? #t)))) (test-end) diff --git a/tests/model-lint-warning-message.scm b/tests/model-lint-warning-message.scm index 7231a34..88cedd1 100644 --- a/tests/model-lint-warning-message.scm +++ b/tests/model-lint-warning-message.scm @@ -20,7 +20,7 @@ conn (lambda (conn) (match (lint-warning-message-data->lint-warning-message-ids conn data) - (((? number? id1) (? number? id2)) + (#((? number? id1) (? number? id2)) #t))) #:always-rollback? #t)) @@ -29,11 +29,11 @@ conn (lambda (conn) (match (lint-warning-message-data->lint-warning-message-ids conn data) - (((? number? id1) (? number? id2)) + (#((? number? id1) (? number? id2)) (match (lint-warning-message-data->lint-warning-message-ids conn data) - (((? number? second-id1) (? number? second-id2)) - (and (eq? id1 second-id1) - (eq? id2 second-id2))))))) + (#((? number? second-id1) (? number? second-id2)) + (and (= id1 second-id1) + (= id2 second-id2))))))) #:always-rollback? #t)) (test-assert "single set insert" @@ -53,7 +53,7 @@ ((? number? id) (match (lint-warning-message-data->lint-warning-message-set-id conn data) ((? number? second-id) - (eq? id second-id)))))) + (= id second-id)))))) #:always-rollback? #t)))) (test-end) diff --git a/tests/model-package-metadata.scm b/tests/model-package-metadata.scm index 407b7d2..c262f57 100644 --- a/tests/model-package-metadata.scm +++ b/tests/model-package-metadata.scm @@ -34,21 +34,22 @@ mock-inferior-package-foo-2)) (define mock-package-metadata - (map (lambda (mock-inf-pkg) - (list - (mock-inferior-package-home-page mock-inf-pkg) - (mock-inferior-package-location mock-inf-pkg) - `(("en_US.UTF-8" . "Fake synopsis")) - `(("en_US.UTF-8" . "Fake description")))) - mock-inferior-packages)) + (list->vector + (map (lambda (mock-inf-pkg) + (list + (mock-inferior-package-home-page mock-inf-pkg) + (mock-inferior-package-location mock-inf-pkg) + `(("en_US.UTF-8" . "Fake synopsis")) + `(("en_US.UTF-8" . "Fake description")))) + mock-inferior-packages))) (define (test-license-set-ids conn) (let ((license-id-lists (inferior-packages->license-id-lists conn - '((("License 1" - "https://gnu.org/licenses/test-1.html" - "https://example.com/why-license-1")))))) + '#((("License 1" + "https://gnu.org/licenses/test-1.html" + "https://example.com/why-license-1")))))) (inferior-packages->license-set-ids conn license-id-lists))) @@ -73,7 +74,7 @@ conn mock-package-metadata (test-license-set-ids conn)) - ((x) (number? x)))) + (#(x) (number? x)))) #:always-rollback? #t)) (with-postgresql-transaction diff --git a/tests/model-package.scm b/tests/model-package.scm index bf2cf71..70882bd 100644 --- a/tests/model-package.scm +++ b/tests/model-package.scm @@ -36,9 +36,9 @@ (let ((license-id-lists (inferior-packages->license-id-lists conn - '((("License 1" - "https://gnu.org/licenses/test-1.html" - "https://example.com/why-license-1")))))) + '#((("License 1" + "https://gnu.org/licenses/test-1.html" + "https://example.com/why-license-1")))))) (inferior-packages->license-set-ids conn license-id-lists))) @@ -47,13 +47,14 @@ mock-inferior-package-foo-2)) (define mock-package-metadata - (map (lambda (mock-inf-pkg) - (list - (mock-inferior-package-home-page mock-inf-pkg) - (mock-inferior-package-location mock-inf-pkg) - `(("en_US.UTF-8" . "Fake synopsis")) - `(("en_US.UTF-8" . "Fake description")))) - mock-inferior-packages)) + (list->vector + (map (lambda (mock-inf-pkg) + (list + (mock-inferior-package-home-page mock-inf-pkg) + (mock-inferior-package-location mock-inf-pkg) + `(("en_US.UTF-8" . "Fake synopsis")) + `(("en_US.UTF-8" . "Fake description")))) + mock-inferior-packages))) (with-mock-inferior-packages (lambda () @@ -81,11 +82,12 @@ (cons "integer" NULL)))) (match (inferior-packages->package-ids conn - (zip (map mock-inferior-package-name mock-inferior-packages) - (map mock-inferior-package-version mock-inferior-packages) - package-metadata-ids - package-replacement-package-ids)) - ((x) (number? x)))))) + (list->vector + (zip (map mock-inferior-package-name mock-inferior-packages) + (map mock-inferior-package-version mock-inferior-packages) + (vector->list package-metadata-ids) + package-replacement-package-ids))) + (#(x) (number? x)))))) #:always-rollback? #t) (with-postgresql-transaction @@ -102,16 +104,18 @@ (test-equal "inferior-packages->package-ids is idempotent" (inferior-packages->package-ids conn - (zip (map mock-inferior-package-name mock-inferior-packages) - (map mock-inferior-package-version mock-inferior-packages) - package-metadata-ids - package-replacement-package-ids)) + (list->vector + (zip (map mock-inferior-package-name mock-inferior-packages) + (map mock-inferior-package-version mock-inferior-packages) + (vector->list package-metadata-ids) + package-replacement-package-ids))) (inferior-packages->package-ids conn - (zip (map mock-inferior-package-name mock-inferior-packages) - (map mock-inferior-package-version mock-inferior-packages) - package-metadata-ids - package-replacement-package-ids))))) + (list->vector + (zip (map mock-inferior-package-name mock-inferior-packages) + (map mock-inferior-package-version mock-inferior-packages) + (vector->list package-metadata-ids) + package-replacement-package-ids)))))) #:always-rollback? #t))))) (test-end) -- cgit v1.2.3