diff options
author | Christopher Baines <mail@cbaines.net> | 2021-09-24 12:55:45 +0100 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2021-09-24 12:55:45 +0100 |
commit | 261552bd5e6d74d4166905f297a241e31c8e6b5b (patch) | |
tree | 79e8b9d205c56ee57014fd978615934091d5251d | |
parent | 8b34126d220c8375ac185955f540dc13fd38e446 (diff) | |
download | data-service-261552bd5e6d74d4166905f297a241e31c8e6b5b.tar data-service-261552bd5e6d74d4166905f297a241e31c8e6b5b.tar.gz |
Speed up inserting missing derivation sources
Split the recursive part of the query from the non-recursive part, since
PostgreSQL doesn't do a great job of estimating the number of rows which will
come back from the recursive part, and thus generates a bad plan.
-rw-r--r-- | guix-data-service/model/derivation.scm | 79 |
1 file changed, 48 insertions, 31 deletions
diff --git a/guix-data-service/model/derivation.scm b/guix-data-service/model/derivation.scm index 3e882cb..9b0b951 100644 --- a/guix-data-service/model/derivation.scm +++ b/guix-data-service/model/derivation.scm @@ -31,6 +31,7 @@ #:use-module (guix inferior) #:use-module (guix memoization) #:use-module (guix derivations) + #:use-module (guix-data-service utils) #:use-module (guix-data-service database) #:use-module (guix-data-service model utils) #:use-module (guix-data-service model system) @@ -1599,15 +1600,17 @@ LIMIT $1" derivation-inputs derivations)))) - (simple-format - #t "debug: insert-missing-derivations: inserting inputs\n") - (for-each (lambda (derivation-id derivation) - (insert-derivation-inputs conn - derivation-id - (derivation-inputs derivation))) + (with-time-logging + (simple-format + #f "insert-missing-derivations: inserting inputs for ~A derivations" + (length derivations)) + (for-each (lambda (derivation-id derivation) + (insert-derivation-inputs conn + derivation-id + (derivation-inputs derivation))) - derivation-ids - derivations) + derivation-ids + derivations)) derivation-ids)) @@ -1738,19 +1741,20 @@ WHERE " criteria ";")) (define (derivation-file-names->derivation-ids conn derivation-file-names) (define (select-source-files-missing-nars derivation-ids) - (define (split ids) + (define (split ids max-length) (if (> (length ids) - 1000) + max-length) (call-with-values (lambda () - (split-at ids 1000)) + (split-at ids max-length)) (lambda (ids-lst rest) (cons ids-lst - (split rest)))) + (split rest max-length)))) (list ids))) - (define (query ids) - (string-append - " + (define (derivation-ids->all-related-derivation-ids ids) + (define query + (string-append + " WITH RECURSIVE all_derivations AS ( SELECT column1 AS derivation_id FROM (VALUES " @@ -1768,23 +1772,35 @@ WITH RECURSIVE all_derivations AS ( INNER JOIN derivation_outputs ON derivation_outputs.id = derivation_inputs.derivation_output_id ) +SELECT all_derivations.derivation_id 
+FROM all_derivations")) + + (map car (exec-query conn query))) + + (define (derivation-ids->missing-sources ids) + (define query + (string-append + " SELECT derivation_sources.derivation_source_file_id, derivation_source_files.store_path -FROM all_derivations -INNER JOIN derivation_sources - ON derivation_sources.derivation_id = all_derivations.derivation_id +FROM derivation_sources LEFT JOIN derivation_source_file_nars ON derivation_sources.derivation_source_file_id = derivation_source_file_nars.derivation_source_file_id INNER JOIN derivation_source_files ON derivation_sources.derivation_source_file_id = derivation_source_files.id -WHERE derivation_source_file_nars.derivation_source_file_id IS NULL")) + WHERE derivation_sources.derivation_id IN (" + (string-join ids ", ") + ") + AND derivation_source_file_nars.derivation_source_file_id IS NULL")) + + (exec-query conn query)) - (delete-duplicates - (append-map - (lambda (ids) - (exec-query conn (query ids))) - (split derivation-ids)))) + (let ((all-derivation-ids + (append-map + derivation-ids->all-related-derivation-ids + (split derivation-ids 250)))) + (derivation-ids->missing-sources all-derivation-ids))) (if (null? derivation-file-names) '() @@ -1827,12 +1843,13 @@ WHERE derivation_source_file_nars.derivation_source_file_id IS NULL")) (error "missing derivation id")))) derivation-file-names))) - (for-each (match-lambda - ((derivation-source-file-id store-path) - (insert-derivation-source-file-nar - conn - (string->number derivation-source-file-id) - store-path))) - (select-source-files-missing-nars all-ids)) + (with-time-logging "inserting missing source files" + (for-each (match-lambda + ((derivation-source-file-id store-path) + (insert-derivation-source-file-nar + conn + (string->number derivation-source-file-id) + store-path))) + (select-source-files-missing-nars all-ids))) all-ids)))) |