aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christopher Baines <mail@cbaines.net> 2021-09-24 12:55:45 +0100
committer Christopher Baines <mail@cbaines.net> 2021-09-24 12:55:45 +0100
commit 261552bd5e6d74d4166905f297a241e31c8e6b5b (patch)
tree 79e8b9d205c56ee57014fd978615934091d5251d
parent 8b34126d220c8375ac185955f540dc13fd38e446 (diff)
download data-service-261552bd5e6d74d4166905f297a241e31c8e6b5b.tar
data-service-261552bd5e6d74d4166905f297a241e31c8e6b5b.tar.gz
Speed up inserting missing derivation sources
Split the recursive part of the query from the non-recursive part, since PostgreSQL doesn't do a great job of estimating the number of rows which will come back from the recursive part, and thus generates a bad plan.
-rw-r--r-- guix-data-service/model/derivation.scm 79
1 file changed, 48 insertions, 31 deletions
diff --git a/guix-data-service/model/derivation.scm b/guix-data-service/model/derivation.scm
index 3e882cb..9b0b951 100644
--- a/guix-data-service/model/derivation.scm
+++ b/guix-data-service/model/derivation.scm
@@ -31,6 +31,7 @@
#:use-module (guix inferior)
#:use-module (guix memoization)
#:use-module (guix derivations)
+ #:use-module (guix-data-service utils)
#:use-module (guix-data-service database)
#:use-module (guix-data-service model utils)
#:use-module (guix-data-service model system)
@@ -1599,15 +1600,17 @@ LIMIT $1"
derivation-inputs
derivations))))
- (simple-format
- #t "debug: insert-missing-derivations: inserting inputs\n")
- (for-each (lambda (derivation-id derivation)
- (insert-derivation-inputs conn
- derivation-id
- (derivation-inputs derivation)))
+ (with-time-logging
+ (simple-format
+ #f "insert-missing-derivations: inserting inputs for ~A derivations"
+ (length derivations))
+ (for-each (lambda (derivation-id derivation)
+ (insert-derivation-inputs conn
+ derivation-id
+ (derivation-inputs derivation)))
- derivation-ids
- derivations)
+ derivation-ids
+ derivations))
derivation-ids))
@@ -1738,19 +1741,20 @@ WHERE " criteria ";"))
(define (derivation-file-names->derivation-ids conn derivation-file-names)
(define (select-source-files-missing-nars derivation-ids)
- (define (split ids)
+ (define (split ids max-length)
(if (> (length ids)
- 1000)
+ max-length)
(call-with-values (lambda ()
- (split-at ids 1000))
+ (split-at ids max-length))
(lambda (ids-lst rest)
(cons ids-lst
- (split rest))))
+ (split rest max-length))))
(list ids)))
- (define (query ids)
- (string-append
- "
+ (define (derivation-ids->all-related-derivation-ids ids)
+ (define query
+ (string-append
+ "
WITH RECURSIVE all_derivations AS (
SELECT column1 AS derivation_id
FROM (VALUES "
@@ -1768,23 +1772,35 @@ WITH RECURSIVE all_derivations AS (
INNER JOIN derivation_outputs
ON derivation_outputs.id = derivation_inputs.derivation_output_id
)
+SELECT all_derivations.derivation_id
+FROM all_derivations"))
+
+ (map car (exec-query conn query)))
+
+ (define (derivation-ids->missing-sources ids)
+ (define query
+ (string-append
+ "
SELECT derivation_sources.derivation_source_file_id, derivation_source_files.store_path
-FROM all_derivations
-INNER JOIN derivation_sources
- ON derivation_sources.derivation_id = all_derivations.derivation_id
+FROM derivation_sources
LEFT JOIN derivation_source_file_nars
ON derivation_sources.derivation_source_file_id =
derivation_source_file_nars.derivation_source_file_id
INNER JOIN derivation_source_files
ON derivation_sources.derivation_source_file_id =
derivation_source_files.id
-WHERE derivation_source_file_nars.derivation_source_file_id IS NULL"))
+ WHERE derivation_sources.derivation_id IN ("
+ (string-join ids ", ")
+ ")
+ AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
+
+ (exec-query conn query))
- (delete-duplicates
- (append-map
- (lambda (ids)
- (exec-query conn (query ids)))
- (split derivation-ids))))
+ (let ((all-derivation-ids
+ (append-map
+ derivation-ids->all-related-derivation-ids
+ (split derivation-ids 250))))
+ (derivation-ids->missing-sources all-derivation-ids)))
(if (null? derivation-file-names)
'()
@@ -1827,12 +1843,13 @@ WHERE derivation_source_file_nars.derivation_source_file_id IS NULL"))
(error "missing derivation id"))))
derivation-file-names)))
- (for-each (match-lambda
- ((derivation-source-file-id store-path)
- (insert-derivation-source-file-nar
- conn
- (string->number derivation-source-file-id)
- store-path)))
- (select-source-files-missing-nars all-ids))
+ (with-time-logging "inserting missing source files"
+ (for-each (match-lambda
+ ((derivation-source-file-id store-path)
+ (insert-derivation-source-file-nar
+ conn
+ (string->number derivation-source-file-id)
+ store-path)))
+ (select-source-files-missing-nars all-ids)))
all-ids))))