diff options
author | Christopher Baines <mail@cbaines.net> | 2024-08-07 16:51:57 +0100 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2024-08-07 17:21:28 +0100 |
commit | 77962f7c2c4cb1f6b78e5bac48e9471dee009136 (patch) | |
tree | 0e4d77e76d9b580a30248f6c330aef7855457603 /guix-data-service | |
parent | 7f746b358b07c434fd8df1c5cf4dacf9d0e8698e (diff) | |
download | data-service-77962f7c2c4cb1f6b78e5bac48e9471dee009136.tar data-service-77962f7c2c4cb1f6b78e5bac48e9471dee009136.tar.gz |
Move inserting derivations in to the load-new-guix-revision module
And start to more closely integrate it. This makes it possible to start making
it faster by doing more in parallel.
Diffstat (limited to 'guix-data-service')
-rw-r--r-- | guix-data-service/jobs/load-new-guix-revision.scm | 596 | ||||
-rw-r--r-- | guix-data-service/model/channel-instance.scm | 34 | ||||
-rw-r--r-- | guix-data-service/model/derivation.scm | 280 | ||||
-rw-r--r-- | guix-data-service/model/system-test.scm | 24 |
4 files changed, 507 insertions, 427 deletions
diff --git a/guix-data-service/jobs/load-new-guix-revision.scm b/guix-data-service/jobs/load-new-guix-revision.scm index ebd067a..c9ec9e1 100644 --- a/guix-data-service/jobs/load-new-guix-revision.scm +++ b/guix-data-service/jobs/load-new-guix-revision.scm @@ -25,13 +25,18 @@ #:use-module (ice-9 textual-ports) #:use-module (ice-9 hash-table) #:use-module (ice-9 suspendable-ports) + #:use-module (ice-9 binary-ports) #:use-module ((ice-9 ports internal) #:select (port-poll)) + #:use-module (rnrs bytevectors) #:use-module (rnrs exceptions) + #:use-module (lzlib) #:use-module (json) #:use-module (squee) + #:use-module (gcrypt hash) #:use-module (fibers) #:use-module (fibers channels) #:use-module (guix monads) + #:use-module (guix base32) #:use-module (guix store) #:use-module (guix channels) #:use-module (guix inferior) @@ -41,6 +46,7 @@ #:use-module (guix progress) #:use-module (guix packages) #:use-module (guix derivations) + #:use-module (guix serialization) #:use-module (guix build utils) #:use-module ((guix build syscalls) #:select (set-thread-name)) @@ -49,6 +55,7 @@ #:use-module (guix-data-service utils) #:use-module (guix-data-service model utils) #:use-module (guix-data-service model build) + #:use-module (guix-data-service model system) #:use-module (guix-data-service model channel-instance) #:use-module (guix-data-service model channel-news) #:use-module (guix-data-service model package) @@ -477,10 +484,7 @@ (package-derivation store package system)))) ;; You don't always get what you ask for, so check (if (string=? system (derivation-system derivation)) - (let ((file-name - (derivation-file-name derivation))) - (add-temp-root store file-name) - file-name) + (derivation-file-name derivation) (begin (simple-format (current-error-port) @@ -907,6 +911,294 @@ lint-checker-ids lint-warnings-data))) +(define (update-derivation-ids-hash-table! conn + derivation-ids-hash-table + file-names) + (define file-names-count (vector-length file-names)) + + (simple-format #t "debug: update-derivation-ids-hash-table!: ~A file-names\n" + file-names-count) + (let ((missing-file-names + (vector-fold + (lambda (_ result file-name) + (if (and file-name + (hash-ref derivation-ids-hash-table + file-name)) + result + (cons file-name result))) + '() + file-names))) + + (simple-format + #t "debug: update-derivation-ids-hash-table!: lookup ~A file-names, ~A not cached\n" + file-names-count (length missing-file-names)) + + (unless (null? missing-file-names) + (for-each + (lambda (chunk) + (for-each + (match-lambda + ((id file-name) + (hash-set! derivation-ids-hash-table + file-name + (string->number id)))) + (exec-query conn (select-existing-derivations chunk)))) + (chunk! missing-file-names 1000))))) + +(define (insert-missing-derivations postgresql-connection-pool + utility-thread-channel + derivation-ids-hash-table + derivations) + + (define (ensure-input-derivations-exist input-derivation-file-names) + (unless (null? input-derivation-file-names) + (simple-format + #t "debug: ensure-input-derivations-exist: processing ~A derivations\n" + (length input-derivation-file-names)) + + (with-resource-from-pool postgresql-connection-pool conn + (update-derivation-ids-hash-table! conn + derivation-ids-hash-table + (list->vector + input-derivation-file-names))) + (simple-format + #t + "debug: ensure-input-derivations-exist: checking for missing input derivations\n") + (let ((missing-derivations-filenames + (remove (lambda (derivation-file-name) + (hash-ref derivation-ids-hash-table + derivation-file-name)) + input-derivation-file-names))) + + (unless (null? missing-derivations-filenames) + (simple-format + #f + "debug: ensure-input-derivations-exist: inserting missing input derivations\n") + ;; Ensure all the input derivations exist + (insert-missing-derivations + postgresql-connection-pool + utility-thread-channel + derivation-ids-hash-table + (call-with-worker-thread + utility-thread-channel + (lambda () + (map read-derivation-from-file + missing-derivations-filenames)))))))) + + (define (insert-into-derivations conn drvs) + (string-append + "INSERT INTO derivations " + "(file_name, builder, args, env_vars, system_id) VALUES " + (string-join + (map (match-lambda + (($ <derivation> outputs inputs sources + system builder args env-vars file-name) + (simple-format + #f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')" + file-name + builder + (string-join (map quote-string args) ",") + (string-join (map (match-lambda + ((key . value) + (string-append + "['" key '"', $$" + value "$$ ]"))) + env-vars) + ",") + (system->system-id conn system)))) + drvs) + ",") + " RETURNING id" + ";")) + + (with-time-logging + (simple-format + #f "insert-missing-derivations: inserting ~A derivations" + (length derivations)) + (let* ((chunks (chunk derivations 500)) + (derivation-ids + (with-resource-from-pool postgresql-connection-pool conn + (append-map! + (lambda (chunk) + (map (lambda (result) + (string->number (car result))) + (exec-query conn (insert-into-derivations conn chunk)))) + chunks)))) + + (with-time-logging + "insert-missing-derivations: updating hash table" + (for-each (lambda (derivation derivation-id) + (hash-set! derivation-ids-hash-table + (derivation-file-name derivation) + derivation-id)) + derivations + derivation-ids)) + + (with-time-logging + "insert-missing-derivations: inserting sources" + (for-each + (lambda (derivation-id derivation) + (let ((sources (derivation-sources derivation))) + (unless (null? sources) + (let ((sources-ids + (with-resource-from-pool postgresql-connection-pool conn + (insert-derivation-sources conn + derivation-id + sources)))) + (par-map& + (lambda (id source-file) + (match + (with-resource-from-pool postgresql-connection-pool conn + (exec-query + conn + " +SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" + (list (number->string id)))) + (() + (let ((nar-bytevector + (call-with-worker-thread + utility-thread-channel + (lambda () + (call-with-values + (lambda () + (open-bytevector-output-port)) + (lambda (port get-bytevector) + (write-file source-file port) + (get-bytevector))))))) + (letpar& + ((compressed-nar-bytevector + (call-with-worker-thread + utility-thread-channel + (lambda () + (call-with-values + (lambda () + (open-bytevector-output-port)) + (lambda (port get-bytevector) + (call-with-lzip-output-port port + (lambda (port) + (put-bytevector port nar-bytevector)) + #:level 9) + (get-bytevector)))))) + (hash + (call-with-worker-thread + utility-thread-channel + (lambda () + (bytevector->nix-base32-string + (sha256 nar-bytevector))))) + (uncompressed-size (bytevector-length nar-bytevector))) + + (with-resource-from-pool postgresql-connection-pool conn + (insert-derivation-source-file-nar + conn + id + hash + compressed-nar-bytevector + uncompressed-size))))) + (_ #f))) + sources-ids + sources))))) + derivation-ids + derivations)) + + (with-resource-from-pool postgresql-connection-pool conn + (with-time-logging + "insert-missing-derivations: inserting outputs" + (for-each (lambda (derivation-id derivation) + (insert-derivation-outputs conn + derivation-id + (derivation-outputs derivation))) + derivation-ids + derivations))) + + (with-time-logging + "insert-missing-derivations: ensure-input-derivations-exist" + (ensure-input-derivations-exist (deduplicate-strings + (map derivation-input-path + (append-map derivation-inputs + derivations))))) + + (with-resource-from-pool postgresql-connection-pool conn + (with-time-logging + (simple-format + #f "insert-missing-derivations: inserting inputs for ~A derivations" + (length derivations)) + (insert-derivation-inputs conn + derivation-ids + derivations)))))) + +(define (derivation-file-names->derivation-ids postgresql-connection-pool + utility-thread-channel + derivation-file-names) + (define derivations-count + (vector-length derivation-file-names)) + + (if (= 0 derivations-count) + #() + (let* ((derivation-ids-hash-table (make-hash-table + ;; Account for more derivations in + ;; the graph + (* 2 derivations-count)))) + (simple-format + #t "debug: derivation-file-names->derivation-ids: processing ~A derivations\n" + derivations-count) + + (with-resource-from-pool postgresql-connection-pool conn + (update-derivation-ids-hash-table! conn + derivation-ids-hash-table + derivation-file-names)) + + (let* ((missing-derivation-filenames + (deduplicate-strings + (vector-fold + (lambda (_ result derivation-file-name) + (if (not derivation-file-name) + result + (if (hash-ref derivation-ids-hash-table + derivation-file-name) + result + (cons derivation-file-name result)))) + '() + derivation-file-names))) + (missing-derivations-chunked-promises + (map + (lambda (chunk) + (fibers-delay + (lambda () + (map read-derivation-from-file chunk)))) + (chunk! missing-derivation-filenames 1000)))) + + (for-each + (lambda (missing-derivation-filenames-chunk) + (let ((missing-derivations-chunk + ;; Do the filter again, since processing the last chunk + ;; might have inserted some of the derivations in this + ;; chunk + (remove! (lambda (derivation) + (hash-ref derivation-ids-hash-table + (derivation-file-name + derivation))) + (fibers-force + missing-derivation-filenames-chunk)))) + + (unless (null? missing-derivations-chunk) + (insert-missing-derivations postgresql-connection-pool + utility-thread-channel + derivation-ids-hash-table + missing-derivations-chunk)))) + missing-derivations-chunked-promises)) + + (let ((all-ids + (vector-map + (lambda (_ derivation-file-name) + (if derivation-file-name + (or (hash-ref derivation-ids-hash-table + derivation-file-name) + (error "missing derivation id")) + #f)) + derivation-file-names))) + + all-ids)))) + (define guix-store-path (let ((store-path #f)) (lambda (store) @@ -1162,7 +1454,7 @@ (cons inferior inferior-store))) parallelism #:min-size 0 - #:idle-seconds 10 + #:idle-seconds 30 #:destructor (match-lambda ((inferior . store) (close-inferior inferior) @@ -1399,6 +1691,7 @@ (define* (extract-information-from db-conn guix-revision-id commit guix-source store-item + utility-thread-channel #:key skip-system-tests? extra-inferior-environment-variables parallelism) @@ -1454,25 +1747,29 @@ 1 #:min-size 0)) - (define packages-data-promise - (fibers-delay - (lambda () - (with-resource-from-pool inf-and-store-pool res - (match res - ((inferior . inferior-store) - (with-time-logging "getting all inferior package data" - (let ((packages - pkg-to-replacement-hash-table - (inferior-packages-plus-replacements inferior))) - (all-inferior-packages-data - inferior - packages - pkg-to-replacement-hash-table))))))))) + (define derivation-file-names->derivation-ids/fiberized + (fiberize + (lambda (derivation-file-names) + (derivation-file-names->derivation-ids + postgresql-connection-pool + utility-thread-channel + derivation-file-names)))) (define package-ids-promise (fibers-delay (lambda () - (let ((packages-data (fibers-force packages-data-promise))) + (let ((packages-data + (with-resource-from-pool inf-and-store-pool res + (match res + ((inferior . inferior-store) + (with-time-logging "getting all inferior package data" + (let ((packages + pkg-to-replacement-hash-table + (inferior-packages-plus-replacements inferior))) + (all-inferior-packages-data + inferior + packages + pkg-to-replacement-hash-table)))))))) (with-resource-from-pool postgresql-connection-pool conn (insert-packages conn packages-data)))))) @@ -1534,94 +1831,99 @@ (define (extract-and-store-package-derivations) (define packages-count - (vector-length - (assq-ref (fibers-force packages-data-promise) - 'names))) + (with-resource-from-pool inf-and-store-pool res + (match res + ((inferior . inferior-store) + (ensure-gds-inferior-packages-defined! inferior) - (define chunk-size 3000) + (inferior-eval '(vector-length gds-inferior-packages) inferior))))) - (fibers-for-each - (match-lambda - ((system . target) - (let loop ((wal-bytes (stat:size (stat "/var/guix/db/db.sqlite-wal")))) - (when (> wal-bytes (* 512 (expt 2 20))) - (simple-format #t "debug: guix-daemon WAL is large (~A), waiting\n" - wal-bytes) + (define chunk-size 5000) - (sleep 30) - (loop (stat:size (stat "/var/guix/db/db.sqlite-wal"))))) + (define (process-system-and-target system target) + (let loop ((wal-bytes + (catch #t + (lambda () + (stat:size (stat "/var/guix/db/db.sqlite-wal"))) + (lambda _ 0)))) + (when (> wal-bytes (* 512 (expt 2 20))) + (simple-format #t "debug: guix-daemon WAL is large (~A), waiting\n" + wal-bytes) + + (sleep 30) + (loop (catch #t + (lambda () + (stat:size (stat "/var/guix/db/db.sqlite-wal"))) + (lambda _ 0))))) + (with-time-logging + (simple-format #f "processing derivations for ~A" (cons system target)) (let ((derivations-vector (make-vector packages-count))) (with-time-logging (simple-format #f "getting derivations for ~A" (cons system target)) (let loop ((start-index 0)) - (if (>= (+ start-index chunk-size) packages-count) - (let* ((remaining-count - (- packages-count start-index)) - (chunk - (with-resource-from-pool inf-and-store-pool res - (match res - ((inferior . inferior-store) - (ensure-gds-inferior-packages-defined! inferior) - - (inferior-package-derivations - inferior-store - inferior - system - target - start-index - remaining-count)))))) - (vector-copy! derivations-vector - start-index - chunk)) - (let ((chunk - (with-resource-from-pool inf-and-store-pool res - (match res - ((inferior . inferior-store) - (ensure-gds-inferior-packages-defined! inferior) - - (inferior-package-derivations - inferior-store - inferior - system - target - start-index - chunk-size)))))) - (vector-copy! derivations-vector - start-index - chunk) - (loop (+ start-index chunk-size)))))) - - (let ((package-ids (fibers-force package-ids-promise))) - (with-resource-from-pool postgresql-connection-pool conn - (let* ((derivation-ids + (let* ((count + (if (>= (+ start-index chunk-size) packages-count) + (- packages-count start-index) + chunk-size)) + (chunk + (with-resource-from-pool inf-and-store-pool res + (match res + ((inferior . inferior-store) + (ensure-gds-inferior-packages-defined! inferior) + + (inferior-package-derivations + inferior-store + inferior + system + target + start-index + count)))))) + (vector-copy! derivations-vector + start-index + chunk) + (unless (>= (+ start-index chunk-size) packages-count) + (loop (+ start-index chunk-size)))))) + + (let* ((derivation-ids + (with-time-logging + (simple-format #f "derivation-file-names->derivation-ids (~A ~A)" + system target) + (derivation-file-names->derivation-ids/fiberized + derivations-vector)))) + + (let* ((package-ids (fibers-force package-ids-promise)) + (package-derivation-ids + (with-resource-from-pool postgresql-connection-pool conn (with-time-logging - (simple-format #f "derivation-file-names->derivation-ids (~A ~A)" + (simple-format #f "insert-package-derivations (~A ~A)" system target) - (derivation-file-names->derivation-ids - conn - derivations-vector)))) - - (let ((package-derivation-ids - (with-time-logging - (simple-format #f "insert-package-derivations (~A ~A)" - system target) - (insert-package-derivations conn - system - (or target "") - package-ids - derivation-ids)))) - (chunk-for-each! (lambda (package-derivation-ids-chunk) - (insert-guix-revision-package-derivations - conn - guix-revision-id - package-derivation-ids-chunk)) - 2000 - package-derivation-ids)))))))) - (with-resource-from-pool inf-and-store-pool res - (match res - ((inferior . inferior-store) - (inferior-fetch-system-target-pairs inferior))))) + (insert-package-derivations conn + system + (or target "") + package-ids + derivation-ids))))) + (chunk-for-each! + (lambda (package-derivation-ids-chunk) + (with-resource-from-pool postgresql-connection-pool conn + (insert-guix-revision-package-derivations + conn + guix-revision-id + package-derivation-ids-chunk))) + 2000 + package-derivation-ids)))))) + + (let ((process-system-and-target/fiberized + (fiberize process-system-and-target + #:parallelism parallelism))) + (par-map& + (match-lambda + ((system . target) + (process-system-and-target/fiberized system target))) + (with-resource-from-pool inf-and-store-pool res + (match res + ((inferior . inferior-store) + (inferior-fetch-system-target-pairs inferior)))))) (with-resource-from-pool postgresql-connection-pool conn (with-time-logging @@ -1635,7 +1937,7 @@ (begin (simple-format #t "debug: skipping system tests\n") '()) - (let ((data + (let ((data-with-derivation-file-names (with-resource-from-pool inf-and-store-pool res (match res ((inferior . inferior-store) @@ -1645,22 +1947,41 @@ inferior-store guix-source commit))))))) - (when data - (with-resource-from-pool postgresql-connection-pool conn - (insert-system-tests-for-guix-revision conn - guix-revision-id - data)))))) - - (simple-format #t "debug: extract-information-from: ~A\n" store-path) - (parallel-via-fibers - (fibers-force package-ids-promise) - (extract-and-store-lint-checkers-and-warnings) - (extract-and-store-package-derivations) - (extract-and-store-system-tests))) + (when data-with-derivation-file-names + (let ((data-with-derivation-ids + (map (match-lambda + ((name description derivation-file-names-by-system location-data) + (list name + description + (let ((systems + (map car derivation-file-names-by-system)) + (derivation-ids + (derivation-file-names->derivation-ids/fiberized + (list->vector + (map cdr derivation-file-names-by-system))))) + (map cons systems derivation-ids)) + location-data))) + data-with-derivation-file-names))) + (with-resource-from-pool postgresql-connection-pool conn + (insert-system-tests-for-guix-revision + conn + guix-revision-id + data-with-derivation-ids))))))) + + (with-time-logging + (simple-format #f "extract-information-from: ~A\n" store-path) + (parallel-via-fibers + (fibers-force package-ids-promise) + (extract-and-store-package-derivations) + (extract-and-store-system-tests) + (extract-and-store-lint-checkers-and-warnings))) + + #t) (prevent-inlining-for-tests extract-information-from) -(define (load-channel-instances git-repository-id commit +(define (load-channel-instances utility-thread-channel + git-repository-id commit channel-derivations-by-system) ;; Load the channel instances in a different transaction, so that this can ;; commit prior to the outer transaction @@ -1685,19 +2006,35 @@ (guix-revision-id (or existing-guix-revision-id (insert-guix-revision channel-instances-conn - git-repository-id commit)))) + git-repository-id commit))) + (postgresql-connection-pool + (make-resource-pool + (const channel-instances-conn) + 1 + #:min-size 0))) + (unless existing-guix-revision-id - (insert-channel-instances channel-instances-conn - guix-revision-id - (filter-map - (match-lambda - ((system . derivations) - (and=> - (assoc-ref derivations - 'manifest-entry-item) - (lambda (drv) - (cons system drv))))) - channel-derivations-by-system)) + (let* ((derivations-by-system + (filter-map + (match-lambda + ((system . derivations) + (and=> + (assoc-ref derivations + 'manifest-entry-item) + (lambda (drv) + (cons system drv))))) + channel-derivations-by-system)) + (derivation-ids + (derivation-file-names->derivation-ids + postgresql-connection-pool + utility-thread-channel + (list->vector (map cdr derivations-by-system))))) + + (insert-channel-instances channel-instances-conn + guix-revision-id + (map cons + (map car derivations-by-system) + (vector->list derivation-ids)))) (simple-format (current-error-port) "guix-data-service: saved the channel instance derivations to the database\n")) @@ -1709,6 +2046,13 @@ (define* (load-new-guix-revision conn git-repository-id commit #:key skip-system-tests? parallelism extra-inferior-environment-variables) + (define utility-thread-channel + (make-worker-thread-channel + (const '()) + #:parallelism parallelism)) + + (%worker-thread-default-timeout #f) + (let* ((git-repository-fields (select-git-repository conn git-repository-id)) (git-repository-url @@ -1727,7 +2071,8 @@ fetch-with-authentication? #:parallelism parallelism)) (guix-revision-id - (load-channel-instances git-repository-id commit + (load-channel-instances utility-thread-channel + git-repository-id commit channel-derivations-by-system))) (let ((store-item (channel-derivations-by-system->guix-store-item @@ -1737,6 +2082,7 @@ (extract-information-from conn guix-revision-id commit guix-source store-item + utility-thread-channel #:skip-system-tests? skip-system-tests? #:extra-inferior-environment-variables diff --git a/guix-data-service/model/channel-instance.scm b/guix-data-service/model/channel-instance.scm index 84fc901..2cce2da 100644 --- a/guix-data-service/model/channel-instance.scm +++ b/guix-data-service/model/channel-instance.scm @@ -22,36 +22,28 @@ #:use-module (json) #:use-module (guix utils) #:use-module (guix-data-service model utils) - #:use-module (guix-data-service model derivation) #:export (insert-channel-instances channel-instances-exist-for-guix-revision? select-channel-instances-for-guix-revision)) (define (insert-channel-instances conn guix-revision-id - derivations-by-system) - (let ((derivation-ids - (derivation-file-names->derivation-ids - conn - (list->vector - (map cdr derivations-by-system))))) - - (exec-query - conn - (string-append - " + derivation-ids-by-system) + (exec-query + conn + (string-append + " INSERT INTO channel_instances (guix_revision_id, system, derivation_id) VALUES " - (string-join - (map (lambda (system derivation-id) - (simple-format #f "(~A, '~A', ~A)" - guix-revision-id - system - derivation-id)) - (map car derivations-by-system) - (vector->list derivation-ids)) - ", ")))) + (string-join + (map (lambda (derivation-id-and-system) + (simple-format #f "(~A, '~A', ~A)" + guix-revision-id + (car derivation-id-and-system) + (cdr derivation-id-and-system))) + derivation-ids-by-system) + ", "))) #t) (define (channel-instances-exist-for-guix-revision? conn commit-hash) diff --git a/guix-data-service/model/derivation.scm b/guix-data-service/model/derivation.scm index 1b82889..35b1a29 100644 --- a/guix-data-service/model/derivation.scm +++ b/guix-data-service/model/derivation.scm @@ -23,13 +23,10 @@ #:use-module (ice-9 format) #:use-module (ice-9 binary-ports) #:use-module (rnrs bytevectors) - #:use-module (gcrypt hash) #:use-module (squee) #:use-module (json) #:use-module (guix base16) #:use-module (guix base32) - #:use-module (guix serialization) - #:use-module (lzlib) #:use-module (guix inferior) #:use-module (guix memoization) #:use-module (guix derivations) @@ -58,6 +55,10 @@ select-fixed-output-package-derivations-in-revision select-derivation-outputs-in-revision fix-derivation-output-details-hash-encoding + insert-derivation-sources + insert-derivation-source-file-nar + insert-derivation-outputs + insert-derivation-inputs derivation-output-details->derivation-output-details-ids derivation-output-details-ids->derivation-output-details-set-id select-derivations-by-revision-name-and-version @@ -66,7 +67,6 @@ select-existing-derivations select-derivations-by-id select-derivations-and-build-status - derivation-file-names->derivation-ids update-derivation-inputs-statistics vacuum-derivation-inputs-table update-derivation-outputs-statistics @@ -1487,38 +1487,11 @@ INNER JOIN derivation_outputs sources-ids)) -(define (insert-derivation-source-file-nar conn id source-file) - (define missing? - (match (exec-query - conn - "SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1" - (list (number->string id))) - (() #t) - (_ #f))) - - (when missing? - (let* ((nar-bytevector (call-with-values - (lambda () - (open-bytevector-output-port)) - (lambda (port get-bytevector) - (write-file source-file port) - (get-bytevector)))) - (data-string (bytevector->base16-string - (call-with-values - (lambda () - (open-bytevector-output-port)) - (lambda (port get-bytevector) - (call-with-lzip-output-port port - (lambda (port) - (put-bytevector port nar-bytevector)) - #:level 9) - (get-bytevector))))) - (hash (bytevector->nix-base32-string - (sha256 nar-bytevector))) - (uncompressed-size (bytevector-length nar-bytevector))) - (exec-query - conn - " +(define (insert-derivation-source-file-nar conn id + hash bytevector uncompressed-size) + (exec-query + conn + " INSERT INTO derivation_source_file_nars ( derivation_source_file_id, compression, @@ -1527,12 +1500,12 @@ INSERT INTO derivation_source_file_nars ( uncompressed_size, data ) VALUES ($1, $2, $3, $4, $5, $6)" - (list (number->string id) - "lzip" - "sha256" - hash - (number->string uncompressed-size) - (string-append "\\x" data-string)))))) + (list (number->string id) + "lzip" + "sha256" + hash + (number->string uncompressed-size) + (string-append "\\x" (bytevector->base16-string bytevector))))) (define* (backfill-derivation-source-file-nars conn #:key (batch-size 10000) @@ -1564,130 +1537,6 @@ LIMIT $1" batch) (when loop? (loop (missing-batch)))))) -(define (insert-missing-derivations conn - derivation-ids-hash-table - derivations) - (define (ensure-input-derivations-exist input-derivation-file-names) - (unless (null? input-derivation-file-names) - (simple-format - #t "debug: ensure-input-derivations-exist: processing ~A derivations\n" - (length input-derivation-file-names)) - - (update-derivation-ids-hash-table! conn - derivation-ids-hash-table - (list->vector - input-derivation-file-names)) - (simple-format - #t - "debug: ensure-input-derivations-exist: checking for missing input derivations\n") - (let ((missing-derivations-filenames - (filter (lambda (derivation-file-name) - (not (hash-ref derivation-ids-hash-table - derivation-file-name))) - input-derivation-file-names))) - - (unless (null? missing-derivations-filenames) - (simple-format - #f - "debug: ensure-input-derivations-exist: inserting missing input derivations\n") - ;; Ensure all the input derivations exist - (insert-missing-derivations - conn - derivation-ids-hash-table - (map read-derivation-from-file - missing-derivations-filenames)))))) - - (define (insert-into-derivations dervs) - (string-append - "INSERT INTO derivations " - "(file_name, builder, args, env_vars, system_id) VALUES " - (string-join - (map (match-lambda - (($ <derivation> outputs inputs sources - system builder args env-vars file-name) - (simple-format - #f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')" - file-name - builder - (string-join (map quote-string args) ",") - (string-join (map (match-lambda - ((key . value) - (string-append - "['" key '"', $$" - value "$$ ]"))) - env-vars) - ",") - (system->system-id conn system)))) - dervs) - ",") - " RETURNING id" - ";")) - - (with-time-logging - (simple-format - #f "insert-missing-derivations: inserting ~A derivations" - (length derivations)) - (let ((derivation-ids - (append-map - (lambda (chunk) - (map (lambda (result) - (string->number (car result))) - (exec-query conn (insert-into-derivations chunk)))) - (chunk derivations 500)))) - - (with-time-logging - "insert-missing-derivations: updating hash table" - (for-each (lambda (derivation derivation-id) - (hash-set! derivation-ids-hash-table - (derivation-file-name derivation) - derivation-id)) - derivations - derivation-ids)) - - (with-time-logging - "insert-missing-derivations: inserting outputs" - (for-each (lambda (derivation-id derivation) - (insert-derivation-outputs conn - derivation-id - (derivation-outputs derivation))) - derivation-ids - derivations)) - - (with-time-logging - "insert-missing-derivations: inserting sources" - (for-each (lambda (derivation-id derivation) - (let ((sources (derivation-sources derivation))) - (unless (null? sources) - (let ((sources-ids - (insert-derivation-sources conn - derivation-id - sources))) - (map (lambda (id source-file) - (insert-derivation-source-file-nar conn - id - source-file)) - sources-ids - sources))))) - derivation-ids - derivations)) - - (with-time-logging - "insert-missing-derivations: ensure-input-derivations-exist" - (ensure-input-derivations-exist (deduplicate-strings - (map derivation-input-path - (append-map derivation-inputs - derivations))))) - - (with-time-logging - (simple-format - #f "insert-missing-derivations: inserting inputs for ~A derivations" - (length derivations)) - (insert-derivation-inputs conn - derivation-ids - derivations)) - - derivation-ids))) - (define (select-derivations-by-id conn ids) (define query (string-append "SELECT id, file_name " @@ -1772,40 +1621,6 @@ WHERE " criteria ";")) '() sorted-derivations)) -(define (update-derivation-ids-hash-table! conn - derivation-ids-hash-table - file-names) - (define file-names-count (vector-length file-names)) - - (simple-format #t "debug: update-derivation-ids-hash-table!: ~A file-names\n" - file-names-count) - (let ((missing-file-names - (vector-fold - (lambda (_ result file-name) - (if (and file-name - (hash-ref derivation-ids-hash-table - file-name)) - result - (cons file-name result))) - '() - file-names))) - - (simple-format - #t "debug: update-derivation-ids-hash-table!: lookup ~A file-names, ~A not cached\n" - file-names-count (length missing-file-names)) - - (unless (null? missing-file-names) - (for-each - (lambda (chunk) - (for-each - (match-lambda - ((id file-name) - (hash-set! derivation-ids-hash-table - file-name - (string->number id)))) - (exec-query conn (select-existing-derivations chunk)))) - (chunk! missing-file-names 1000))))) - (define (insert-source-files-missing-nars conn derivation-ids) (define (derivation-ids->next-related-derivation-ids! ids seen-ids) (delete-duplicates/sort! @@ -1888,71 +1703,6 @@ INNER JOIN derivation_source_files next-related-derivation-ids seen-ids)))))) -(define (derivation-file-names->derivation-ids conn derivation-file-names) - (define derivations-count - (vector-length derivation-file-names)) - - (if (= 0 derivations-count) - #() - (let* ((derivation-ids-hash-table (make-hash-table - ;; Account for more derivations in - ;; the graph - (* 2 derivations-count)))) - (simple-format - #t "debug: derivation-file-names->derivation-ids: processing ~A derivations\n" - derivations-count) - - (update-derivation-ids-hash-table! conn - derivation-ids-hash-table - derivation-file-names) - - (let ((missing-derivation-filenames - (deduplicate-strings - (vector-fold - (lambda (_ result derivation-file-name) - (if (not derivation-file-name) - result - (if (hash-ref derivation-ids-hash-table - derivation-file-name) - result - (cons derivation-file-name result)))) - '() - derivation-file-names)))) - - (chunk-for-each! - (lambda (missing-derivation-filenames-chunk) - (let ((missing-derivations-chunk - (with-time-logging - (simple-format #f "reading ~A missing derivations" - (length missing-derivation-filenames-chunk)) - (map read-derivation-from-file - ;; Do the filter again, since processing the last - ;; chunk might have inserted some of the - ;; derivations in this chunk - (filter (lambda (derivation-file-name) - (not (hash-ref derivation-ids-hash-table - derivation-file-name))) - missing-derivation-filenames-chunk))))) - - (unless (null? missing-derivations-chunk) - (insert-missing-derivations conn - derivation-ids-hash-table - missing-derivations-chunk)))) - 1000 - missing-derivation-filenames) - - (let ((all-ids - (vector-map - (lambda (_ derivation-file-name) - (if derivation-file-name - (or (hash-ref derivation-ids-hash-table - derivation-file-name) - (error "missing derivation id")) - #f)) - derivation-file-names))) - - all-ids))))) - (define (update-derivation-inputs-statistics conn) (let ((query " diff --git a/guix-data-service/model/system-test.scm b/guix-data-service/model/system-test.scm index fe2fb83..ab438b7 100644 --- a/guix-data-service/model/system-test.scm +++ b/guix-data-service/model/system-test.scm @@ -23,7 +23,6 @@ #:use-module (guix utils) #:use-module (guix-data-service model utils) #:use-module (guix-data-service model location) - #:use-module (guix-data-service model derivation) #:export (insert-system-tests-for-guix-revision select-system-tests-for-guix-revision @@ -39,7 +38,7 @@ "system_tests" '(name description location_id) (map (match-lambda - ((name description derivation-file-names-by-system location-data) + ((name description derivation-ids-by-system location-data) (list name description (location->location-id @@ -48,20 +47,13 @@ system-test-data))) (data (append-map - (lambda (system-test-id derivation-file-names-by-system) - (let ((systems - (map car derivation-file-names-by-system)) - (derivation-ids - (derivation-file-names->derivation-ids - conn - (map cdr derivation-file-names-by-system)))) - (map (lambda (system derivation-id) - (list guix-revision-id - system-test-id - derivation-id - system)) - systems - derivation-ids))) + (lambda (system-test-id derivation-ids-by-system) + (map (lambda (system-and-derivation-id) + (list guix-revision-id + system-test-id + (cdr system-and-derivation-id) + (car system-and-derivation-id))) + derivation-ids-by-system)) system-test-ids (map third system-test-data)))) |