aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher Baines <mail@cbaines.net>2024-07-19 17:06:56 +0100
committerChristopher Baines <mail@cbaines.net>2024-07-19 19:44:53 +0100
commit3f1c2ad6038515981adf3fbafd7c1e49e145047d (patch)
tree3ae034f97d6b76339f9a42012e46d42c66c7091e
parent5439159a169661ee4507fa2f565c38e2b14398d8 (diff)
downloaddata-service-3f1c2ad6038515981adf3fbafd7c1e49e145047d.tar
data-service-3f1c2ad6038515981adf3fbafd7c1e49e145047d.tar.gz
Rewrite the key parts of loading data to be even more parallel
Use a pool for the database connection, and a fibers promise for the package ids, and run other parts of the process in parallel too. This change also means that inserting starts as soon as some data is available, rather than when all the data is available.
-rw-r--r--guix-data-service/jobs/load-new-guix-revision.scm351
1 files changed, 174 insertions, 177 deletions
diff --git a/guix-data-service/jobs/load-new-guix-revision.scm b/guix-data-service/jobs/load-new-guix-revision.scm
index 79e5b1a..00b20f7 100644
--- a/guix-data-service/jobs/load-new-guix-revision.scm
+++ b/guix-data-service/jobs/load-new-guix-revision.scm
@@ -902,32 +902,6 @@
lint-checker-ids
lint-warnings-data)))
-(define (inferior-data->package-derivation-ids
- conn inf
- package-ids
- inferior-packages-system-and-target-to-derivations-alist)
- (append-map!
- (lambda (data)
- (let* ((system-and-target (car data))
- (derivations-vector (cdr data))
- (derivation-ids
- (with-time-logging
- (simple-format #f "derivation-file-names->derivation-ids (~A)"
- system-and-target)
- (derivation-file-names->derivation-ids
- conn
- derivations-vector))))
-
- (with-time-logging
- (simple-format #f "insert-package-derivations (~A)"
- system-and-target)
- (insert-package-derivations conn
- (car system-and-target)
- (or (cdr system-and-target) "")
- package-ids
- derivation-ids))))
- inferior-packages-system-and-target-to-derivations-alist))
-
(define guix-store-path
(let ((store-path #f))
(lambda (store)
@@ -1418,8 +1392,8 @@
inf))))
-(define* (extract-information-from conn guix-revision-id commit
- guix-source store-path
+(define* (extract-information-from db-conn guix-revision-id commit
+ guix-source store-item
#:key skip-system-tests?
extra-inferior-environment-variables
parallelism)
@@ -1432,7 +1406,7 @@
(string-append
(with-store-connection
(lambda (store)
- (glibc-locales-for-guix-store-path store store-path)))
+ (glibc-locales-for-guix-store-path store store-item)))
"/lib/locale"
":" (getenv "GUIX_LOCPATH")))
@@ -1442,7 +1416,7 @@
(let* ((inferior-store (open-store-connection))
(inferior (start-inferior-for-data-extration
inferior-store
- store-path
+ store-item
guix-locpath
extra-inferior-environment-variables)))
(ensure-non-blocking-store-connection inferior-store)
@@ -1462,161 +1436,184 @@
(close-connection store)
(close-inferior inferior)))))
- (simple-format #t "debug: extract-information-from: ~A\n" store-path)
-
- (letpar& ((inferior-lint-checkers-and-warnings-data
- (let ((inferior-lint-checkers-data
- (with-resource-from-pool inf-and-store-pool res
- (match res
- ((inferior . inferior-store)
- (inferior-lint-checkers inferior))))))
- (cons
- inferior-lint-checkers-data
- (and inferior-lint-checkers-data
- (par-map&
- (match-lambda
- ((checker-name _ network-dependent?)
- (and (and (not network-dependent?)
- ;; Running the derivation linter is
- ;; currently infeasible
- (not (eq? checker-name 'derivation)))
- (with-resource-from-pool inf-and-store-pool res
- (match res
- ((inferior . inferior-store)
- (inferior-lint-warnings inferior
- inferior-store
- checker-name)))))))
- inferior-lint-checkers-data)))))
- (inferior-packages-system-and-target-to-derivations-alist
- (par-map&
- (match-lambda
- ((system . target)
- (let loop ((wal-bytes (stat:size (stat "/var/guix/db/db.sqlite-wal"))))
- (when (> wal-bytes (* 2048 (expt 2 20)))
- (simple-format #t "debug: guix-daemon WAL is large (~A), waiting\n"
- wal-bytes)
-
- (sleep 30)
- (loop (stat:size (stat "/var/guix/db/db.sqlite-wal")))))
-
- (with-resource-from-pool inf-and-store-pool res
- (with-time-logging
- (simple-format #f "getting derivations for ~A" (cons system target))
- (match res
- ((inferior . inferior-store)
- (ensure-gds-inferior-packages-defined! inferior)
-
- (let ((drvs
- (inferior-package-derivations
- inferior-store
- inferior
- system
- target)))
-
- (cons (cons system target)
- drvs))))))))
+ (define postgresql-connection-pool
+ (make-resource-pool
+ (lambda ()
+ (with-time-logging
+ "acquiring advisory transaction lock: load-new-guix-revision-inserts"
+ ;; Wait until this is the only transaction inserting data, to
+ ;; avoid any concurrency issues
+ (obtain-advisory-transaction-lock db-conn
+ 'load-new-guix-revision-inserts))
+ db-conn)
+ 1
+ #:min-size 0))
+
+ (define package-ids-promise
+ (fibers-delay
+ (lambda ()
+ (let ((packages-data
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
- (inferior-fetch-system-target-pairs inferior))))))
- (inferior-system-tests
- (if skip-system-tests?
- (begin
- (simple-format #t "debug: skipping system tests\n")
- '())
- (with-resource-from-pool inf-and-store-pool res
- (match res
- ((inferior . inferior-store)
- (with-time-logging "getting inferior system tests"
- (all-inferior-system-tests inferior inferior-store
- guix-source commit)))))))
- (packages-data
- (with-resource-from-pool inf-and-store-pool res
- (match res
- ((inferior . inferior-store)
- (with-time-logging "getting all inferior package data"
- (let ((packages
- pkg-to-replacement-hash-table
- (inferior-packages-plus-replacements inferior)))
- (all-inferior-packages-data inferior
- packages
- pkg-to-replacement-hash-table))))))))
-
- (destroy-resource-pool inf-and-store-pool)
-
- (simple-format
- #t "debug: finished loading information from inferior\n")
-
- (with-time-logging
- "acquiring advisory transaction lock: load-new-guix-revision-inserts"
- ;; Wait until this is the only transaction inserting data, to
- ;; avoid any concurrency issues
- (obtain-advisory-transaction-lock conn
- 'load-new-guix-revision-inserts))
- (with-time-logging
- "inserting data"
- (let* ((package-ids
- (insert-packages conn packages-data)))
- (let* ((package-derivation-ids
- (with-time-logging "inferior-data->package-derivation-ids"
- (inferior-data->package-derivation-ids
- conn
- inf
- package-ids
- inferior-packages-system-and-target-to-derivations-alist)))
- (ids-count
- (length package-derivation-ids)))
- (chunk-for-each! (lambda (package-derivation-ids-chunk)
- (insert-guix-revision-package-derivations
- conn
- guix-revision-id
- package-derivation-ids-chunk))
- 2000
- package-derivation-ids)
- (simple-format
- #t "Successfully loaded ~A package/derivation pairs\n"
- ids-count))
-
- (when inferior-lint-warnings
- (let* ((lint-checker-ids
- (lint-checkers->lint-checker-ids
- conn
- (map (match-lambda
- ((name descriptions-by-locale network-dependent)
- (list
- name
- network-dependent
- (lint-checker-description-data->lint-checker-description-set-id
- conn descriptions-by-locale))))
- (car inferior-lint-checkers-and-warnings-data))))
- (lint-warning-ids
- (insert-lint-warnings
- conn
- package-ids
- lint-checker-ids
- (cdr inferior-lint-checkers-and-warnings-data))))
+ (with-time-logging "getting all inferior package data"
+ (let ((packages
+ pkg-to-replacement-hash-table
+ (inferior-packages-plus-replacements inferior)))
+ (all-inferior-packages-data
+ inferior
+ packages
+ pkg-to-replacement-hash-table))))))))
+ (with-resource-from-pool postgresql-connection-pool conn
+ (insert-packages conn packages-data))))))
+
+ (define (extract-and-store-lint-checkers-and-warnings)
+ (define inferior-lint-checkers-data
+ (with-resource-from-pool inf-and-store-pool res
+ (match res
+ ((inferior . inferior-store)
+ (inferior-lint-checkers inferior)))))
+
+ (when inferior-lint-checkers-data
+ (letpar& ((lint-checker-ids
+ (with-resource-from-pool postgresql-connection-pool conn
+ (lint-checkers->lint-checker-ids
+ conn
+ (map (match-lambda
+ ((name descriptions-by-locale network-dependent)
+ (list
+ name
+ network-dependent
+ (lint-checker-description-data->lint-checker-description-set-id
+ conn descriptions-by-locale))))
+ inferior-lint-checkers-data))))
+ (lint-warnings-data
+ (par-map&
+ (match-lambda
+ ((checker-name _ network-dependent?)
+ (and (and (not network-dependent?)
+ ;; Running the derivation linter is
+ ;; currently infeasible
+ (not (eq? checker-name 'derivation)))
+ (with-resource-from-pool inf-and-store-pool res
+ (match res
+ ((inferior . inferior-store)
+ (inferior-lint-warnings inferior
+ inferior-store
+ checker-name)))))))
+ inferior-lint-checkers-data)))
+
+ (let ((package-ids (fibers-force package-ids-promise)))
+ (with-resource-from-pool postgresql-connection-pool conn
(insert-guix-revision-lint-checkers conn
guix-revision-id
lint-checker-ids)
- (chunk-for-each!
- (lambda (lint-warning-ids-chunk)
- (insert-guix-revision-lint-warnings conn
- guix-revision-id
- lint-warning-ids-chunk))
- 5000
- lint-warning-ids)))
-
- (when inferior-system-tests
- (insert-system-tests-for-guix-revision conn
- guix-revision-id
- inferior-system-tests))
+ (let ((lint-warning-ids
+ (insert-lint-warnings
+ conn
+ package-ids
+ lint-checker-ids
+ lint-warnings-data)))
+ (chunk-for-each!
+ (lambda (lint-warning-ids-chunk)
+ (insert-guix-revision-lint-warnings conn
+ guix-revision-id
+ lint-warning-ids-chunk))
+ 5000
+ lint-warning-ids)))))))
+
+ (define (extract-and-store-package-derivations)
+ (fibers-for-each
+ (match-lambda
+ ((system . target)
+ (let loop ((wal-bytes (stat:size (stat "/var/guix/db/db.sqlite-wal"))))
+ (when (> wal-bytes (* 2048 (expt 2 20)))
+ (simple-format #t "debug: guix-daemon WAL is large (~A), waiting\n"
+ wal-bytes)
+
+ (sleep 30)
+ (loop (stat:size (stat "/var/guix/db/db.sqlite-wal")))))
+
+ (let ((derivations-vector
+ (with-resource-from-pool inf-and-store-pool res
+ (with-time-logging
+ (simple-format #f "getting derivations for ~A" (cons system target))
+ (match res
+ ((inferior . inferior-store)
+ (ensure-gds-inferior-packages-defined! inferior)
+
+ (inferior-package-derivations
+ inferior-store
+ inferior
+ system
+ target)))))))
+
+ (let ((package-ids (fibers-force package-ids-promise)))
+ (with-resource-from-pool postgresql-connection-pool conn
+ (let* ((derivation-ids
+ (with-time-logging
+ (simple-format #f "derivation-file-names->derivation-ids (~A ~A)"
+ system target)
+ (derivation-file-names->derivation-ids
+ conn
+ derivations-vector))))
+
+ (let ((package-derivation-ids
+ (with-time-logging
+ (simple-format #f "insert-package-derivations (~A ~A)"
+ system target)
+ (insert-package-derivations conn
+ system
+ (or target "")
+ package-ids
+ derivation-ids))))
+ (chunk-for-each! (lambda (package-derivation-ids-chunk)
+ (insert-guix-revision-package-derivations
+ conn
+ guix-revision-id
+ package-derivation-ids-chunk))
+ 2000
+ package-derivation-ids))))))))
+ (with-resource-from-pool inf-and-store-pool res
+ (match res
+ ((inferior . inferior-store)
+ (inferior-fetch-system-target-pairs inferior)))))
+
+ (with-resource-from-pool postgresql-connection-pool conn
+ (with-time-logging
+ "insert-guix-revision-package-derivation-distribution-counts"
+ (insert-guix-revision-package-derivation-distribution-counts
+ conn
+ guix-revision-id))))
+
+ (define (extract-and-store-system-tests)
+ (if skip-system-tests?
+ (begin
+ (simple-format #t "debug: skipping system tests\n")
+ '())
+ (let ((data
+ (with-resource-from-pool inf-and-store-pool res
+ (match res
+ ((inferior . inferior-store)
+ (with-time-logging "getting inferior system tests"
+ (all-inferior-system-tests
+ inferior
+ inferior-store
+ guix-source
+ commit)))))))
+ (when data
+ (with-resource-from-pool postgresql-connection-pool conn
+ (insert-system-tests-for-guix-revision conn
+ guix-revision-id
+ data))))))
- (with-time-logging
- "insert-guix-revision-package-derivation-distribution-counts"
- (insert-guix-revision-package-derivation-distribution-counts
- conn
- guix-revision-id))))))
+ (simple-format #t "debug: extract-information-from: ~A\n" store-path)
+ (parallel-via-fibers
+ (fibers-force package-ids-promise)
+ (extract-and-store-lint-checkers-and-warnings)
+ (extract-and-store-package-derivations)
+ (extract-and-store-system-tests)))
(prevent-inlining-for-tests extract-information-from)