diff options
-rw-r--r-- | guix-data-service/database.scm | 83 | ||||
-rw-r--r-- | guix-data-service/jobs/load-new-guix-revision.scm | 100 | ||||
-rw-r--r-- | guix-data-service/model/guix-revision-package-derivation.scm | 29 | ||||
-rw-r--r-- | guix-data-service/utils.scm | 65 | ||||
-rw-r--r-- | guix-data-service/web/controller.scm | 6 | ||||
-rw-r--r-- | guix-data-service/web/server.scm | 66 | ||||
-rw-r--r-- | scripts/guix-data-service.in | 46 | ||||
-rw-r--r-- | tests/jobs-load-new-guix-revision.scm | 82 |
8 files changed, 228 insertions, 249 deletions
diff --git a/guix-data-service/database.scm b/guix-data-service/database.scm index 7270e90..8af53da 100644 --- a/guix-data-service/database.scm +++ b/guix-data-service/database.scm @@ -29,10 +29,7 @@ with-postgresql-connection open-postgresql-connection - - with-postgresql-connection-per-thread - with-thread-postgresql-connection - close-thread-postgresql-connection + close-postgresql-connection with-postgresql-transaction @@ -116,6 +113,10 @@ conn)) +(define (close-postgresql-connection conn name) + (pg-conn-finish conn) + (decrement-connection-gauge name)) + (define (run-sqitch) (with-postgresql-connection "sqitch" @@ -168,11 +169,12 @@ params))))) (simple-format #t "running command: ~A\n" (string-join command)) - (unless (zero? (apply system* command)) - (simple-format - (current-error-port) - "error: sqitch command failed\n") - (exit 1)))))))) + (let ((pid (spawn (%config 'sqitch) command))) + (unless (= 0 (status:exit-val (cdr (waitpid pid)))) + (simple-format + (current-error-port) + "error: sqitch command failed\n") + (primitive-exit 1))))))))) (define* (with-postgresql-connection name f #:key (statement-timeout #f)) (let ((conn (open-postgresql-connection name statement-timeout))) @@ -201,69 +203,6 @@ (define %postgresql-connections-name (make-parameter #f)) -(define* (with-postgresql-connection-per-thread name thunk - #:key (statement-timeout #f)) - (parameterize ((%postgresql-connection-parameters - (list name statement-timeout)) - (%postgresql-connections-hash-table - (make-hash-table)) - (%postgresql-connections-name - name)) - (call-with-values - thunk - (lambda vals - (hash-for-each - (lambda (thread conn) - (pg-conn-finish conn) - (decrement-connection-gauge name)) - (%postgresql-connections-hash-table)) - - (apply values vals))))) - -(define %thread-postgresql-connection - (make-thread-local-fluid)) - -(define (with-thread-postgresql-connection f) - (define (set-current-thread-connection conn) - (if conn - (hash-set! (%postgresql-connections-hash-table) - (current-thread) - conn) - (hash-remove! (%postgresql-connections-hash-table) - (current-thread))) - (fluid-set! %thread-postgresql-connection - conn)) - - (let ((conn (fluid-ref %thread-postgresql-connection))) - (if conn - ;; Assume an exception here could mean the connection has failed, so - ;; close it - (with-exception-handler - (lambda (exn) - (pg-conn-finish conn) - (decrement-connection-gauge - (%postgresql-connections-name)) - (set-current-thread-connection #f) - (raise-exception exn)) - (lambda () - (f conn))) - - (let ((conn (apply open-postgresql-connection - (%postgresql-connection-parameters)))) - (set-current-thread-connection conn) - - (f conn))))) - -(define (close-thread-postgresql-connection) - (let ((conn (fluid-ref %thread-postgresql-connection))) - (when conn - (pg-conn-finish conn) - (hash-remove! (%postgresql-connections-hash-table) - (current-thread)) - (fluid-set! %thread-postgresql-connection #f) - (decrement-connection-gauge - (%postgresql-connections-name))))) - (define* (with-postgresql-transaction conn f #:key always-rollback?) (exec-query conn "BEGIN;") diff --git a/guix-data-service/jobs/load-new-guix-revision.scm b/guix-data-service/jobs/load-new-guix-revision.scm index f6d4292..5c2744c 100644 --- a/guix-data-service/jobs/load-new-guix-revision.scm +++ b/guix-data-service/jobs/load-new-guix-revision.scm @@ -1160,6 +1160,7 @@ (let* ((inferior-store (open-connection)) (inferior (start-inferior inferior-store))) (ensure-non-blocking-store-connection inferior-store) + (set-build-options inferior-store #:fallback? #t) (make-inferior-non-blocking! inferior) (call-with-blocked-asyncs (lambda () @@ -1304,7 +1305,7 @@ (let* ((derivation (or (and=> - (inferior-eval-with-store + (inferior-eval-with-store/non-blocking inf store '(lambda (store) @@ -1399,7 +1400,8 @@ inf)) -(define* (extract-information-from conn store guix-revision-id commit +(define* (extract-information-from conn long-running-store-connection + guix-revision-id commit guix-source store-path #:key skip-system-tests? parallelism) @@ -1410,7 +1412,9 @@ ;; inferior Guix works, even if it's build using a different ;; glibc version (string-append - (glibc-locales-for-guix-store-path store store-path) + (with-store-connection + (lambda (store) + (glibc-locales-for-guix-store-path store store-path))) "/lib/locale" ":" (getenv "GUIX_LOCPATH"))) @@ -1423,15 +1427,21 @@ guix-locpath))) (ensure-non-blocking-store-connection inferior-store) (make-inferior-non-blocking! inferior) + + (simple-format #t "debug: started new inferior and store connection\n") + (cons inferior inferior-store))) parallelism #:min-size 0 - #:idle-seconds 10 - #:destructor (match-lambda - ((inferior . store) - ;; Don't close the store connection here, because there - ;; are temporary roots to keep alive - (close-inferior inferior))))) + #:idle-seconds 2 + #:destructor + (match-lambda + ((inferior . store) + (simple-format + #t "debug: closing inferior and associated store connection\n") + + (close-connection store) + (close-inferior inferior))))) (define add-temp-root/long-running-store (let ((channel (make-channel))) @@ -1439,22 +1449,12 @@ (spawn-fiber (lambda () (let loop ((filename (get-message channel))) - (add-temp-root store filename) + (add-temp-root long-running-store-connection filename) (loop (get-message channel))))) (lambda (filename) (put-message channel filename)))) - (define lock-num - ;; I'm seeing problems with the guix-dameon WAL growing excessively, which - ;; I think is happening when processing revivions involving lots of new - ;; derivations. So limit the concurrency here in the hope that this'll - ;; help. - (with-time-logging "getting 'inferior-package-derivations lock" - (lock-advisory-session-lock - conn - 'inferior-package-derivations))) - (simple-format #t "debug: extract-information-from: ~A\n" store-path) (letpar& ((inferior-lint-checkers-and-warnings-data @@ -1484,6 +1484,14 @@ (par-map& (match-lambda ((system . target) + (let loop ((wal-bytes (stat:size (stat "/var/guix/db/db.sqlite-wal")))) + (when (> wal-bytes 200000000) + (simple-format #t "debug: guix-daemon WAL is large (~A), waiting\n" + wal-bytes) + + (sleep 30) + (loop (stat:size (stat "/var/guix/db/db.sqlite-wal"))))) + (with-resource-from-pool inf-and-store-pool res (with-time-logging (simple-format #f "getting derivations for ~A" (cons system target)) @@ -1534,7 +1542,6 @@ pkg-to-replacement-hash-table)))))))) (destroy-resource-pool inf-and-store-pool) - (unlock-advisory-session-lock conn lock-num) (simple-format #t "debug: finished loading information from inferior\n") @@ -1658,7 +1665,7 @@ (prevent-inlining-for-tests load-channel-instances) -(define* (load-new-guix-revision conn store git-repository-id commit +(define* (load-new-guix-revision conn git-repository-id commit #:key skip-system-tests? parallelism) (let* ((git-repository-fields (select-git-repository conn git-repository-id)) @@ -1670,10 +1677,12 @@ (channel (name 'guix) (url git-repository-url) (commit commit))) + (initial-store-connection + (open-store-connection)) (source-and-channel-derivations-by-system (channel->source-and-derivations-by-system conn - store + initial-store-connection channel-for-commit fetch-with-authentication? #:parallelism parallelism)) @@ -1686,16 +1695,24 @@ channel-derivations-by-system))) (let ((store-item (channel-derivations-by-system->guix-store-item - store + initial-store-connection channel-derivations-by-system))) (if store-item (and - (extract-information-from conn store - guix-revision-id - commit guix-source store-item - #:skip-system-tests? - skip-system-tests? - #:parallelism parallelism) + (with-store-connection + (lambda (store) + (add-temp-root store store-item) + + ;; Close the initial connection now that the store-item has a + ;; root + (close-connection initial-store-connection) + + (extract-information-from conn store + guix-revision-id + commit guix-source store-item + #:skip-system-tests? + skip-system-tests? + #:parallelism parallelism))) (if (defined? 'channel-news-for-commit (resolve-module '(guix channels))) @@ -1720,6 +1737,7 @@ (begin (simple-format #t "Failed to generate store item for ~A\n" commit) + (close-connection initial-store-connection) #f))))) (define (enqueue-load-new-guix-revision-job conn git-repository-id commit source) @@ -2078,6 +2096,15 @@ SKIP LOCKED") (string=? priority "t")))) (exec-query conn query))) +(define (open-store-connection) + (let ((store (open-connection))) + (ensure-non-blocking-store-connection store) + (set-build-options store #:fallback? #t) + + store)) + +(prevent-inlining-for-tests open-store-connection) + (define (with-store-connection f) (with-store store (ensure-non-blocking-store-connection store) @@ -2119,14 +2146,11 @@ SKIP LOCKED") (lambda () (with-throw-handler #t (lambda () - (with-store-connection - (lambda (store) - (load-new-guix-revision conn - store - git-repository-id - commit - #:skip-system-tests? #t - #:parallelism parallelism)))) + (load-new-guix-revision conn + git-repository-id + commit + #:skip-system-tests? #t + #:parallelism parallelism)) (lambda (key . args) (simple-format (current-error-port) "error: load-new-guix-revision: ~A ~A\n" diff --git a/guix-data-service/model/guix-revision-package-derivation.scm b/guix-data-service/model/guix-revision-package-derivation.scm index 2b0ed61..63c23e5 100644 --- a/guix-data-service/model/guix-revision-package-derivation.scm +++ b/guix-data-service/model/guix-revision-package-derivation.scm @@ -166,34 +166,29 @@ VALUES ($1, $2, $3, $4, $5)" (loop (append level-counts (list count)))))))) system-ids-and-targets)) -(define (backfill-guix-revision-package-derivation-distribution-counts) +(define (backfill-guix-revision-package-derivation-distribution-counts conn) (define revision-ids - (with-thread-postgresql-connection - (lambda (conn) - (map - car - (exec-query - conn - " + (map + car + (exec-query + conn + " SELECT id FROM guix_revisions EXCEPT SELECT guix_revision_id FROM guix_revision_package_derivation_distribution_counts -ORDER BY id DESC"))))) +ORDER BY id DESC"))) - (n-par-for-each - 4 + (for-each (lambda (revision-id) (simple-format #t "backfilling guix_revision_package_derivation_distribution_counts for revision ~A\n" revision-id) - (with-thread-postgresql-connection + (with-postgresql-transaction + conn (lambda (conn) - (with-postgresql-transaction + (insert-guix-revision-package-derivation-distribution-counts conn - (lambda (conn) - (insert-guix-revision-package-derivation-distribution-counts - conn - revision-id)))))) + revision-id)))) revision-ids)) (define* (get-sql-to-select-package-and-related-derivations-for-revision diff --git a/guix-data-service/utils.scm b/guix-data-service/utils.scm index d72fa55..d01fb5c 100644 --- a/guix-data-service/utils.scm +++ b/guix-data-service/utils.scm @@ -39,6 +39,7 @@ prevent-inlining-for-tests resource-pool-default-timeout + %resource-pool-timeout-handler make-resource-pool destroy-resource-pool call-with-resource-from-pool @@ -55,8 +56,6 @@ delete-duplicates/sort! - get-gc-metrics-updater - get-port-metrics-updater get-guix-metrics-updater call-with-sigint @@ -343,7 +342,11 @@ (define resource-pool-timeout-error? (record-predicate &resource-pool-timeout)) -(define* (call-with-resource-from-pool pool proc #:key (timeout 'default)) +(define %resource-pool-timeout-handler + (make-parameter #f)) + +(define* (call-with-resource-from-pool pool proc #:key (timeout 'default) + (timeout-handler (%resource-pool-timeout-handler))) "Call PROC with a resource from POOL, blocking until a resource becomes available. Return the resource once PROC has returned." @@ -396,6 +399,9 @@ available. Return the resource once PROC has returned." (when (or (not resource) (eq? resource 'resource-pool-retry-checkout)) + (when timeout-handler + (timeout-handler pool proc timeout)) + (raise-exception (make-resource-pool-timeout-error))) @@ -580,55 +586,6 @@ available. Return the resource once PROC has returned." (cons current-element result))))))))) -(define (get-gc-metrics-updater registry) - (define metrics - `((gc-time-taken - . ,(make-gauge-metric registry "guile_gc_time_taken")) - (heap-size - . ,(make-gauge-metric registry "guile_heap_size")) - (heap-free-size - . ,(make-gauge-metric registry "guile_heap_free_size")) - (heap-total-allocated - . ,(make-gauge-metric registry "guile_heap_total_allocated")) - (heap-allocated-since-gc - . ,(make-gauge-metric registry "guile_allocated_since_gc")) - (protected-objects - . ,(make-gauge-metric registry "guile_gc_protected_objects")) - (gc-times - . ,(make-gauge-metric registry "guile_gc_times")))) - - (lambda () - (let ((stats (gc-stats))) - (for-each - (match-lambda - ((name . metric) - (let ((value (assq-ref stats name))) - (metric-set metric value)))) - metrics)))) - -(define (get-port-metrics-updater registry) - (let ((ports-metric - (make-gauge-metric registry "guile_ports_total")) - (fds-metric - (make-gauge-metric registry "file_descriptors_total"))) - (lambda () - (let ((count 0)) - (port-for-each - (lambda _ - (set! count (+ 1 count)))) - - (metric-set ports-metric count)) - - (metric-set - fds-metric - (length - ;; In theory 'scandir' cannot return #f, but in practice, - ;; we've seen it before. - (or (scandir "/proc/self/fd" - (lambda (file) - (not (member file '("." ".."))))) - '())))))) - (define (get-guix-metrics-updater registry) (define guix-db "/var/guix/db/db.sqlite") (define guix-db-wal (string-append guix-db "-wal")) @@ -820,6 +777,7 @@ available. Return the resource once PROC has returned." (spawn-fiber (lambda () (while #t + (sleep 20) (with-exception-handler (lambda (exn) (simple-format (current-error-port) @@ -833,8 +791,7 @@ available. Return the resource once PROC has returned." (connect sock AF_INET INADDR_LOOPBACK port) (close-port sock))) #:timeout 20)) - #:unwind? #t) - (sleep 20))))) + #:unwind? #t))))) ;; Copied from (fibers web server) (define (call-with-sigint thunk cvar) diff --git a/guix-data-service/web/controller.scm b/guix-data-service/web/controller.scm index 1fb369c..d503052 100644 --- a/guix-data-service/web/controller.scm +++ b/guix-data-service/web/controller.scm @@ -200,8 +200,8 @@ (gc-metrics-updater (get-gc-metrics-updater registry)) - (port-metrics-updater - (get-port-metrics-updater registry)) + (process-metrics-updater + (get-process-metrics-updater registry)) (guix-metrics-updater (get-guix-metrics-updater registry))) @@ -439,7 +439,7 @@ (or load-new-guix-revision-job-metrics '())) (gc-metrics-updater) - (port-metrics-updater) + (process-metrics-updater) (guix-metrics-updater) (guile-time-metrics-updater) diff --git a/guix-data-service/web/server.scm b/guix-data-service/web/server.scm index 1c0bce1..6e91809 100644 --- a/guix-data-service/web/server.scm +++ b/guix-data-service/web/server.scm @@ -35,6 +35,7 @@ #:use-module (guix-data-service database) #:use-module (guix-data-service web controller) #:use-module (guix-data-service web util) + #:use-module (guix-data-service model guix-revision-package-derivation) #:export (%guix-data-service-metrics-registry start-guix-data-service-web-server)) @@ -48,12 +49,25 @@ #t) #f)) -(define (handler request body controller secret-key-base startup-completed +(define (handler request finished? + body controller secret-key-base startup-completed render-metrics) - (display - (format #f "~a ~a\n" - (request-method request) - (uri-path (request-uri request)))) + (with-exception-handler + (lambda (exn) + (with-exception-handler + (lambda _ #f) + (lambda () + (simple-format (current-error-port) + "exception when logging: ~A\n" exn)) + #:unwind? #t) + ;; If we can't log, exit + (signal-condition! finished?)) + (lambda () + (display + (format #f "~a ~a\n" + (request-method request) + (uri-path (request-uri request))))) + #:unwind? #t) (apply values (let-values (((request-components mime-types) (request->path-components-and-mime-type request))) @@ -108,7 +122,11 @@ (open-postgresql-connection "web" postgresql-statement-timeout)) - (floor (/ postgresql-connections 2)))) + (floor (/ postgresql-connections 2)) + #:idle-seconds 30 + #:destructor + (lambda (conn) + (close-postgresql-connection conn "web")))) (reserved-connection-pool (make-resource-pool @@ -116,12 +134,40 @@ (open-postgresql-connection "web-reserved" postgresql-statement-timeout)) - (floor (/ postgresql-connections 2)))) + (floor (/ postgresql-connections 2)) + #:idle-seconds 600 + #:destructor + (lambda (conn) + (close-postgresql-connection conn "web-reserved")))) (resource-pool-default-timeout 5)) + (let ((resource-pool-checkout-failures-metric + (make-counter-metric registry + "resource_pool_checkout_timeouts_total" + #:labels '(pool_name)))) + (%resource-pool-timeout-handler + (lambda (pool proc timeout) + (let ((pool-name + (cond + ((eq? pool (connection-pool)) "normal") + ((eq? pool (reserved-connection-pool)) "reserved") + (else #f)))) + (when pool-name + (metric-increment + resource-pool-checkout-failures-metric + #:label-values `((pool_name . ,pool-name)))))))) + + (spawn-fiber + (lambda () + (with-resource-from-pool (connection-pool) conn + (backfill-guix-revision-package-derivation-distribution-counts + conn)))) + (let ((render-metrics - (make-render-metrics registry))) + (make-render-metrics registry)) + (requests-metric + (make-counter-metric registry "requests_total"))) (with-exception-handler (lambda (exn) @@ -137,7 +183,9 @@ port. Also, the port used can be changed by passing the --port option.\n" (lambda () (run-server/patched (lambda (request body) - (handler request body controller + (metric-increment requests-metric) + + (handler request finished? body controller secret-key-base startup-completed render-metrics)) diff --git a/scripts/guix-data-service.in b/scripts/guix-data-service.in index dc6b432..8a124ee 100644 --- a/scripts/guix-data-service.in +++ b/scripts/guix-data-service.in @@ -197,33 +197,33 @@ (lambda (port) (simple-format port "~A\n" (getpid))))) - (call-with-new-thread - (lambda () - (with-postgresql-connection-per-thread - "backfill" - (lambda () - (backfill-guix-revision-package-derivation-distribution-counts))))) - (start-substitute-query-threads) (call-with-new-thread (lambda () - (run-sqitch) - - (for-each - (lambda (git-repository-details) - (when (fifth git-repository-details) - (simple-format #t "starting thread to poll ~A (~A)\n" - (second git-repository-details) - (third git-repository-details)) - - (start-thread-to-poll-git-repository - (first git-repository-details)))) - (with-postgresql-connection - "poll-startup" - all-git-repositories)) - - (atomic-box-set! startup-completed #t))) + (with-exception-handler + (lambda (exn) + (simple-format + (current-error-port) + "startup failed: ~A\n" exn) + (exit 1)) + (lambda () + (run-sqitch) + + (for-each + (lambda (git-repository-details) + (when (fifth git-repository-details) + (simple-format #t "starting thread to poll ~A (~A)\n" + (second git-repository-details) + (third git-repository-details)) + + (start-thread-to-poll-git-repository + (first git-repository-details)))) + (with-postgresql-connection + "poll-startup" + all-git-repositories)) + + (atomic-box-set! startup-completed #t))))) ;; Provide some visual space between the startup output and the ;; server starting diff --git a/tests/jobs-load-new-guix-revision.scm b/tests/jobs-load-new-guix-revision.scm index a2beb64..1a64ce3 100644 --- a/tests/jobs-load-new-guix-revision.scm +++ b/tests/jobs-load-new-guix-revision.scm @@ -37,50 +37,66 @@ (mock ((guix-data-service jobs load-new-guix-revision) - channel->source-and-derivations-by-system - (lambda* (conn store channel fetch-with-authentication? - #:key parallelism) - (cons - "/gnu/store/guix" - '(("x86_64-linux" - . - ((manifest-entry-item . "/gnu/store/foo.drv") - (profile . "/gnu/store/bar.drv"))))))) + open-store-connection + (lambda () + 'fake-store-connection)) (mock ((guix-data-service jobs load-new-guix-revision) - channel-derivations-by-system->guix-store-item - (lambda (store channel-derivations-by-system) - "/gnu/store/test")) + channel->source-and-derivations-by-system + (lambda* (conn store channel fetch-with-authentication? + #:key parallelism) + (cons + "/gnu/store/guix" + '(("x86_64-linux" + . + ((manifest-entry-item . "/gnu/store/foo.drv") + (profile . "/gnu/store/bar.drv"))))))) (mock ((guix-data-service jobs load-new-guix-revision) - extract-information-from - (lambda* (conn store guix-revision-id commit - guix-source store-path - #:key skip-system-tests? - parallelism) - #t)) + channel-derivations-by-system->guix-store-item + (lambda (store channel-derivations-by-system) + "/gnu/store/test")) (mock - ((guix-data-service model channel-instance) - insert-channel-instances - (lambda (conn guix-revision-id derivations-by-system) + ((guix-data-service jobs load-new-guix-revision) + extract-information-from + (lambda* (conn store guix-revision-id commit + guix-source store-path + #:key skip-system-tests? + parallelism) #t)) (mock - ((guix channels) - channel-news-for-commit - (lambda (channel commit) - '())) - - (match (enqueue-load-new-guix-revision-job - conn - (git-repository-url->git-repository-id conn "test-url") - "test-commit" - "test-source") - ((id) - (process-load-new-guix-revision-job id)))))))))) + ((guix-data-service model channel-instance) + insert-channel-instances + (lambda (conn guix-revision-id derivations-by-system) + #t)) + + (mock + ((guix channels) + channel-news-for-commit + (lambda (channel commit) + '())) + + (mock + ((guix store) + add-temp-root + (lambda _ #f)) + + (mock + ((guix store) + close-connection + (lambda _ #f)) + + (match (enqueue-load-new-guix-revision-job + conn + (git-repository-url->git-repository-id conn "test-url") + "test-commit" + "test-source") + ((id) + (process-load-new-guix-revision-job id))))))))))))) (exec-query conn "TRUNCATE guix_revisions CASCADE") (exec-query conn "TRUNCATE load_new_guix_revision_jobs CASCADE") |