diff options
author | Christopher Baines <mail@cbaines.net> | 2023-07-21 21:03:32 +0100 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2023-07-25 17:57:00 +0100 |
commit | bbc53deb1f42852804339a3b7c5e79cd272f9f7c (patch) | |
tree | 3be1b8f601270fb01a46a50ab01a22c89355412d /guix-data-service | |
parent | 348fe36b558ac42d9a63c13cb95ea708c76123cd (diff) | |
download | data-service-bbc53deb1f42852804339a3b7c5e79cd272f9f7c.tar data-service-bbc53deb1f42852804339a3b7c5e79cd272f9f7c.tar.gz |
Rewrite deleting unreferenced derivations
Use fibers more, leaning in on the non-blocking use of Squee for parallelism.
Diffstat (limited to 'guix-data-service')
-rw-r--r-- | guix-data-service/data-deletion.scm | 150 |
1 files changed, 81 insertions, 69 deletions
diff --git a/guix-data-service/data-deletion.scm b/guix-data-service/data-deletion.scm index 241b899..b48f78c 100644 --- a/guix-data-service/data-deletion.scm +++ b/guix-data-service/data-deletion.scm @@ -22,6 +22,7 @@ #:use-module (ice-9 threads) #:use-module (squee) #:use-module (fibers) + #:use-module (fibers channels) #:use-module (guix-data-service utils) #:use-module (guix-data-service database) #:use-module (guix-data-service model git-branch) @@ -538,7 +539,10 @@ DELETE FROM derivations WHERE id = $1" 1))) - (define (delete-batch conn connection-pool) + (define deleted-count 0) + (define channel (make-channel)) + + (define (delete-batch conn) (let* ((derivations (with-time-logging "fetching batch of derivations" (map car @@ -566,77 +570,85 @@ WHERE NOT EXISTS ( ) LIMIT $1" (list (number->string batch-size)))))) (derivations-count (length derivations))) - (let ((deleted-count 0)) - (with-time-logging - (simple-format #f - "Looking at ~A derivations" - derivations-count) - (n-par-for-each - 8 - (lambda (derivation-id) - (unless (string->number derivation-id) - (error - (simple-format #f "derivation-id: ~A is not a number" - derivation-id))) - - (let ((val - (call-with-resource-from-pool connection-pool - (lambda (conn) - (catch 'psql-query-error - (lambda () - (with-postgresql-transaction - conn - (lambda (conn) - (exec-query - conn - " -SET CONSTRAINTS derivations_by_output_details_set_derivation_id_fkey DEFERRED") - (exec-query conn "SET LOCAL lock_timeout = '5s';") + (with-time-logging + (simple-format #f "Looking at ~A derivations" derivations-count) - (maybe-delete-derivation conn - derivation-id)))) - (lambda (key . args) - (simple-format - (current-error-port) - "error when attempting to delete derivation: ~A ~A\n" - key args) + (set! deleted-count 0) + (for-each + (lambda (derivation-id) + (put-message channel derivation-id)) + derivations)) - 0)))))) - (monitor - (set! deleted-count - (+ val deleted-count))))) - derivations)) - - (simple-format (current-error-port) - "Deleted ~A derivations\n" - deleted-count) - deleted-count))) + (simple-format (current-error-port) + "Deleted ~A derivations\n" + deleted-count) + deleted-count)) (run-fibers (lambda () - (let* ((connection-pool - (make-resource-pool - (lambda () - (open-postgresql-connection "data-deletion" #f)) - 8))) - - (with-postgresql-connection - "data-deletion" - (lambda (conn) - (obtain-advisory-transaction-lock - conn - 'delete-unreferenced-derivations) - - (let loop ((total-deleted 0)) - (let ((batch-deleted-count (delete-batch conn connection-pool))) - (if (eq? 0 batch-deleted-count) - (begin - (with-time-logging - "Deleting unused derivation_source_files entries" - (delete-unreferenced-derivations-source-files conn)) - (simple-format - (current-output-port) - "Finished deleting derivations, deleted ~A in total\n" - total-deleted)) - (loop (+ total-deleted batch-deleted-count))))))))))) + ;; First spawn some fibers to delete the derivations + (for-each + (lambda _ + (spawn-fiber + (lambda () + (with-postgresql-connection + "data-deletion" + (lambda (conn) + (let loop ((derivation-id (get-message channel))) + (unless (string->number derivation-id) + (error + (simple-format #f "derivation-id: ~A is not a number" + derivation-id))) + + (let ((val + (catch 'psql-query-error + (lambda () + (with-postgresql-transaction + conn + (lambda (conn) + (exec-query + conn + " +SET CONSTRAINTS derivations_by_output_details_set_derivation_id_fkey DEFERRED") + + (exec-query conn "SET LOCAL lock_timeout = '5s';") + + (maybe-delete-derivation conn + derivation-id)))) + (lambda (key . args) + (simple-format + (current-error-port) + "error when attempting to delete derivation: ~A ~A\n" + key args) + + 0)))) + + ;; This is safe as all fibers are in the same + ;; thread and cooperative. + (set! deleted-count + (+ val deleted-count))) + (loop (get-message channel)))))))) + (iota 12)) + + (with-postgresql-connection + "data-deletion" + (lambda (conn) + (obtain-advisory-transaction-lock + conn + 'delete-unreferenced-derivations) + + (let loop ((total-deleted 0)) + (let ((batch-deleted-count (delete-batch conn))) + (if (eq? 0 batch-deleted-count) + (begin + (with-time-logging + "Deleting unused derivation_source_files entries" + (delete-unreferenced-derivations-source-files conn)) + (simple-format + (current-output-port) + "Finished deleting derivations, deleted ~A in total\n" + total-deleted)) + (loop (+ total-deleted batch-deleted-count)))))))) + #:hz 0 + #:parallelism 1)) |