aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher Baines <mail@cbaines.net>2023-07-21 21:03:32 +0100
committerChristopher Baines <mail@cbaines.net>2023-07-25 17:57:00 +0100
commitbbc53deb1f42852804339a3b7c5e79cd272f9f7c (patch)
tree3be1b8f601270fb01a46a50ab01a22c89355412d
parent348fe36b558ac42d9a63c13cb95ea708c76123cd (diff)
downloaddata-service-bbc53deb1f42852804339a3b7c5e79cd272f9f7c.tar
data-service-bbc53deb1f42852804339a3b7c5e79cd272f9f7c.tar.gz
Rewrite deleting unreferenced derivations
Use fibers more, leaning in on the non-blocking use of Squee for parallelism.
-rw-r--r--guix-data-service/data-deletion.scm150
1 files changed, 81 insertions, 69 deletions
diff --git a/guix-data-service/data-deletion.scm b/guix-data-service/data-deletion.scm
index 241b899..b48f78c 100644
--- a/guix-data-service/data-deletion.scm
+++ b/guix-data-service/data-deletion.scm
@@ -22,6 +22,7 @@
#:use-module (ice-9 threads)
#:use-module (squee)
#:use-module (fibers)
+ #:use-module (fibers channels)
#:use-module (guix-data-service utils)
#:use-module (guix-data-service database)
#:use-module (guix-data-service model git-branch)
@@ -538,7 +539,10 @@ DELETE FROM derivations WHERE id = $1"
1)))
- (define (delete-batch conn connection-pool)
+ (define deleted-count 0)
+ (define channel (make-channel))
+
+ (define (delete-batch conn)
(let* ((derivations
(with-time-logging "fetching batch of derivations"
(map car
@@ -566,77 +570,85 @@ WHERE NOT EXISTS (
) LIMIT $1"
(list (number->string batch-size))))))
(derivations-count (length derivations)))
- (let ((deleted-count 0))
- (with-time-logging
- (simple-format #f
- "Looking at ~A derivations"
- derivations-count)
- (n-par-for-each
- 8
- (lambda (derivation-id)
- (unless (string->number derivation-id)
- (error
- (simple-format #f "derivation-id: ~A is not a number"
- derivation-id)))
-
- (let ((val
- (call-with-resource-from-pool connection-pool
- (lambda (conn)
- (catch 'psql-query-error
- (lambda ()
- (with-postgresql-transaction
- conn
- (lambda (conn)
- (exec-query
- conn
- "
-SET CONSTRAINTS derivations_by_output_details_set_derivation_id_fkey DEFERRED")
- (exec-query conn "SET LOCAL lock_timeout = '5s';")
+ (with-time-logging
+ (simple-format #f "Looking at ~A derivations" derivations-count)
- (maybe-delete-derivation conn
- derivation-id))))
- (lambda (key . args)
- (simple-format
- (current-error-port)
- "error when attempting to delete derivation: ~A ~A\n"
- key args)
+ (set! deleted-count 0)
+ (for-each
+ (lambda (derivation-id)
+ (put-message channel derivation-id))
+ derivations))
- 0))))))
- (monitor
- (set! deleted-count
- (+ val deleted-count)))))
- derivations))
-
- (simple-format (current-error-port)
- "Deleted ~A derivations\n"
- deleted-count)
- deleted-count)))
+ (simple-format (current-error-port)
+ "Deleted ~A derivations\n"
+ deleted-count)
+ deleted-count))
(run-fibers
(lambda ()
- (let* ((connection-pool
- (make-resource-pool
- (lambda ()
- (open-postgresql-connection "data-deletion" #f))
- 8)))
-
- (with-postgresql-connection
- "data-deletion"
- (lambda (conn)
- (obtain-advisory-transaction-lock
- conn
- 'delete-unreferenced-derivations)
-
- (let loop ((total-deleted 0))
- (let ((batch-deleted-count (delete-batch conn connection-pool)))
- (if (eq? 0 batch-deleted-count)
- (begin
- (with-time-logging
- "Deleting unused derivation_source_files entries"
- (delete-unreferenced-derivations-source-files conn))
- (simple-format
- (current-output-port)
- "Finished deleting derivations, deleted ~A in total\n"
- total-deleted))
- (loop (+ total-deleted batch-deleted-count)))))))))))
+ ;; First spawn some fibers to delete the derivations
+ (for-each
+ (lambda _
+ (spawn-fiber
+ (lambda ()
+ (with-postgresql-connection
+ "data-deletion"
+ (lambda (conn)
+ (let loop ((derivation-id (get-message channel)))
+ (unless (string->number derivation-id)
+ (error
+ (simple-format #f "derivation-id: ~A is not a number"
+ derivation-id)))
+
+ (let ((val
+ (catch 'psql-query-error
+ (lambda ()
+ (with-postgresql-transaction
+ conn
+ (lambda (conn)
+ (exec-query
+ conn
+ "
+SET CONSTRAINTS derivations_by_output_details_set_derivation_id_fkey DEFERRED")
+
+ (exec-query conn "SET LOCAL lock_timeout = '5s';")
+
+ (maybe-delete-derivation conn
+ derivation-id))))
+ (lambda (key . args)
+ (simple-format
+ (current-error-port)
+ "error when attempting to delete derivation: ~A ~A\n"
+ key args)
+
+ 0))))
+
+ ;; This is safe as all fibers are in the same
+ ;; thread and cooperative.
+ (set! deleted-count
+ (+ val deleted-count)))
+ (loop (get-message channel))))))))
+ (iota 12))
+
+ (with-postgresql-connection
+ "data-deletion"
+ (lambda (conn)
+ (obtain-advisory-transaction-lock
+ conn
+ 'delete-unreferenced-derivations)
+
+ (let loop ((total-deleted 0))
+ (let ((batch-deleted-count (delete-batch conn)))
+ (if (eq? 0 batch-deleted-count)
+ (begin
+ (with-time-logging
+ "Deleting unused derivation_source_files entries"
+ (delete-unreferenced-derivations-source-files conn))
+ (simple-format
+ (current-output-port)
+ "Finished deleting derivations, deleted ~A in total\n"
+ total-deleted))
+ (loop (+ total-deleted batch-deleted-count))))))))
+ #:hz 0
+ #:parallelism 1))