aboutsummaryrefslogtreecommitdiff
path: root/guix-build-coordinator/datastore
diff options
context:
space:
mode:
author: Christopher Baines <mail@cbaines.net> 2020-12-02 19:02:50 +0000
committer: Christopher Baines <mail@cbaines.net> 2020-12-02 19:02:50 +0000
commit: 6aa59c026dcc9544c82ebc9e0df1a597ecf13fa3 (patch)
tree: 3bff19953bb91cfe3d0540be62689253ad57b2da /guix-build-coordinator/datastore
parent: 806a3551c2e137e4b4d042cf7c5dd1fe1408df9a (diff)
download: build-coordinator-6aa59c026dcc9544c82ebc9e0df1a597ecf13fa3.tar
build-coordinator-6aa59c026dcc9544c82ebc9e0df1a597ecf13fa3.tar.gz
Make WAL checkpointing more reliable
I've seen the checkpoint fail with a "database table is locked" error from SQLite; this change should keep the fiber running even if that happens.
Diffstat (limited to 'guix-build-coordinator/datastore')
-rw-r--r-- guix-build-coordinator/datastore/sqlite.scm | 27
1 files changed, 19 insertions, 8 deletions
diff --git a/guix-build-coordinator/datastore/sqlite.scm b/guix-build-coordinator/datastore/sqlite.scm
index 65dcba9..fdedddc 100644
--- a/guix-build-coordinator/datastore/sqlite.scm
+++ b/guix-build-coordinator/datastore/sqlite.scm
@@ -172,14 +172,25 @@
(lambda ()
(while #t
(sleep (* 60 15)) ; 15 minutes
- (call-with-worker-thread
- (slot-ref datastore 'worker-writer-thread-channel)
- (lambda (db)
- (call-with-duration-metric
- metrics-registry
- checkpoint-duration-metric-name
- (lambda ()
- (sqlite-exec db "PRAGMA wal_checkpoint(RESTART);")))))))
+ (with-exception-handler
+ (lambda (exn)
+ (simple-format (current-error-port)
+ "exception when performing WAL checkpoint: ~A\n"
+ exn))
+ (lambda ()
+ (retry-on-error
+ (lambda ()
+ (call-with-worker-thread
+ (slot-ref datastore 'worker-writer-thread-channel)
+ (lambda (db)
+ (call-with-duration-metric
+ metrics-registry
+ checkpoint-duration-metric-name
+ (lambda ()
+ (sqlite-exec db "PRAGMA wal_checkpoint(RESTART);"))))))
+ #:times 2
+ #:delay 3))
+ #:unwind? #t)))
#:parallel? #t)))
(define-method (datastore-update-metrics!