diff options
author | Christopher Baines <mail@cbaines.net> | 2020-12-02 19:02:50 +0000 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2020-12-02 19:02:50 +0000 |
commit | 6aa59c026dcc9544c82ebc9e0df1a597ecf13fa3 (patch) | |
tree | 3bff19953bb91cfe3d0540be62689253ad57b2da /guix-build-coordinator/datastore | |
parent | 806a3551c2e137e4b4d042cf7c5dd1fe1408df9a (diff) | |
download | build-coordinator-6aa59c026dcc9544c82ebc9e0df1a597ecf13fa3.tar build-coordinator-6aa59c026dcc9544c82ebc9e0df1a597ecf13fa3.tar.gz |
Make WAL checkpointing more reliable
I've seen this fail with a "database table is locked" error from SQLite; this
change should keep the fiber running even if that happens.
Diffstat (limited to 'guix-build-coordinator/datastore')
-rw-r--r-- | guix-build-coordinator/datastore/sqlite.scm | 27 |
1 file changed, 19 insertions, 8 deletions
```diff
diff --git a/guix-build-coordinator/datastore/sqlite.scm b/guix-build-coordinator/datastore/sqlite.scm
index 65dcba9..fdedddc 100644
--- a/guix-build-coordinator/datastore/sqlite.scm
+++ b/guix-build-coordinator/datastore/sqlite.scm
@@ -172,14 +172,25 @@
        (lambda ()
          (while #t
            (sleep (* 60 15)) ; 15 minutes
-           (call-with-worker-thread
-            (slot-ref datastore 'worker-writer-thread-channel)
-            (lambda (db)
-              (call-with-duration-metric
-               metrics-registry
-               checkpoint-duration-metric-name
-               (lambda ()
-                 (sqlite-exec db "PRAGMA wal_checkpoint(RESTART);")))))))
+           (with-exception-handler
+               (lambda (exn)
+                 (simple-format (current-error-port)
+                                "exception when performing WAL checkpoint: ~A\n"
+                                exn))
+             (lambda ()
+               (retry-on-error
+                (lambda ()
+                  (call-with-worker-thread
+                   (slot-ref datastore 'worker-writer-thread-channel)
+                   (lambda (db)
+                     (call-with-duration-metric
+                      metrics-registry
+                      checkpoint-duration-metric-name
+                      (lambda ()
+                        (sqlite-exec db "PRAGMA wal_checkpoint(RESTART);"))))))
+                #:times 2
+                #:delay 3))
+             #:unwind? #t)))
        #:parallel? #t)))
 
 (define-method (datastore-update-metrics!
```