about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christopher Baines <mail@cbaines.net> 2023-11-10 13:52:03 +0000
committer: Christopher Baines <mail@cbaines.net> 2023-11-10 13:52:03 +0000
commit: 198a198130ecd3e2174f54276c1dc43e510d097c (patch)
tree: 16f7a9e97284fb99770400aa7a55897b5dd6a013
parent: 67c8ede1c53810fdd47039d9796920ef07b60849 (diff)
download: build-coordinator-198a198130ecd3e2174f54276c1dc43e510d097c.tar
download: build-coordinator-198a198130ecd3e2174f54276c1dc43e510d097c.tar.gz
Try to avoid the metrics endpoint timing out
A metrics request that times out makes it harder to debug issues.
-rw-r--r-- guix-build-coordinator/agent-messaging/http/server.scm 40
1 file changed, 25 insertions, 15 deletions
diff --git a/guix-build-coordinator/agent-messaging/http/server.scm b/guix-build-coordinator/agent-messaging/http/server.scm
index e2e752c..66b8f87 100644
--- a/guix-build-coordinator/agent-messaging/http/server.scm
+++ b/guix-build-coordinator/agent-messaging/http/server.scm
@@ -154,11 +154,11 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f."
(base-datastore-metrics-updater build-coordinator))
(define (update-managed-metrics!)
- (call-with-delay-logging datastore-metrics-updater)
(call-with-delay-logging gc-metrics-updater)
(metric-set thread-metric
(length (all-threads)))
- (call-with-delay-logging port-metrics-updater))
+ (call-with-delay-logging port-metrics-updater)
+ (call-with-delay-logging datastore-metrics-updater))
(with-exception-handler
(lambda (exn)
@@ -238,15 +238,7 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f."
`((agent_id . ,(assq-ref agent-details 'uuid)))))
(datastore-list-agents datastore)))
- (lambda ()
- (metric-set internal-real-time
- (get-internal-real-time))
- (metric-set internal-run-time
- (get-internal-run-time))
-
- ;; These are the db size metrics
- (datastore-update-metrics! datastore)
-
+ (define (update-agent-allocated-builds)
(zero-metric-for-agents allocated-builds-total)
(for-each (match-lambda
((agent-id . count)
@@ -254,8 +246,9 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f."
count
#:label-values
`((agent_id . ,agent-id)))))
- (datastore-count-allocated-builds datastore))
+ (datastore-count-allocated-builds datastore)))
+ (define (update-unprocessed-hook-events)
(for-each (match-lambda
((event . _)
(metric-set unprocessed-hook-events-total
@@ -268,8 +261,20 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f."
(assq-ref event-count 'count)
#:label-values
`((event . ,(assq-ref event-count 'event)))))
- (datastore-count-unprocessed-hook-events datastore)))))
+ (datastore-count-unprocessed-hook-events datastore)))
+ (lambda ()
+ (metric-set internal-real-time
+ (get-internal-real-time))
+ (metric-set internal-run-time
+ (get-internal-run-time))
+
+ ;; These are the db size metrics
+ (call-with-delay-logging datastore-update-metrics!
+ #:args (list datastore))
+
+ (call-with-delay-logging update-agent-allocated-builds)
+ (call-with-delay-logging update-unprocessed-hook-events))))
(define-record-type <hash-progress>
(make-hash-progress build-uuid file size bytes-hashed)
@@ -992,8 +997,13 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f."
'(("error" . "access denied"))
#:code 403))))
(('GET "metrics")
- (call-with-delay-logging update-managed-metrics!
- #:threshold 0.5)
+ (with-fibers-timeout
+ (lambda ()
+ (call-with-delay-logging update-managed-metrics!
+ #:threshold 0.5))
+ #:timeout 8
+ #:on-timeout (const #f))
+
(list (build-response
#:code 200
#:headers '((content-type . (text/plain))