From 198a198130ecd3e2174f54276c1dc43e510d097c Mon Sep 17 00:00:00 2001 From: Christopher Baines Date: Fri, 10 Nov 2023 13:52:03 +0000 Subject: Try to avoid the metrics endpoint timing out As this makes it harder to debug issues. --- .../agent-messaging/http/server.scm | 40 ++++++++++++++-------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/guix-build-coordinator/agent-messaging/http/server.scm b/guix-build-coordinator/agent-messaging/http/server.scm index e2e752c..66b8f87 100644 --- a/guix-build-coordinator/agent-messaging/http/server.scm +++ b/guix-build-coordinator/agent-messaging/http/server.scm @@ -154,11 +154,11 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f." (base-datastore-metrics-updater build-coordinator)) (define (update-managed-metrics!) - (call-with-delay-logging datastore-metrics-updater) (call-with-delay-logging gc-metrics-updater) (metric-set thread-metric (length (all-threads))) - (call-with-delay-logging port-metrics-updater)) + (call-with-delay-logging port-metrics-updater) + (call-with-delay-logging datastore-metrics-updater)) (with-exception-handler (lambda (exn) @@ -238,15 +238,7 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f." `((agent_id . ,(assq-ref agent-details 'uuid))))) (datastore-list-agents datastore))) - (lambda () - (metric-set internal-real-time - (get-internal-real-time)) - (metric-set internal-run-time - (get-internal-run-time)) - - ;; These are the db size metrics - (datastore-update-metrics! datastore) - + (define (update-agent-allocated-builds) (zero-metric-for-agents allocated-builds-total) (for-each (match-lambda ((agent-id . count) @@ -254,8 +246,9 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f." count #:label-values `((agent_id . ,agent-id))))) - (datastore-count-allocated-builds datastore)) + (datastore-count-allocated-builds datastore))) + (define (update-unprocessed-hook-events) (for-each (match-lambda ((event . _) (metric-set unprocessed-hook-events-total @@ -268,8 +261,20 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f." (assq-ref event-count 'count) #:label-values `((event . ,(assq-ref event-count 'event))))) - (datastore-count-unprocessed-hook-events datastore))))) + (datastore-count-unprocessed-hook-events datastore))) + (lambda () + (metric-set internal-real-time + (get-internal-real-time)) + (metric-set internal-run-time + (get-internal-run-time)) + + ;; These are the db size metrics + (call-with-delay-logging datastore-update-metrics! + #:args (list datastore)) + + (call-with-delay-logging update-agent-allocated-builds) + (call-with-delay-logging update-unprocessed-hook-events)))) (define-record-type (make-hash-progress build-uuid file size bytes-hashed) @@ -992,8 +997,13 @@ INTERVAL (a time-duration object), otherwise does nothing and returns #f." '(("error" . "access denied")) #:code 403)))) (('GET "metrics") - (call-with-delay-logging update-managed-metrics! - #:threshold 0.5) + (with-fibers-timeout + (lambda () + (call-with-delay-logging update-managed-metrics! + #:threshold 0.5)) + #:timeout 8 + #:on-timeout (const #f)) + (list (build-response #:code 200 #:headers '((content-type . (text/plain)) -- cgit v1.2.3