From 6e7e63f356cb88e3e5fe1a55a0390a1366205c9c Mon Sep 17 00:00:00 2001 From: Christopher Baines Date: Thu, 25 Mar 2021 21:29:47 +0000 Subject: Add Guile GC related metrics I'm seeing mmap(PROT_NONE) failed crashes, and maybe these metrics will help in understanding what's going on. --- .../agent-messaging/http/server.scm | 18 +++++++++---- guix-build-coordinator/utils.scm | 31 +++++++++++++++++++++- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/guix-build-coordinator/agent-messaging/http/server.scm b/guix-build-coordinator/agent-messaging/http/server.scm index da602ea..552cf85 100644 --- a/guix-build-coordinator/agent-messaging/http/server.scm +++ b/guix-build-coordinator/agent-messaging/http/server.scm @@ -71,8 +71,16 @@ if there was no request body." (define (http-agent-messaging-start-server port host secret-key-base build-coordinator chunked-request-channel) - (define update-base-datastore-metrics! - (base-datastore-metrics-updater build-coordinator)) + (define update-managed-metrics! + (let ((datastore-metrics-updater + (base-datastore-metrics-updater build-coordinator)) + (gc-metrics-updater + (get-gc-metrics-updater + (build-coordinator-metrics-registry build-coordinator)))) + (lambda () + (datastore-metrics-updater) + (gc-metrics-updater)))) + (call-with-error-handling (lambda () @@ -92,7 +100,7 @@ if there was no request body." secret-key-base build-coordinator chunked-request-channel - update-base-datastore-metrics!))) + update-managed-metrics!))) #:host host #:port port)) #:on-error 'backtrace @@ -198,7 +206,7 @@ port. Also, the port used can be changed by passing the --port option.\n" secret-key-base build-coordinator chunked-request-channel - update-base-datastore-metrics!) + update-managed-metrics!) (define (authenticated? uuid request) (let* ((authorization-base64 (match (assq-ref (request-headers request) @@ -435,7 +443,7 @@ port. Also, the port used can be changed by passing the --port option.\n" '(("error" . "access denied")) #:code 403)))) (('GET "metrics") - (update-base-datastore-metrics!) + (update-managed-metrics!) (list (build-response #:code 200 #:headers '((content-type . (text/plain)) diff --git a/guix-build-coordinator/utils.scm b/guix-build-coordinator/utils.scm index fea9c1c..ee1fea3 100644 --- a/guix-build-coordinator/utils.scm +++ b/guix-build-coordinator/utils.scm @@ -23,6 +23,7 @@ #:use-module (gcrypt hash) #:use-module (gcrypt random) #:use-module (json) + #:use-module (prometheus) #:use-module (guix pki) #:use-module (guix utils) #:use-module (guix config) @@ -59,7 +60,9 @@ get-load-average - running-on-the-hurd?)) + running-on-the-hurd? + + get-gc-metrics-updater)) (eval-when (eval load compile) (begin @@ -838,3 +841,29 @@ again." (unless cached-system (set! cached-system (utsname:sysname (uname)))) (string=? cached-system "GNU"))) + +(define (get-gc-metrics-updater registry) + (define metrics + `((gc-time-taken + . ,(make-gauge-metric registry "guile_gc_time_taken")) + (heap-size + . ,(make-gauge-metric registry "guile_heap_size")) + (heap-free-size + . ,(make-gauge-metric registry "guile_heap_free_size")) + (heap-total-allocated + . ,(make-gauge-metric registry "guile_heap_total_allocated")) + (heap-allocated-since-gc + . ,(make-gauge-metric registry "guile_allocated_since_gc")) + (protected-objects + . ,(make-gauge-metric registry "guile_gc_protected_objects")) + (gc-times + . ,(make-gauge-metric registry "guile_gc_times")))) + + (lambda () + (let ((stats (gc-stats))) + (for-each + (match-lambda + ((name . metric) + (let ((value (assq-ref stats name))) + (metric-set metric value)))) + metrics)))) -- cgit v1.2.3