From 9dec45d2eb1563c5bc9b9b7631682e9c6c743c3e Mon Sep 17 00:00:00 2001 From: Christopher Baines Date: Fri, 14 Jul 2023 10:36:02 +0100 Subject: Make fetching metrics work even when having database problems --- guix-data-service/utils.scm | 30 ++++- guix-data-service/web/controller.scm | 221 +++++++++++++++++++++++------------ 2 files changed, 170 insertions(+), 81 deletions(-) (limited to 'guix-data-service') diff --git a/guix-data-service/utils.scm b/guix-data-service/utils.scm index 7212e9b..361d7c8 100644 --- a/guix-data-service/utils.scm +++ b/guix-data-service/utils.scm @@ -273,10 +273,32 @@ available. Return the resource once PROC has returned." pool (lambda (resource) exp ...))) -(define (resource-pool-stats pool) - (let ((reply (make-channel))) - (put-message pool `(stats ,reply)) - (get-message reply))) +(define* (resource-pool-stats pool #:key (timeout 5)) + (let ((reply (make-channel)) + (start-time (get-internal-real-time))) + (perform-operation + (choice-operation + (wrap-operation + (put-operation pool `(stats ,reply)) + (const #t)) + (wrap-operation (sleep-operation timeout) + (const #f)))) + + (let ((time-remaining + (- timeout + (/ (- (get-internal-real-time) + start-time) + internal-time-units-per-second)))) + (if (> time-remaining 0) + (let ((response + (perform-operation + (choice-operation + (get-operation reply) + (wrap-operation (sleep-operation time-remaining) + (const #f)))))) + response) + (raise-exception + (make-resource-pool-timeout-error)))))) (define (defer-to-parallel-fiber thunk) (let ((reply (make-channel))) diff --git a/guix-data-service/web/controller.scm b/guix-data-service/web/controller.scm index 580ae0e..a0b847c 100644 --- a/guix-data-service/web/controller.scm +++ b/guix-data-service/web/controller.scm @@ -208,29 +208,83 @@ (lambda () (letpar& ((metric-values - (call-with-resource-from-pool - (reserved-connection-pool) - fetch-high-level-table-size-metrics)) + (with-exception-handler + (lambda (exn) + (simple-format + (current-error-port) + "exception fetching table size metrics: ~A\n" + exn) + #f) + (lambda () + (call-with-resource-from-pool + (reserved-connection-pool) + fetch-high-level-table-size-metrics)) + #:unwind? #t)) (guix-revisions-count - (call-with-resource-from-pool - (reserved-connection-pool) - count-guix-revisions)) + (with-exception-handler + (lambda (exn) + (simple-format + (current-error-port) + "exception counting guix revisions: ~A\n" + exn) + #f) + (lambda () + (call-with-resource-from-pool + (reserved-connection-pool) + count-guix-revisions)) + #:unwind? #t)) (pg-stat-user-tables-metrics - (call-with-resource-from-pool - (reserved-connection-pool) - fetch-pg-stat-user-tables-metrics)) + (with-exception-handler + (lambda (exn) + (simple-format + (current-error-port) + "exception fetching pg_stat user table metrics: ~A\n" + exn) + #f) + (lambda () + (call-with-resource-from-pool + (reserved-connection-pool) + fetch-pg-stat-user-tables-metrics)) + #:unwind? #t)) (pg-stat-user-indexes-metrics - (call-with-resource-from-pool - (reserved-connection-pool) - fetch-pg-stat-user-indexes-metrics)) + (with-exception-handler + (lambda (exn) + (simple-format + (current-error-port) + "exception fetching pg_stat user indexes metrics: ~A\n" + exn) + #f) + (lambda () + (call-with-resource-from-pool + (reserved-connection-pool) + fetch-pg-stat-user-indexes-metrics)) + #:unwind? #t)) (pg-stats-metric-values - (call-with-resource-from-pool - (reserved-connection-pool) - fetch-pg-stats-metrics)) + (with-exception-handler + (lambda (exn) + (simple-format + (current-error-port) + "exception fetching pg_stats metrics: ~A\n" + exn) + #f) + (lambda () + (call-with-resource-from-pool + (reserved-connection-pool) + fetch-pg-stats-metrics)) + #:unwind? #t)) (load-new-guix-revision-job-metrics - (call-with-resource-from-pool - (reserved-connection-pool) - select-load-new-guix-revision-job-metrics))) + (with-exception-handler + (lambda (exn) + (simple-format + (current-error-port) + "exception fetching load_new_guix_revision_job metrics: ~A\n" + exn) + #f) + (lambda () + (call-with-resource-from-pool + (reserved-connection-pool) + select-load-new-guix-revision-job-metrics)) + #:unwind? #t))) (for-each (match-lambda @@ -243,7 +297,16 @@ value #:label-values `((pool_name . ,name))))) - (resource-pool-stats pool)))) + (with-exception-handler + (lambda (exn) + (simple-format + (current-error-port) + "exception fetching resource pool stats: ~A\n" + exn) + '()) + (lambda () + (resource-pool-stats pool)) + #:unwind? #t)))) resource-pools) (for-each (match-lambda @@ -261,65 +324,69 @@ toast-bytes #:label-values `((name . ,name) (tablespace . ,tablespace))))) - metric-values) + (or metric-values '())) - (metric-set revisions-count-metric - guix-revisions-count) + (when guix-revisions-count + (metric-set revisions-count-metric + guix-revisions-count)) - (map (lambda (field-values) - (let ((name (assq-ref field-values 'name))) - (for-each - (match-lambda - (('name . _) #f) - ((field . value) - (let ((metric (or (assq-ref pg-stat-metrics field) - (error field)))) - (metric-set metric - value - #:label-values `((name . ,name)))))) - field-values))) - pg-stat-user-tables-metrics) - - (map (lambda (field-values) - (let ((name (assq-ref field-values 'name)) - (table-name (assq-ref field-values 'table-name)) - (tablespace (assq-ref field-values 'tablespace))) - (for-each - (match-lambda - (('name . _) #f) - (('table-name . _) #f) - (('tablespace . _) #f) - ((field . value) - (let ((metric (or (assq-ref pg-stat-indexes-metrics field) - (error field)))) - (metric-set metric - value - #:label-values - `((name . ,name) - (table . ,table-name) - ,@(if (eq? field 'bytes) - `((tablespace . ,tablespace)) - '())))))) - field-values))) - pg-stat-user-indexes-metrics) - - (map (lambda (field-values) - (let ((table (assq-ref field-values 'table-name)) - (column (assq-ref field-values 'column-name))) - (for-each - (match-lambda - (('table-name . _) #f) - (('column-name . _) #f) - ((_ . #f) #f) - ((field . value) - (let ((metric (or (assq-ref pg-stats-metrics field) - (error field)))) - (metric-set metric - value - #:label-values `((table . ,table) - (column . ,column)))))) - field-values))) - pg-stats-metric-values) + (for-each + (lambda (field-values) + (let ((name (assq-ref field-values 'name))) + (for-each + (match-lambda + (('name . _) #f) + ((field . value) + (let ((metric (or (assq-ref pg-stat-metrics field) + (error field)))) + (metric-set metric + value + #:label-values `((name . ,name)))))) + field-values))) + (or pg-stat-user-tables-metrics '())) + + (for-each + (lambda (field-values) + (let ((name (assq-ref field-values 'name)) + (table-name (assq-ref field-values 'table-name)) + (tablespace (assq-ref field-values 'tablespace))) + (for-each + (match-lambda + (('name . _) #f) + (('table-name . _) #f) + (('tablespace . _) #f) + ((field . value) + (let ((metric (or (assq-ref pg-stat-indexes-metrics field) + (error field)))) + (metric-set metric + value + #:label-values + `((name . ,name) + (table . ,table-name) + ,@(if (eq? field 'bytes) + `((tablespace . ,tablespace)) + '())))))) + field-values))) + (or pg-stat-user-indexes-metrics '())) + + (for-each + (lambda (field-values) + (let ((table (assq-ref field-values 'table-name)) + (column (assq-ref field-values 'column-name))) + (for-each + (match-lambda + (('table-name . _) #f) + (('column-name . _) #f) + ((_ . #f) #f) + ((field . value) + (let ((metric (or (assq-ref pg-stats-metrics field) + (error field)))) + (metric-set metric + value + #:label-values `((table . ,table) + (column . ,column)))))) + field-values))) + (or pg-stats-metric-values '())) (for-each (match-lambda ((repository-label state count) @@ -329,7 +396,7 @@ #:label-values `((repository_label . ,repository-label) (state . ,state))))) - load-new-guix-revision-job-metrics) + (or load-new-guix-revision-job-metrics '())) (gc-metrics-updater) (guile-time-metrics-updater) -- cgit v1.2.3