From c3db17b612dcf1043421ab6fdf87ae3f022c80fe Mon Sep 17 00:00:00 2001 From: Christopher Baines Date: Mon, 9 Nov 2020 19:43:26 +0000 Subject: Make hook processing a bit more efficient Rather than polling the database every second, use some condition variables to wake threads when there's probably an event. --- guix-build-coordinator/coordinator.scm | 155 +++++++++++++++++++++------------ 1 file changed, 97 insertions(+), 58 deletions(-) (limited to 'guix-build-coordinator/coordinator.scm') diff --git a/guix-build-coordinator/coordinator.scm b/guix-build-coordinator/coordinator.scm index e531bd6..cfc2be3 100644 --- a/guix-build-coordinator/coordinator.scm +++ b/guix-build-coordinator/coordinator.scm @@ -66,6 +66,7 @@ fetch-builds agent-details trigger-build-allocation + build-coordinator-prompt-hook-processing-for-event start-hook-processing-threads build-output-file-location @@ -86,6 +87,8 @@ build-coordinator? (datastore build-coordinator-datastore) (hooks build-coordinator-hooks) + (hook-condvars build-coordinator-hook-condvars + set-build-coordinator-hook-condvars!) (metrics-registry build-coordinator-metrics-registry) (allocation-strategy build-coordinator-allocation-strategy) (allocator-thread build-coordinator-allocator-thread @@ -147,7 +150,9 @@ build-coordinator (make-build-allocator-thread build-coordinator)) - (start-hook-processing-threads build-coordinator) + (set-build-coordinator-hook-condvars! + build-coordinator + (start-hook-processing-threads build-coordinator)) build-coordinator)) @@ -286,6 +291,8 @@ priority tags) + (build-coordinator-prompt-hook-processing-for-event build-coordinator + 'build-submitted) (trigger-build-allocation build-coordinator) `((build-submitted . ,uuid)))))) @@ -314,6 +321,14 @@ (define (trigger-build-allocation build-coordinator) ((build-coordinator-allocator-thread build-coordinator))) +(define (build-coordinator-prompt-hook-processing-for-event build-coordinator + event-name) + (and=> (assoc-ref (build-coordinator-hook-condvars build-coordinator) + event-name) + (lambda (condvar) + (signal-condition-variable condvar) + #t))) + (define (allocate-builds build-coordinator) (define datastore (build-coordinator-datastore build-coordinator)) @@ -406,6 +421,8 @@ trigger-build-allocation) (define (start-hook-processing-threads build-coordinator) + (define wait-timeout-seconds (* 60 5)) + (define datastore (build-coordinator-datastore build-coordinator)) @@ -421,60 +438,68 @@ "hook_failure_total" #:labels '(event))) - (define (process-events event-name handler) - (while #t - (match (datastore-list-unprocessed-hook-events datastore event-name 1) - (() (sleep 1)) - (((id event arguments)) - (catch - #t - (lambda () - (log-msg (build-coordinator-logger build-coordinator) - 'DEBUG - "processing " event " event: " arguments) - (apply handler build-coordinator arguments) - (log-msg (build-coordinator-logger build-coordinator) - 'DEBUG - event " handler finished") - (datastore-delete-unprocessed-hook-event datastore id) - - ;; If this is the hook for a successful build, once the hook - ;; completed successfully, delete the nar files for this build. - (when (eq? 'build-success event) - (match arguments - ((build-id) - (let ((data-location (build-data-location build-id))) - (when (file-exists? data-location) - (delete-file-recursively data-location)))))) - (metric-increment success-counter-metric - #:label-values - `((event . ,event)))) - (lambda (key . args) - (log-msg (build-coordinator-logger build-coordinator) - 'ERROR - "error running " event " hook: " key " " args) - (metric-increment failure-counter-metric - #:label-values - `((event . ,event))) - #f)))))) - - (for-each + (define (process-event event-name handler) + (match (datastore-list-unprocessed-hook-events datastore event-name 1) + (() #f) + (((id event arguments)) + (catch + #t + (lambda () + (log-msg (build-coordinator-logger build-coordinator) + 'DEBUG + "processing " event " event: " arguments) + (apply handler build-coordinator arguments) + (log-msg (build-coordinator-logger build-coordinator) + 'DEBUG + event " handler finished") + (datastore-delete-unprocessed-hook-event datastore id) + + ;; If this is the hook for a successful build, once the hook + ;; completed successfully, delete the nar files for this build. + (when (eq? 'build-success event) + (match arguments + ((build-id) + (let ((data-location (build-data-location build-id))) + (when (file-exists? data-location) + (delete-file-recursively data-location)))))) + (metric-increment success-counter-metric + #:label-values + `((event . ,event)))) + (lambda (key . args) + (log-msg (build-coordinator-logger build-coordinator) + 'ERROR + "error running " event " hook: " key " " args) + (metric-increment failure-counter-metric + #:label-values + `((event . ,event))))) + #t))) + + (map (match-lambda ((event-name . handler) - (parameterize (((@@ (fibers internal) current-fiber) #f)) - (call-with-new-thread - (lambda () - (with-exception-handler - (lambda (exn) - (log-msg (build-coordinator-logger build-coordinator) - 'CRITICAL - "hook processing thread " event-name " exception: " exn) - (exit 1)) - (lambda () - (process-events event-name handler)))))))) - (build-coordinator-hooks build-coordinator)) - - #t) + (let ((mtx (make-mutex)) + (condvar (make-condition-variable))) + (parameterize (((@@ (fibers internal) current-fiber) #f)) + (call-with-new-thread + (lambda () + (lock-mutex mtx) + (with-exception-handler + (lambda (exn) + (log-msg (build-coordinator-logger build-coordinator) + 'CRITICAL + "hook processing thread " event-name + " exception: " exn) + (primitive-exit 1)) + (lambda () + (while #t + (unless (process-event event-name handler) + (wait-condition-variable condvar + mtx + (+ (current-time) + wait-timeout-seconds + (random 30)))))))))) + (cons event-name condvar)))) + (build-coordinator-hooks build-coordinator))) (define (fetch-builds build-coordinator agent systems count) (call-with-duration-metric @@ -584,18 +609,28 @@ #f) ;; TODO Check what the value of this is (assoc-ref result-json "end_time")) + (build-coordinator-prompt-hook-processing-for-event + build-coordinator + (if success? + 'build-success + 'build-failure)) #t)))) -(define (handle-build-start-report datastore +(define (handle-build-start-report build-coordinator agent-id build-id) - (datastore-store-build-start datastore + (datastore-store-build-start (build-coordinator-datastore build-coordinator) build-id - agent-id)) + agent-id) + (build-coordinator-prompt-hook-processing-for-event build-coordinator + 'build-start)) -(define (handle-setup-failure-report datastore +(define (handle-setup-failure-report build-coordinator agent-id build-id report-json) + (define datastore + (build-coordinator-datastore build-coordinator)) + (let ((failure-reason (assoc-ref report-json "failure_reason"))) (if (string=? failure-reason "missing_inputs") ;; For missing inputs, we need to store the inputs that were missing, @@ -605,7 +640,11 @@ (datastore-store-setup-failure/missing-inputs datastore build-id agent-id - missing-inputs)) + missing-inputs) + (build-coordinator-prompt-hook-processing-for-event + build-coordinator + 'build-missing-inputs)) + (datastore-store-setup-failure datastore build-id agent-id -- cgit v1.2.3