aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher Baines <mail@cbaines.net>2020-11-09 19:43:26 +0000
committerChristopher Baines <mail@cbaines.net>2020-11-09 19:43:26 +0000
commitc3db17b612dcf1043421ab6fdf87ae3f022c80fe (patch)
treef35674de0154b290dec779c80906670707ba76bc
parentdd9572c745d0b40bd6f0ca0c5b6284bb342d5da6 (diff)
downloadbuild-coordinator-c3db17b612dcf1043421ab6fdf87ae3f022c80fe.tar
build-coordinator-c3db17b612dcf1043421ab6fdf87ae3f022c80fe.tar.gz
Make hook processing a bit more efficient
Rather than polling the database every second, use some condition variables to wake threads when there's probably an event.
-rw-r--r--guix-build-coordinator/agent-messaging/http/server.scm4
-rw-r--r--guix-build-coordinator/coordinator.scm155
2 files changed, 99 insertions, 60 deletions
diff --git a/guix-build-coordinator/agent-messaging/http/server.scm b/guix-build-coordinator/agent-messaging/http/server.scm
index 6d73a9d..3c83e4c 100644
--- a/guix-build-coordinator/agent-messaging/http/server.scm
+++ b/guix-build-coordinator/agent-messaging/http/server.scm
@@ -319,7 +319,7 @@ port. Also, the port used can be changed by passing the --port option.\n"
(datastore-agent-for-build datastore uuid)))
(if (authenticated? agent-id-for-build request)
(begin
- (handle-build-start-report datastore
+ (handle-build-start-report build-coordinator
agent-id-for-build
uuid)
(render-json
@@ -333,7 +333,7 @@ port. Also, the port used can be changed by passing the --port option.\n"
(if (authenticated? agent-id-for-build request)
(begin
(handle-setup-failure-report
- datastore
+ build-coordinator
agent-id-for-build uuid
(json-string->scm (utf8->string body)))
;; Trigger build allocation, so that the allocator can handle
diff --git a/guix-build-coordinator/coordinator.scm b/guix-build-coordinator/coordinator.scm
index e531bd6..cfc2be3 100644
--- a/guix-build-coordinator/coordinator.scm
+++ b/guix-build-coordinator/coordinator.scm
@@ -66,6 +66,7 @@
fetch-builds
agent-details
trigger-build-allocation
+ build-coordinator-prompt-hook-processing-for-event
start-hook-processing-threads
build-output-file-location
@@ -86,6 +87,8 @@
build-coordinator?
(datastore build-coordinator-datastore)
(hooks build-coordinator-hooks)
+ (hook-condvars build-coordinator-hook-condvars
+ set-build-coordinator-hook-condvars!)
(metrics-registry build-coordinator-metrics-registry)
(allocation-strategy build-coordinator-allocation-strategy)
(allocator-thread build-coordinator-allocator-thread
@@ -147,7 +150,9 @@
build-coordinator
(make-build-allocator-thread build-coordinator))
- (start-hook-processing-threads build-coordinator)
+ (set-build-coordinator-hook-condvars!
+ build-coordinator
+ (start-hook-processing-threads build-coordinator))
build-coordinator))
@@ -286,6 +291,8 @@
priority
tags)
+ (build-coordinator-prompt-hook-processing-for-event build-coordinator
+ 'build-submitted)
(trigger-build-allocation build-coordinator)
`((build-submitted . ,uuid))))))
@@ -314,6 +321,14 @@
(define (trigger-build-allocation build-coordinator)
((build-coordinator-allocator-thread build-coordinator)))
+(define (build-coordinator-prompt-hook-processing-for-event build-coordinator
+ event-name)
+ (and=> (assoc-ref (build-coordinator-hook-condvars build-coordinator)
+ event-name)
+ (lambda (condvar)
+ (signal-condition-variable condvar)
+ #t)))
+
(define (allocate-builds build-coordinator)
(define datastore
(build-coordinator-datastore build-coordinator))
@@ -406,6 +421,8 @@
trigger-build-allocation)
(define (start-hook-processing-threads build-coordinator)
+ (define wait-timeout-seconds (* 60 5))
+
(define datastore
(build-coordinator-datastore build-coordinator))
@@ -421,60 +438,68 @@
"hook_failure_total"
#:labels '(event)))
- (define (process-events event-name handler)
- (while #t
- (match (datastore-list-unprocessed-hook-events datastore event-name 1)
- (() (sleep 1))
- (((id event arguments))
- (catch
- #t
- (lambda ()
- (log-msg (build-coordinator-logger build-coordinator)
- 'DEBUG
- "processing " event " event: " arguments)
- (apply handler build-coordinator arguments)
- (log-msg (build-coordinator-logger build-coordinator)
- 'DEBUG
- event " handler finished")
- (datastore-delete-unprocessed-hook-event datastore id)
-
- ;; If this is the hook for a successful build, once the hook
- ;; completed successfully, delete the nar files for this build.
- (when (eq? 'build-success event)
- (match arguments
- ((build-id)
- (let ((data-location (build-data-location build-id)))
- (when (file-exists? data-location)
- (delete-file-recursively data-location))))))
- (metric-increment success-counter-metric
- #:label-values
- `((event . ,event))))
- (lambda (key . args)
- (log-msg (build-coordinator-logger build-coordinator)
- 'ERROR
- "error running " event " hook: " key " " args)
- (metric-increment failure-counter-metric
- #:label-values
- `((event . ,event)))
- #f))))))
-
- (for-each
+ (define (process-event event-name handler)
+ (match (datastore-list-unprocessed-hook-events datastore event-name 1)
+ (() #f)
+ (((id event arguments))
+ (catch
+ #t
+ (lambda ()
+ (log-msg (build-coordinator-logger build-coordinator)
+ 'DEBUG
+ "processing " event " event: " arguments)
+ (apply handler build-coordinator arguments)
+ (log-msg (build-coordinator-logger build-coordinator)
+ 'DEBUG
+ event " handler finished")
+ (datastore-delete-unprocessed-hook-event datastore id)
+
+ ;; If this is the hook for a successful build, once the hook
+ ;; completed successfully, delete the nar files for this build.
+ (when (eq? 'build-success event)
+ (match arguments
+ ((build-id)
+ (let ((data-location (build-data-location build-id)))
+ (when (file-exists? data-location)
+ (delete-file-recursively data-location))))))
+ (metric-increment success-counter-metric
+ #:label-values
+ `((event . ,event))))
+ (lambda (key . args)
+ (log-msg (build-coordinator-logger build-coordinator)
+ 'ERROR
+ "error running " event " hook: " key " " args)
+ (metric-increment failure-counter-metric
+ #:label-values
+ `((event . ,event)))))
+ #t)))
+
+ (map
(match-lambda
((event-name . handler)
- (parameterize (((@@ (fibers internal) current-fiber) #f))
- (call-with-new-thread
- (lambda ()
- (with-exception-handler
- (lambda (exn)
- (log-msg (build-coordinator-logger build-coordinator)
- 'CRITICAL
- "hook processing thread " event-name " exception: " exn)
- (exit 1))
- (lambda ()
- (process-events event-name handler))))))))
- (build-coordinator-hooks build-coordinator))
-
- #t)
+ (let ((mtx (make-mutex))
+ (condvar (make-condition-variable)))
+ (parameterize (((@@ (fibers internal) current-fiber) #f))
+ (call-with-new-thread
+ (lambda ()
+ (lock-mutex mtx)
+ (with-exception-handler
+ (lambda (exn)
+ (log-msg (build-coordinator-logger build-coordinator)
+ 'CRITICAL
+ "hook processing thread " event-name
+ " exception: " exn)
+ (primitive-exit 1))
+ (lambda ()
+ (while #t
+ (unless (process-event event-name handler)
+ (wait-condition-variable condvar
+ mtx
+ (+ (current-time)
+ wait-timeout-seconds
+ (random 30))))))))))
+ (cons event-name condvar))))
+ (build-coordinator-hooks build-coordinator)))
(define (fetch-builds build-coordinator agent systems count)
(call-with-duration-metric
@@ -584,18 +609,28 @@
#f)
;; TODO Check what the value of this is
(assoc-ref result-json "end_time"))
+ (build-coordinator-prompt-hook-processing-for-event
+ build-coordinator
+ (if success?
+ 'build-success
+ 'build-failure))
#t))))
-(define (handle-build-start-report datastore
+(define (handle-build-start-report build-coordinator
agent-id
build-id)
- (datastore-store-build-start datastore
+ (datastore-store-build-start (build-coordinator-datastore build-coordinator)
build-id
- agent-id))
+ agent-id)
+ (build-coordinator-prompt-hook-processing-for-event build-coordinator
+ 'build-start))
-(define (handle-setup-failure-report datastore
+(define (handle-setup-failure-report build-coordinator
agent-id build-id report-json)
+ (define datastore
+ (build-coordinator-datastore build-coordinator))
+
(let ((failure-reason (assoc-ref report-json "failure_reason")))
(if (string=? failure-reason "missing_inputs")
;; For missing inputs, we need to store the inputs that were missing,
@@ -605,7 +640,11 @@
(datastore-store-setup-failure/missing-inputs datastore
build-id
agent-id
- missing-inputs))
+ missing-inputs)
+ (build-coordinator-prompt-hook-processing-for-event
+ build-coordinator
+ 'build-missing-inputs))
+
(datastore-store-setup-failure datastore
build-id
agent-id