diff options
Diffstat (limited to 'guix-build-coordinator/coordinator.scm')
-rw-r--r-- | guix-build-coordinator/coordinator.scm | 282 |
1 files changed, 225 insertions, 57 deletions
diff --git a/guix-build-coordinator/coordinator.scm b/guix-build-coordinator/coordinator.scm index 42d5600..834c44b 100644 --- a/guix-build-coordinator/coordinator.scm +++ b/guix-build-coordinator/coordinator.scm @@ -25,6 +25,7 @@ #:use-module (srfi srfi-11) #:use-module (srfi srfi-19) #:use-module (srfi srfi-26) + #:use-module (srfi srfi-43) #:use-module (srfi srfi-71) #:use-module (ice-9 ftw) #:use-module (ice-9 vlist) @@ -98,6 +99,10 @@ build-coordinator-prompt-hook-processing-for-event start-hook-processing-threads + build-coordinator-allocation-plan-stats + build-coordinator-trigger-build-allocation + build-coordinator-list-allocation-plan-builds + build-output-file-location build-log-file-destination build-log-file-location @@ -117,7 +122,8 @@ (define-record-type <build-coordinator> (make-build-coordinator-record datastore hooks metrics-registry - allocation-strategy logger) + allocation-strategy allocator-channel + logger) build-coordinator? (datastore build-coordinator-datastore) (hooks build-coordinator-hooks) @@ -125,8 +131,10 @@ set-build-coordinator-hook-condvars!) (metrics-registry build-coordinator-metrics-registry) (allocation-strategy build-coordinator-allocation-strategy) - (allocator-thread build-coordinator-allocator-thread - set-build-coordinator-allocator-thread!) + (trigger-build-allocation + build-coordinator-trigger-build-allocation + set-build-coordinator-trigger-build-allocation!) + (allocator-channel build-coordinator-allocator-channel) (logger build-coordinator-logger) (events-channel build-coordinator-events-channel set-build-coordinator-events-channel!) @@ -411,6 +419,7 @@ hooks metrics-registry allocation-strategy + (make-channel) lgr))) (add-handler! lgr port-log) @@ -461,7 +470,7 @@ (when update-datastore? (datastore-update (build-coordinator-datastore build-coordinator))) - (set-build-coordinator-allocator-thread! + (set-build-coordinator-trigger-build-allocation! build-coordinator (make-build-allocator-thread build-coordinator)) @@ -553,6 +562,8 @@ (spawn-fiber-to-watch-for-deferred-builds build-coordinator) + (spawn-build-allocation-plan-management-fiber build-coordinator) + (set-build-coordinator-scheduler! build-coordinator (current-scheduler)) @@ -864,7 +875,9 @@ (make-transaction-rollback-exception 'skipped-as-build-required-by-another))) - (datastore-remove-build-from-allocation-plan datastore uuid) + (build-coordinator-remove-build-from-allocation-plan + build-coordinator + uuid) (datastore-cancel-build datastore uuid) (datastore-insert-unprocessed-hook-event datastore "build-canceled" @@ -1002,7 +1015,7 @@ (processor_count . ,processor-count)))) (define (trigger-build-allocation build-coordinator) - ((build-coordinator-allocator-thread build-coordinator))) + ((build-coordinator-trigger-build-allocation build-coordinator))) (define (build-coordinator-prompt-hook-processing-for-event build-coordinator event-name) @@ -1023,16 +1036,20 @@ (allocator-proc datastore #:metrics-registry (build-coordinator-metrics-registry build-coordinator))))) - (datastore-replace-build-allocation-plan datastore new-plan) - - (let ((build-count-per-agent - (datastore-count-build-allocation-plan-entries - datastore))) - (build-coordinator-send-event - build-coordinator - "allocation-plan-update" - `((allocation_plan_counts . ,build-count-per-agent))))) - #t) + (build-coordinator-replace-build-allocation-plan + build-coordinator + new-plan) + + (build-coordinator-send-event + build-coordinator + "allocation-plan-update" + `((allocation_plan_counts + . ,(map + (match-lambda + ((agent-id . builds) + (cons agent-id (length builds)))) + new-plan)))) + #t)) (define (make-build-allocator-thread build-coordinator) (define mtx (make-mutex)) @@ -1104,31 +1121,187 @@ exn) (exit 1)) (lambda () - (let ((build-allocation-plan-total - (make-gauge-metric - (build-coordinator-metrics-registry build-coordinator) - "build_allocation_plan_total" - #:labels '(agent_id)))) - (with-time-logging - "allocator initialise metrics" - (for-each (match-lambda - ((agent-id . count) - (metric-set build-allocation-plan-total - count - #:label-values - `((agent_id . ,agent-id))))) - (datastore-count-build-allocation-plan-entries - (build-coordinator-datastore build-coordinator))))) - (update-build-allocation-plan-loop))))) trigger-build-allocation) +(define (spawn-build-allocation-plan-management-fiber coordinator) + (define allocation-plan '()) + + (define allocation-plan-metric + (make-gauge-metric + (build-coordinator-metrics-registry coordinator) + "build_allocation_plan_total" + #:labels '(agent_id))) + + (define (update-build-allocation-plan-metrics!) + (for-each + (match-lambda + ((agent-id . builds) + (metric-set allocation-plan-metric + (length builds) + #:label-values + `((agent_id . ,agent-id))))) + allocation-plan) + #t) + + (spawn-fiber + (lambda () + (while #t + (with-exception-handler + (lambda (exn) + (simple-format (current-error-port) + "exception in allocation plan fiber: ~A\n" + exn)) + (lambda () + (with-throw-handler #t + (lambda () + (match (get-message (build-coordinator-allocator-channel coordinator)) + (('stats reply) + (put-message + reply + (map + (match-lambda + ((agent-id . builds) + (cons agent-id (length builds)))) + allocation-plan))) + (('fetch-agent-plan agent-id reply) + (put-message + reply + (or (assoc-ref allocation-plan agent-id) '()))) + (('fetch-plan reply) + (put-message reply allocation-plan)) + (('remove-build uuid reply) + (set! + allocation-plan + (map + (match-lambda + ((agent-id . builds) + (cons agent-id + (remove + (lambda (build-uuid) + (string=? build-uuid uuid)) + builds)))) + allocation-plan)) + + (put-message reply #t)) + (('replace new-plan reply) + + (set! allocation-plan new-plan) + + (update-build-allocation-plan-metrics!) + + (put-message reply #t)))) + (lambda _ + (backtrace)))) + #:unwind? #t))))) + +(define (build-coordinator-allocation-plan-stats coordinator) + (let ((reply (make-channel))) + (put-message (build-coordinator-allocator-channel coordinator) + (list 'stats reply)) + (get-message reply))) + +(define (build-coordinator-fetch-agent-allocation-plan coordinator agent-id) + (let ((reply (make-channel))) + (put-message (build-coordinator-allocator-channel coordinator) + (list 'fetch-agent-plan agent-id reply)) + (get-message reply))) + +(define (build-coordinator-allocation-plan coordinator) + (let ((reply (make-channel))) + (put-message (build-coordinator-allocator-channel coordinator) + (list 'fetch-plan reply)) + (get-message reply))) + +(define (build-coordinator-build-in-allocation-plan? coordinator uuid) + (any + (match-lambda + ((agent-id . build-uuids) + (->bool (member uuid build-uuids string=?)))) + (build-coordinator-allocation-plan coordinator))) + +(define (build-coordinator-remove-build-from-allocation-plan coordinator uuid) + (let ((reply (make-channel))) + (put-message (build-coordinator-allocator-channel coordinator) + (list 'remove-build uuid reply)) + (get-message reply))) + +(define (build-coordinator-replace-build-allocation-plan coordinator plan) + (let ((reply (make-channel))) + (put-message (build-coordinator-allocator-channel coordinator) + (list 'replace plan reply)) + (get-message reply))) + +(define (build-coordinator-fetch-build-to-allocate coordinator + agent-id) + (define datastore + (build-coordinator-datastore coordinator)) + + (let loop ((planned-builds + (build-coordinator-fetch-agent-allocation-plan coordinator + agent-id))) + + (if (null? planned-builds) + #f + (match (datastore-fetch-build-to-allocate datastore (first planned-builds)) + (#f #f) + (#(uuid derivation-id derivation-name derived_priority) + + (if (datastore-check-if-derivation-conflicts? + datastore + agent-id + derivation-id) + (loop (cdr planned-builds)) + `((uuid . ,uuid) + (derivation_name . ,derivation-name) + (derived_priority . ,derived_priority)))))))) + +(define* (build-coordinator-list-allocation-plan-builds coordinator + agent-id + #:key limit) + (define (take* lst i) + (if (< (length lst) i) + lst + (take lst i))) + + (define datastore + (build-coordinator-datastore coordinator)) + + (let ((build-ids + (build-coordinator-fetch-agent-allocation-plan coordinator + agent-id))) + (filter-map + (lambda (build-id) + (match (datastore-fetch-build-to-allocate datastore build-id) + (#(uuid derivation_id derivation_name derived_priority) + (let ((build-details (datastore-find-build datastore uuid))) + `((uuid . ,uuid) + (derivation_name . ,derivation_name) + (system . ,(datastore-find-build-derivation-system + datastore + uuid)) + (priority . ,(assq-ref build-details 'priority)) + (derived_priority . ,derived_priority) + (tags . ,(vector-map + (lambda (_ tag) + (match tag + ((key . value) + `((key . ,key) + (value . ,value))))) + (datastore-fetch-build-tags + datastore + uuid)))))) + (#f #f))) + (if limit + (take* build-ids limit) + build-ids)))) + (define (spawn-fiber-to-watch-for-deferred-builds coordinator) (spawn-fiber (lambda () + (sleep 10) (while #t - (sleep 60) ; 1 minute (with-exception-handler (lambda (exn) (simple-format (current-error-port) @@ -1136,14 +1309,21 @@ exn)) (lambda () (let ((first-deferred-build - (datastore-find-first-unallocated-deferred-build - (build-coordinator-datastore coordinator)))) - (when (and first-deferred-build - (time<=? (date->time-utc - (assq-ref first-deferred-build 'deferred-until)) - (current-time))) - (simple-format #t "guix-build-coordinator: triggering build allocation for deferred build: ~A\n" (assq-ref first-deferred-build 'uuid)) - (trigger-build-allocation coordinator)))) + (datastore-find-deferred-build + (build-coordinator-datastore coordinator) + (lambda (build-details) + (build-coordinator-build-in-allocation-plan? + coordinator + (assq-ref build-details 'uuid)))))) + (if (and first-deferred-build + (time<=? (date->time-utc + (assq-ref first-deferred-build 'deferred-until)) + (current-time))) + (begin + (simple-format #t "guix-build-coordinator: triggering build allocation for deferred build: ~A\n" (assq-ref first-deferred-build 'uuid)) + (trigger-build-allocation coordinator) + (sleep 10)) + (sleep 60)))) #:unwind? #t))) #:parallel? #t)) @@ -1322,11 +1502,13 @@ (build-coordinator-datastore build-coordinator)) (define (allocate-one-build agent-id) - (let ((build-details (datastore-fetch-build-to-allocate datastore agent-id))) + (let ((build-details (build-coordinator-fetch-build-to-allocate + build-coordinator agent-id))) (if build-details (let ((build-id (assq-ref build-details 'uuid))) (datastore-insert-to-allocated-builds datastore agent-id (list build-id)) - (datastore-remove-builds-from-plan datastore (list build-id)) + (build-coordinator-remove-build-from-allocation-plan + build-coordinator build-id) build-details) #f))) @@ -1358,20 +1540,6 @@ (let ((new-builds (allocate-several-builds agent (- target-count start-count)))) - (unless (null? new-builds) - (let ((allocation-plan-metric - (metrics-registry-fetch-metric - (slot-ref datastore 'metrics-registry) - "build_allocation_plan_total"))) - (for-each - (match-lambda - ((agent-id . count) - (metric-set allocation-plan-metric - count - #:label-values - `((agent_id . ,agent-id))))) - (datastore-count-build-allocation-plan-entries datastore)))) - ;; Previously allocate builds just returned newly allocated ;; builds, but if max-builds is provided, return all the ;; builds. This means the agent can handle this in a idempotent |