From a43c537109766d6403dbb0f03e551aa7020d1150 Mon Sep 17 00:00:00 2001 From: Christopher Baines Date: Fri, 17 Apr 2020 11:48:07 +0100 Subject: Add a hook to handle missing inputs That submits new build jobs to build these missing inputs if appropriate. This means that you can tell the coordinator to build something, and it will automatically attempt to build the dependencies if they're missing. --- guix-build-coordinator/agent-messaging/http.scm | 3 +- guix-build-coordinator/build-allocator.scm | 95 ++++++++++++++++++------- guix-build-coordinator/coordinator.scm | 31 ++++++-- guix-build-coordinator/hooks.scm | 44 +++++++++++- scripts/guix-build-coordinator.in | 15 ++-- 5 files changed, 149 insertions(+), 39 deletions(-) diff --git a/guix-build-coordinator/agent-messaging/http.scm b/guix-build-coordinator/agent-messaging/http.scm index b2bc9a5..58704e7 100644 --- a/guix-build-coordinator/agent-messaging/http.scm +++ b/guix-build-coordinator/agent-messaging/http.scm @@ -203,7 +203,8 @@ port. Also, the port used can be changed by passing the --port option.\n" (if (authenticated? agent-id-for-build request) (begin (handle-setup-failure-report - datastore agent-id-for-build uuid + datastore hook-channel + agent-id-for-build uuid (json-string->scm (utf8->string body))) ;; Trigger build allocation, so that the allocator can handle ;; this setup failure diff --git a/guix-build-coordinator/build-allocator.scm b/guix-build-coordinator/build-allocator.scm index 1ba1ead..d8ec595 100644 --- a/guix-build-coordinator/build-allocator.scm +++ b/guix-build-coordinator/build-allocator.scm @@ -25,39 +25,82 @@ #:export (basic-build-allocation-strategy)) (define (basic-build-allocation-strategy datastore) + (define (log . args) + (when #f + (simple-format #t "allocator: ~A\n" + (string-join (map (lambda (arg) + (simple-format #f "~A" arg)) + args) + " ")))) + (let ((agents (datastore-list-agents datastore)) (builds (datastore-list-unprocessed-builds datastore)) (setup-failures (datastore-fetch-setup-failures datastore))) (define (filter-builds-for-agent agent-id) + (define (output-has-successful-build? output) + (log "considering missing input:" output) + (any (lambda (output-build) + (let ((build-successful? + (string=? (assq-ref output-build 'result) + "success"))) + (when build-successful? + (log "found successful build:" (assq-ref output-build 'uuid))) + + build-successful?)) + (datastore-list-builds-for-output datastore output))) + + (define (relevant-setup-failure? setup-failure) + (log "setup failure:" setup-failure) + (let ((failure-reason + (assq-ref setup-failure 'failure-reason))) + (cond + ((string=? failure-reason "missing_inputs") + (not + (every output-has-successful-build? + (datastore-list-setup-failure-missing-inputs + datastore + (assq-ref setup-failure 'id))))) + ((string=? failure-reason "could_not_delete_outputs") + ;; This problem might go away, but just don't try the same agent + ;; again for now. + (string=? (assq-ref setup-failure 'agent-id) + agent-id)) + (else + (error "Unknown setup failure " failure-reason))))) + (lambda (build) + (log "build:" (assq-ref build 'uuid)) (let* ((build-id (assq-ref build 'uuid)) (setup-failures-for-build (or (assoc-ref setup-failures build-id) - '()))) - (if (any (lambda (setup-failure) - (string=? (assq-ref setup-failure 'agent-id) - agent-id)) - setup-failures-for-build) - ;; Don't allocated builds to agents where the setup has failed - ;; in the past - #f - #t)))) - - (append-map - (lambda (agent-id) - (let ((builds-for-agent - (filter (filter-builds-for-agent agent-id) - builds))) - (map (lambda (build-id ordering) - (list build-id - agent-id - ordering)) - (map (lambda (build) - (assq-ref build 'uuid)) - builds-for-agent) - (iota (length builds-for-agent))))) - (map (lambda (agent) - (assq-ref agent 'uuid)) - agents)))) + '())) + (relevant-setup-failures + (filter relevant-setup-failure? + setup-failures-for-build))) + (log "relevant setup failures:" relevant-setup-failures) + (if (null? relevant-setup-failures) + #t + #f)))) + + (let ((result + (append-map + (lambda (agent-id) + (log "considering builds for" agent-id) + (let ((builds-for-agent + (filter (filter-builds-for-agent agent-id) + builds))) + (map (lambda (build-id ordering) + (list build-id + agent-id + ordering)) + (map (lambda (build) + (assq-ref build 'uuid)) + builds-for-agent) + (iota (length builds-for-agent))))) + (map (lambda (agent) + (assq-ref agent 'uuid)) + agents)))) + (log "finished") + result))) diff --git a/guix-build-coordinator/coordinator.scm b/guix-build-coordinator/coordinator.scm index 90801f9..37af6c4 100644 --- a/guix-build-coordinator/coordinator.scm +++ b/guix-build-coordinator/coordinator.scm @@ -132,6 +132,17 @@ (simple-format #t "error: running build-failure hook: ~A ~A\n" key args) #f))) + (('build-missing-inputs build-id missing-inputs) + (catch + #t + (lambda () + ((assq-ref hooks 'build-missing-inputs) datastore build-id + missing-inputs)) + (lambda (key . args) + (simple-format + #t "error: running build-missing-inputs hook: ~A ~A\n" + key args) + #f))) (unknown (simple-format #t "error: hooks: unknown message: ~A\n" unknown))) @@ -185,16 +196,24 @@ build-id)))) -(define (handle-setup-failure-report datastore agent-id build-id report-json) +(define (handle-setup-failure-report datastore hook-channel + agent-id build-id report-json) (let ((failure-reason (assoc-ref report-json "failure_reason"))) (if (string=? failure-reason "missing_inputs") ;; For missing inputs, we need to store the inputs that were missing, ;; so that has a special function - (datastore-store-setup-failure/missing-inputs - datastore - build-id - agent-id - (vector->list (assoc-ref report-json "missing_inputs"))) + (let ((missing-inputs + (vector->list (assoc-ref report-json "missing_inputs")))) + (datastore-store-setup-failure/missing-inputs datastore + build-id + agent-id + missing-inputs) + + (put-message hook-channel + (list 'build-missing-inputs + build-id + missing-inputs))) + (datastore-store-setup-failure datastore build-id agent-id diff --git a/guix-build-coordinator/hooks.scm b/guix-build-coordinator/hooks.scm index ed0b890..0116605 100644 --- a/guix-build-coordinator/hooks.scm +++ b/guix-build-coordinator/hooks.scm @@ -19,9 +19,13 @@ ;;; . (define-module (guix-build-coordinator hooks) + #:use-module (srfi srfi-1) + #:use-module (ice-9 match) #:use-module (guix-build-coordinator datastore) + #:use-module (guix-build-coordinator coordinator) #:export (default-build-success-hook - default-build-failure-hook)) + default-build-failure-hook + default-build-missing-inputs-hook)) (define (default-build-success-hook datastore build-id) (let ((agent-id @@ -32,7 +36,7 @@ build-id agent-id) (current-error-port)))) -(define* (default-build-failure-hook datastore build-id) +(define (default-build-failure-hook datastore build-id) (let ((agent-id (datastore-agent-for-build datastore build-id))) (display @@ -40,3 +44,39 @@ "build ~A failed (on agent ~A)\n" build-id agent-id) (current-error-port)))) + +(define (default-build-missing-inputs-hook datastore build-id missing-inputs) + (let ((build (datastore-find-build datastore build-id))) + (let ((derivation-inputs + (datastore-find-derivation-inputs datastore + (assq-ref build 'derivation-name)))) + (simple-format #t "missing-inputs: ~A\n~A\n" + build-id + (string-join (map (lambda (input) + (string-append " - " input)) + missing-inputs) + "\n")) + (for-each (lambda (missing-input) + (let ((input-derivation + (any (lambda (derivation-input) + (if (string=? (assq-ref derivation-input 'output) + missing-input) + (assq-ref derivation-input 'derivation) + #f)) + derivation-inputs))) + (unless input-derivation + (error "couldn't find a derivation for " missing-input)) + + (let ((builds-for-output + (datastore-list-builds-for-output datastore + missing-input))) + (if (null? builds-for-output) + (begin + (simple-format #t + "submitting build for ~A\n" + input-derivation) + (submit-build datastore input-derivation)) + (simple-format #t "~A builds exist for ~A, skipping\n" + (length builds-for-output) + missing-input))))) + missing-inputs)))) diff --git a/scripts/guix-build-coordinator.in b/scripts/guix-build-coordinator.in index 458b693..50dda71 100644 --- a/scripts/guix-build-coordinator.in +++ b/scripts/guix-build-coordinator.in @@ -89,14 +89,20 @@ (lambda (opt name arg result) (alist-cons 'build-failure-hook (read/eval arg) - (alist-delete 'build-failure-hook result)))))) + (alist-delete 'build-failure-hook result)))) + (option '("build-missing-inputs-hook") #t #f + (lambda (opt name arg result) + (alist-cons 'build-missing-inputs-hook + (read/eval arg) + (alist-delete 'build-missing-inputs-hook result)))))) (define %service-option-defaults ;; Alist of default option values `((port . 8745) (host . "0.0.0.0") (build-success-hook . ,default-build-success-hook) - (build-failure-hook . ,default-build-failure-hook))) + (build-failure-hook . ,default-build-failure-hook) + (build-missing-inputs-hook . ,default-build-missing-inputs-hook))) (define %agent-options (list (option '("uuid") #t #f @@ -178,8 +184,9 @@ (datastore (database-uri->datastore (assq-ref opts 'database))) (hooks - `((build-success . ,(assq-ref opts 'build-success-hook)) - (build-failure . ,(assq-ref opts 'build-failure-hook))))) + `((build-success . ,(assq-ref opts 'build-success-hook)) + (build-failure . ,(assq-ref opts 'build-failure-hook)) + (build-missing-inputs . ,(assq-ref opts 'build-missing-inputs-hook))))) (when (assoc-ref opts 'update-database) (datastore-update datastore)) -- cgit v1.2.3