diff options
-rw-r--r-- | guix-build-coordinator/agent-messaging/http.scm | 3 | ||||
-rw-r--r-- | guix-build-coordinator/build-allocator.scm | 95 | ||||
-rw-r--r-- | guix-build-coordinator/coordinator.scm | 31 | ||||
-rw-r--r-- | guix-build-coordinator/hooks.scm | 44 | ||||
-rw-r--r-- | scripts/guix-build-coordinator.in | 15 |
5 files changed, 149 insertions, 39 deletions
diff --git a/guix-build-coordinator/agent-messaging/http.scm b/guix-build-coordinator/agent-messaging/http.scm index b2bc9a5..58704e7 100644 --- a/guix-build-coordinator/agent-messaging/http.scm +++ b/guix-build-coordinator/agent-messaging/http.scm @@ -203,7 +203,8 @@ port. Also, the port used can be changed by passing the --port option.\n" (if (authenticated? agent-id-for-build request) (begin (handle-setup-failure-report - datastore agent-id-for-build uuid + datastore hook-channel + agent-id-for-build uuid (json-string->scm (utf8->string body))) ;; Trigger build allocation, so that the allocator can handle ;; this setup failure diff --git a/guix-build-coordinator/build-allocator.scm b/guix-build-coordinator/build-allocator.scm index 1ba1ead..d8ec595 100644 --- a/guix-build-coordinator/build-allocator.scm +++ b/guix-build-coordinator/build-allocator.scm @@ -25,39 +25,82 @@ #:export (basic-build-allocation-strategy)) (define (basic-build-allocation-strategy datastore) + (define (log . args) + (when #f + (simple-format #t "allocator: ~A\n" + (string-join (map (lambda (arg) + (simple-format #f "~A" arg)) + args) + " ")))) + (let ((agents (datastore-list-agents datastore)) (builds (datastore-list-unprocessed-builds datastore)) (setup-failures (datastore-fetch-setup-failures datastore))) (define (filter-builds-for-agent agent-id) + (define (output-has-successful-build? output) + (log "considering missing input:" output) + (any (lambda (output-build) + (let ((build-successful? + (string=? (assq-ref output-build 'result) + "success"))) + (when build-successful? + (log "found successful build:" (assq-ref output-build 'uuid))) + + build-successful?)) + (datastore-list-builds-for-output datastore output))) + + (define (relevant-setup-failure? setup-failure) + (log "setup failure:" setup-failure) + (let ((failure-reason + (assq-ref setup-failure 'failure-reason))) + (cond + ((string=? failure-reason "missing_inputs") + (not + (every output-has-successful-build? + (datastore-list-setup-failure-missing-inputs + datastore + (assq-ref setup-failure 'id))))) + ((string=? failure-reason "could_not_delete_outputs") + ;; This problem might go away, but just don't try the same agent + ;; again for now. + (string=? (assq-ref setup-failure 'agent-id) + agent-id)) + (else + (error "Unknown setup failure " failure-reason))))) + (lambda (build) + (log "build:" (assq-ref build 'uuid)) (let* ((build-id (assq-ref build 'uuid)) (setup-failures-for-build (or (assoc-ref setup-failures build-id) - '()))) - (if (any (lambda (setup-failure) - (string=? (assq-ref setup-failure 'agent-id) - agent-id)) - setup-failures-for-build) - ;; Don't allocated builds to agents where the setup has failed - ;; in the past - #f - #t)))) - - (append-map - (lambda (agent-id) - (let ((builds-for-agent - (filter (filter-builds-for-agent agent-id) - builds))) - (map (lambda (build-id ordering) - (list build-id - agent-id - ordering)) - (map (lambda (build) - (assq-ref build 'uuid)) - builds-for-agent) - (iota (length builds-for-agent))))) - (map (lambda (agent) - (assq-ref agent 'uuid)) - agents)))) + '())) + (relevant-setup-failures + (filter relevant-setup-failure? + setup-failures-for-build))) + (log "relevant setup failures:" relevant-setup-failures) + (if (null? relevant-setup-failures) + #t + #f)))) + + (let ((result + (append-map + (lambda (agent-id) + (log "considering builds for" agent-id) + (let ((builds-for-agent + (filter (filter-builds-for-agent agent-id) + builds))) + (map (lambda (build-id ordering) + (list build-id + agent-id + ordering)) + (map (lambda (build) + (assq-ref build 'uuid)) + builds-for-agent) + (iota (length builds-for-agent))))) + (map (lambda (agent) + (assq-ref agent 'uuid)) + agents)))) + (log "finished") + result))) diff --git a/guix-build-coordinator/coordinator.scm b/guix-build-coordinator/coordinator.scm index 90801f9..37af6c4 100644 --- a/guix-build-coordinator/coordinator.scm +++ b/guix-build-coordinator/coordinator.scm @@ -132,6 +132,17 @@ (simple-format #t "error: running build-failure hook: ~A ~A\n" key args) #f))) + (('build-missing-inputs build-id missing-inputs) + (catch + #t + (lambda () + ((assq-ref hooks 'build-missing-inputs) datastore build-id + missing-inputs)) + (lambda (key . args) + (simple-format + #t "error: running build-missing-inputs hook: ~A ~A\n" + key args) + #f))) (unknown (simple-format #t "error: hooks: unknown message: ~A\n" unknown))) @@ -185,16 +196,24 @@ build-id)))) -(define (handle-setup-failure-report datastore agent-id build-id report-json) +(define (handle-setup-failure-report datastore hook-channel + agent-id build-id report-json) (let ((failure-reason (assoc-ref report-json "failure_reason"))) (if (string=? failure-reason "missing_inputs") ;; For missing inputs, we need to store the inputs that were missing, ;; so that has a special function - (datastore-store-setup-failure/missing-inputs - datastore - build-id - agent-id - (vector->list (assoc-ref report-json "missing_inputs"))) + (let ((missing-inputs + (vector->list (assoc-ref report-json "missing_inputs")))) + (datastore-store-setup-failure/missing-inputs datastore + build-id + agent-id + missing-inputs) + + (put-message hook-channel + (list 'build-missing-inputs + build-id + missing-inputs))) + (datastore-store-setup-failure datastore build-id agent-id diff --git a/guix-build-coordinator/hooks.scm b/guix-build-coordinator/hooks.scm index ed0b890..0116605 100644 --- a/guix-build-coordinator/hooks.scm +++ b/guix-build-coordinator/hooks.scm @@ -19,9 +19,13 @@ ;;; <http://www.gnu.org/licenses/>. (define-module (guix-build-coordinator hooks) + #:use-module (srfi srfi-1) + #:use-module (ice-9 match) #:use-module (guix-build-coordinator datastore) + #:use-module (guix-build-coordinator coordinator) #:export (default-build-success-hook - default-build-failure-hook)) + default-build-failure-hook + default-build-missing-inputs-hook)) (define (default-build-success-hook datastore build-id) (let ((agent-id @@ -32,7 +36,7 @@ build-id agent-id) (current-error-port)))) -(define* (default-build-failure-hook datastore build-id) +(define (default-build-failure-hook datastore build-id) (let ((agent-id (datastore-agent-for-build datastore build-id))) (display @@ -40,3 +44,39 @@ "build ~A failed (on agent ~A)\n" build-id agent-id) (current-error-port)))) + +(define (default-build-missing-inputs-hook datastore build-id missing-inputs) + (let ((build (datastore-find-build datastore build-id))) + (let ((derivation-inputs + (datastore-find-derivation-inputs datastore + (assq-ref build 'derivation-name)))) + (simple-format #t "missing-inputs: ~A\n~A\n" + build-id + (string-join (map (lambda (input) + (string-append " - " input)) + missing-inputs) + "\n")) + (for-each (lambda (missing-input) + (let ((input-derivation + (any (lambda (derivation-input) + (if (string=? (assq-ref derivation-input 'output) + missing-input) + (assq-ref derivation-input 'derivation) + #f)) + derivation-inputs))) + (unless input-derivation + (error "couldn't find a derivation for " missing-input)) + + (let ((builds-for-output + (datastore-list-builds-for-output datastore + missing-input))) + (if (null? builds-for-output) + (begin + (simple-format #t + "submitting build for ~A\n" + input-derivation) + (submit-build datastore input-derivation)) + (simple-format #t "~A builds exist for ~A, skipping\n" + (length builds-for-output) + missing-input))))) + missing-inputs)))) diff --git a/scripts/guix-build-coordinator.in b/scripts/guix-build-coordinator.in index 458b693..50dda71 100644 --- a/scripts/guix-build-coordinator.in +++ b/scripts/guix-build-coordinator.in @@ -89,14 +89,20 @@ (lambda (opt name arg result) (alist-cons 'build-failure-hook (read/eval arg) - (alist-delete 'build-failure-hook result)))))) + (alist-delete 'build-failure-hook result)))) + (option '("build-missing-inputs-hook") #t #f + (lambda (opt name arg result) + (alist-cons 'build-missing-inputs-hook + (read/eval arg) + (alist-delete 'build-missing-inputs-hook result)))))) (define %service-option-defaults ;; Alist of default option values `((port . 8745) (host . "0.0.0.0") (build-success-hook . ,default-build-success-hook) - (build-failure-hook . ,default-build-failure-hook))) + (build-failure-hook . ,default-build-failure-hook) + (build-missing-inputs-hook . ,default-build-missing-inputs-hook))) (define %agent-options (list (option '("uuid") #t #f @@ -178,8 +184,9 @@ (datastore (database-uri->datastore (assq-ref opts 'database))) (hooks - `((build-success . ,(assq-ref opts 'build-success-hook)) - (build-failure . ,(assq-ref opts 'build-failure-hook))))) + `((build-success . ,(assq-ref opts 'build-success-hook)) + (build-failure . ,(assq-ref opts 'build-failure-hook)) + (build-missing-inputs . ,(assq-ref opts 'build-missing-inputs-hook))))) (when (assoc-ref opts 'update-database) (datastore-update datastore)) |