diff options
author | Christopher Baines <mail@cbaines.net> | 2020-10-24 14:57:45 +0100 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2020-10-24 14:57:45 +0100 |
commit | c146e61ae6edcf0ae2d4eb11cf553e6500326c3a (patch) | |
tree | e1dddba754899d758b9ffdf331eef4d775929831 | |
parent | d773cf49579af6a96736959e97314ce1218382e7 (diff) | |
download | build-coordinator-c146e61ae6edcf0ae2d4eb11cf553e6500326c3a.tar build-coordinator-c146e61ae6edcf0ae2d4eb11cf553e6500326c3a.tar.gz |
Have the agent handle errors from the coordinator
When submitting builds. The agent will now retry the relevant thing, like
uploading the log file if the coordinator says that still needs doing.
-rw-r--r-- | guix-build-coordinator/agent-messaging/http.scm | 32 | ||||
-rw-r--r-- | guix-build-coordinator/agent.scm | 127 |
2 files changed, 124 insertions, 35 deletions
diff --git a/guix-build-coordinator/agent-messaging/http.scm b/guix-build-coordinator/agent-messaging/http.scm index 202b90e..285ef5a 100644 --- a/guix-build-coordinator/agent-messaging/http.scm +++ b/guix-build-coordinator/agent-messaging/http.scm @@ -44,7 +44,10 @@ #:use-module (guix-build-coordinator utils) #:use-module (guix-build-coordinator datastore) #:use-module (guix-build-coordinator coordinator) - #:export (submit-status + #:export (agent-error-from-coordinator? + agent-error-from-coordinator-details + + submit-status submit-log-file submit-build-result report-build-start @@ -52,6 +55,11 @@ submit-output fetch-builds-for-agent)) +(define-exception-type &agent-error-from-coordinator &error + make-agent-error-from-coordinator + agent-error-from-coordinator? + (details agent-error-from-coordinator-details)) + (define (coordinator-uri-for-path base-uri-string agent-path) (let* ((base-uri (string->uri base-uri-string)) (scheme (uri-scheme base-uri)) @@ -119,7 +127,19 @@ #:headers `((Authorization . ,auth-value) ,@headers)))))) - (if (>= (response-code response) 400) + (let ((code (response-code response))) + (cond + ((eq? code 400) + (and=> (coordinator-handle-failed-request method + path + response + body) + (lambda (error) + (raise-exception + (make-agent-error-from-coordinator + (assoc-ref error "error")))))) + + ((>= (response-code response) 400) (let ((body (coordinator-handle-failed-request method path @@ -138,14 +158,16 @@ (set! first-request-failed? #t) (raise-exception (make-exception-with-message - body))))) + body)))))) + (else (values (json-string->scm (utf8->string body)) - response)))) + response)))))) (retry-on-error make-request #:times 9 - #:delay 10)) + #:delay 10 + #:ignore agent-error-from-coordinator?)) (define (submit-status coordinator-uri agent-uuid password status) diff --git a/guix-build-coordinator/agent.scm b/guix-build-coordinator/agent.scm index 747c6b1..4951ba6 100644 --- a/guix-build-coordinator/agent.scm +++ b/guix-build-coordinator/agent.scm @@ -269,35 +269,102 @@ (define (post-build-success uuid coordinator-uri password build-id derivation end-time) + (define output-details + (map + (match-lambda + ((output-name . output) + (let ((path-info (with-store store + (query-path-info + store + (derivation-output-path output))))) + `((name . ,output-name) + (hash . ,(bytevector->nix-base32-string + (path-info-hash path-info))) + (size . ,(path-info-nar-size path-info)) + (references . ,(list->vector + (map basename + (path-info-references path-info)))))))) + + (derivation-outputs (read-derivation-from-file derivation)))) + + (define (attempt-submit-build-result) + (with-exception-handler + (lambda (exn) + (unless (agent-error-from-coordinator? exn) + (raise-exception exn)) + + (let ((details + (agent-error-from-coordinator-details exn))) + (if (string? details) + (cond + ((string=? details "build_already_processed") + (simple-format + #t "warning: coordinator indicates this build is already marked as processed\n") + #t) + ((string=? details "missing_build_log_file") + ;; Retry submitting the log file + (agent-submit-log-file uuid coordinator-uri password + build-id derivation) + + (attempt-submit-build-result)) + (else + (raise-exception + (make-exception + (make-exception-with-message + "unrecognised error from coordinator") + (make-exception-with-irritants + details))))) + (or + (and=> (assoc-ref details "missing_output") + (lambda (missing-output-name) + (let ((missing-output + (any (match-lambda + ((name . output) + (if (string=? name missing-output-name) + output + #f))) + (derivation-outputs + (read-derivation-from-file derivation))))) + (unless missing-output + (raise-exception + (make-exception + (make-exception-with-message + "unknown missing output") + (make-exception-with-irritants + missing-output-name)))) + + (submit-one-output missing-output-name + missing-output)) + + (attempt-submit-build-result))) + (raise-exception + (make-exception + (make-exception-with-message + "unrecognised error from coordinator") + (make-exception-with-irritants + details))))))) + (lambda () + (submit-build-result + coordinator-uri uuid password build-id + `((result . success) + (end_time . ,(strftime "%F %T" end-time)) + (outputs . ,(list->vector output-details))))) + #:unwind? #t)) + + (define (submit-one-output output-name output) + (simple-format #t "submitting output ~A\n" + (derivation-output-path output)) + (submit-output coordinator-uri uuid password + build-id output-name + (derivation-output-path output))) + (simple-format #t "build ~A successful, reporting to coordinator\n" build-id) - (let ((output-details - (map - (match-lambda - ((output-name . output) - (simple-format #t "submitting output ~A\n" - (derivation-output-path output)) - (submit-output coordinator-uri uuid password - build-id output-name - (derivation-output-path output)) - - (let ((path-info (with-store store - (query-path-info - store - (derivation-output-path output))))) - `((name . ,output-name) - (hash . ,(bytevector->nix-base32-string - (path-info-hash path-info))) - (size . ,(path-info-nar-size path-info)) - (references . ,(list->vector - (map basename - (path-info-references path-info)))))))) - - (derivation-outputs (read-derivation-from-file derivation))))) - - (simple-format #t "finished submitting outputs, reporting result\n") - (submit-build-result - coordinator-uri uuid password build-id - `((result . success) - (end_time . ,(strftime "%F %T" end-time)) - (outputs . ,(list->vector output-details)))))) + + (for-each (match-lambda + ((output-name . output) + (submit-one-output output-name output))) + (derivation-outputs (read-derivation-from-file derivation))) + + (simple-format #t "finished submitting outputs, reporting result\n") + (attempt-submit-build-result)) |