From c146e61ae6edcf0ae2d4eb11cf553e6500326c3a Mon Sep 17 00:00:00 2001 From: Christopher Baines Date: Sat, 24 Oct 2020 14:57:45 +0100 Subject: Have the agent handle errors from the coordinator When submitting builds. The agent will now retry the relevant thing, like uploading the log file if the coordinator says that still needs doing. --- guix-build-coordinator/agent-messaging/http.scm | 32 +++++- guix-build-coordinator/agent.scm | 127 ++++++++++++++++++------ 2 files changed, 124 insertions(+), 35 deletions(-) diff --git a/guix-build-coordinator/agent-messaging/http.scm b/guix-build-coordinator/agent-messaging/http.scm index 202b90e..285ef5a 100644 --- a/guix-build-coordinator/agent-messaging/http.scm +++ b/guix-build-coordinator/agent-messaging/http.scm @@ -44,7 +44,10 @@ #:use-module (guix-build-coordinator utils) #:use-module (guix-build-coordinator datastore) #:use-module (guix-build-coordinator coordinator) - #:export (submit-status + #:export (agent-error-from-coordinator? + agent-error-from-coordinator-details + + submit-status submit-log-file submit-build-result report-build-start @@ -52,6 +55,11 @@ submit-output fetch-builds-for-agent)) +(define-exception-type &agent-error-from-coordinator &error + make-agent-error-from-coordinator + agent-error-from-coordinator? + (details agent-error-from-coordinator-details)) + (define (coordinator-uri-for-path base-uri-string agent-path) (let* ((base-uri (string->uri base-uri-string)) (scheme (uri-scheme base-uri)) @@ -119,7 +127,19 @@ #:headers `((Authorization . ,auth-value) ,@headers)))))) - (if (>= (response-code response) 400) + (let ((code (response-code response))) + (cond + ((eq? code 400) + (and=> (coordinator-handle-failed-request method + path + response + body) + (lambda (error) + (raise-exception + (make-agent-error-from-coordinator + (assoc-ref error "error")))))) + + ((>= (response-code response) 400) (let ((body (coordinator-handle-failed-request method path @@ -138,14 +158,16 @@ (set! first-request-failed? #t) (raise-exception (make-exception-with-message - body))))) + body)))))) + (else (values (json-string->scm (utf8->string body)) - response)))) + response)))))) (retry-on-error make-request #:times 9 - #:delay 10)) + #:delay 10 + #:ignore agent-error-from-coordinator?)) (define (submit-status coordinator-uri agent-uuid password status) diff --git a/guix-build-coordinator/agent.scm b/guix-build-coordinator/agent.scm index 747c6b1..4951ba6 100644 --- a/guix-build-coordinator/agent.scm +++ b/guix-build-coordinator/agent.scm @@ -269,35 +269,102 @@ (define (post-build-success uuid coordinator-uri password build-id derivation end-time) + (define output-details + (map + (match-lambda + ((output-name . output) + (let ((path-info (with-store store + (query-path-info + store + (derivation-output-path output))))) + `((name . ,output-name) + (hash . ,(bytevector->nix-base32-string + (path-info-hash path-info))) + (size . ,(path-info-nar-size path-info)) + (references . ,(list->vector + (map basename + (path-info-references path-info)))))))) + + (derivation-outputs (read-derivation-from-file derivation)))) + + (define (attempt-submit-build-result) + (with-exception-handler + (lambda (exn) + (unless (agent-error-from-coordinator? exn) + (raise-exception exn)) + + (let ((details + (agent-error-from-coordinator-details exn))) + (if (string? details) + (cond + ((string=? details "build_already_processed") + (simple-format + #t "warning: coordinator indicates this build is already marked as processed\n") + #t) + ((string=? details "missing_build_log_file") + ;; Retry submitting the log file + (agent-submit-log-file uuid coordinator-uri password + build-id derivation) + + (attempt-submit-build-result)) + (else + (raise-exception + (make-exception + (make-exception-with-message + "unrecognised error from coordinator") + (make-exception-with-irritants + details))))) + (or + (and=> (assoc-ref details "missing_output") + (lambda (missing-output-name) + (let ((missing-output + (any (match-lambda + ((name . output) + (if (string=? name missing-output-name) + output + #f))) + (derivation-outputs + (read-derivation-from-file derivation))))) + (unless missing-output + (raise-exception + (make-exception + (make-exception-with-message + "unknown missing output") + (make-exception-with-irritants + missing-output-name)))) + + (submit-one-output missing-output-name + missing-output)) + + (attempt-submit-build-result))) + (raise-exception + (make-exception + (make-exception-with-message + "unrecognised error from coordinator") + (make-exception-with-irritants + details))))))) + (lambda () + (submit-build-result + coordinator-uri uuid password build-id + `((result . success) + (end_time . ,(strftime "%F %T" end-time)) + (outputs . ,(list->vector output-details))))) + #:unwind? #t)) + + (define (submit-one-output output-name output) + (simple-format #t "submitting output ~A\n" + (derivation-output-path output)) + (submit-output coordinator-uri uuid password + build-id output-name + (derivation-output-path output))) + (simple-format #t "build ~A successful, reporting to coordinator\n" build-id) - (let ((output-details - (map - (match-lambda - ((output-name . output) - (simple-format #t "submitting output ~A\n" - (derivation-output-path output)) - (submit-output coordinator-uri uuid password - build-id output-name - (derivation-output-path output)) - - (let ((path-info (with-store store - (query-path-info - store - (derivation-output-path output))))) - `((name . ,output-name) - (hash . ,(bytevector->nix-base32-string - (path-info-hash path-info))) - (size . ,(path-info-nar-size path-info)) - (references . ,(list->vector - (map basename - (path-info-references path-info)))))))) - - (derivation-outputs (read-derivation-from-file derivation))))) - - (simple-format #t "finished submitting outputs, reporting result\n") - (submit-build-result - coordinator-uri uuid password build-id - `((result . success) - (end_time . ,(strftime "%F %T" end-time)) - (outputs . ,(list->vector output-details)))))) + + (for-each (match-lambda + ((output-name . output) + (submit-one-output output-name output))) + (derivation-outputs (read-derivation-from-file derivation))) + + (simple-format #t "finished submitting outputs, reporting result\n") + (attempt-submit-build-result)) -- cgit v1.2.3