aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher Baines <mail@cbaines.net>2020-10-24 14:57:45 +0100
committerChristopher Baines <mail@cbaines.net>2020-10-24 14:57:45 +0100
commitc146e61ae6edcf0ae2d4eb11cf553e6500326c3a (patch)
treee1dddba754899d758b9ffdf331eef4d775929831
parentd773cf49579af6a96736959e97314ce1218382e7 (diff)
downloadbuild-coordinator-c146e61ae6edcf0ae2d4eb11cf553e6500326c3a.tar
build-coordinator-c146e61ae6edcf0ae2d4eb11cf553e6500326c3a.tar.gz
Have the agent handle errors from the coordinator
When submitting builds. The agent will now retry the relevant thing, like uploading the log file if the coordinator says that still needs doing.
-rw-r--r--guix-build-coordinator/agent-messaging/http.scm32
-rw-r--r--guix-build-coordinator/agent.scm127
2 files changed, 124 insertions, 35 deletions
diff --git a/guix-build-coordinator/agent-messaging/http.scm b/guix-build-coordinator/agent-messaging/http.scm
index 202b90e..285ef5a 100644
--- a/guix-build-coordinator/agent-messaging/http.scm
+++ b/guix-build-coordinator/agent-messaging/http.scm
@@ -44,7 +44,10 @@
#:use-module (guix-build-coordinator utils)
#:use-module (guix-build-coordinator datastore)
#:use-module (guix-build-coordinator coordinator)
- #:export (submit-status
+ #:export (agent-error-from-coordinator?
+ agent-error-from-coordinator-details
+
+ submit-status
submit-log-file
submit-build-result
report-build-start
@@ -52,6 +55,11 @@
submit-output
fetch-builds-for-agent))
+(define-exception-type &agent-error-from-coordinator &error
+ make-agent-error-from-coordinator
+ agent-error-from-coordinator?
+ (details agent-error-from-coordinator-details))
+
(define (coordinator-uri-for-path base-uri-string agent-path)
(let* ((base-uri (string->uri base-uri-string))
(scheme (uri-scheme base-uri))
@@ -119,7 +127,19 @@
#:headers
`((Authorization . ,auth-value)
,@headers))))))
- (if (>= (response-code response) 400)
+ (let ((code (response-code response)))
+ (cond
+ ((eq? code 400)
+ (and=> (coordinator-handle-failed-request method
+ path
+ response
+ body)
+ (lambda (error)
+ (raise-exception
+ (make-agent-error-from-coordinator
+ (assoc-ref error "error"))))))
+
+ ((>= (response-code response) 400)
(let ((body
(coordinator-handle-failed-request method
path
@@ -138,14 +158,16 @@
(set! first-request-failed? #t)
(raise-exception
(make-exception-with-message
- body)))))
+ body))))))
+ (else
(values
(json-string->scm (utf8->string body))
- response))))
+ response))))))
(retry-on-error make-request
#:times 9
- #:delay 10))
+ #:delay 10
+ #:ignore agent-error-from-coordinator?))
(define (submit-status coordinator-uri agent-uuid password
status)
diff --git a/guix-build-coordinator/agent.scm b/guix-build-coordinator/agent.scm
index 747c6b1..4951ba6 100644
--- a/guix-build-coordinator/agent.scm
+++ b/guix-build-coordinator/agent.scm
@@ -269,35 +269,102 @@
(define (post-build-success uuid coordinator-uri password
build-id derivation end-time)
+ (define output-details
+ (map
+ (match-lambda
+ ((output-name . output)
+ (let ((path-info (with-store store
+ (query-path-info
+ store
+ (derivation-output-path output)))))
+ `((name . ,output-name)
+ (hash . ,(bytevector->nix-base32-string
+ (path-info-hash path-info)))
+ (size . ,(path-info-nar-size path-info))
+ (references . ,(list->vector
+ (map basename
+ (path-info-references path-info))))))))
+
+ (derivation-outputs (read-derivation-from-file derivation))))
+
+ (define (attempt-submit-build-result)
+ (with-exception-handler
+ (lambda (exn)
+ (unless (agent-error-from-coordinator? exn)
+ (raise-exception exn))
+
+ (let ((details
+ (agent-error-from-coordinator-details exn)))
+ (if (string? details)
+ (cond
+ ((string=? details "build_already_processed")
+ (simple-format
+ #t "warning: coordinator indicates this build is already marked as processed\n")
+ #t)
+ ((string=? details "missing_build_log_file")
+ ;; Retry submitting the log file
+ (agent-submit-log-file uuid coordinator-uri password
+ build-id derivation)
+
+ (attempt-submit-build-result))
+ (else
+ (raise-exception
+ (make-exception
+ (make-exception-with-message
+ "unrecognised error from coordinator")
+ (make-exception-with-irritants
+ details)))))
+ (or
+ (and=> (assoc-ref details "missing_output")
+ (lambda (missing-output-name)
+ (let ((missing-output
+ (any (match-lambda
+ ((name . output)
+ (if (string=? name missing-output-name)
+ output
+ #f)))
+ (derivation-outputs
+ (read-derivation-from-file derivation)))))
+ (unless missing-output
+ (raise-exception
+ (make-exception
+ (make-exception-with-message
+ "unknown missing output")
+ (make-exception-with-irritants
+ missing-output-name))))
+
+ (submit-one-output missing-output-name
+ missing-output))
+
+ (attempt-submit-build-result)))
+ (raise-exception
+ (make-exception
+ (make-exception-with-message
+ "unrecognised error from coordinator")
+ (make-exception-with-irritants
+ details)))))))
+ (lambda ()
+ (submit-build-result
+ coordinator-uri uuid password build-id
+ `((result . success)
+ (end_time . ,(strftime "%F %T" end-time))
+ (outputs . ,(list->vector output-details)))))
+ #:unwind? #t))
+
+ (define (submit-one-output output-name output)
+ (simple-format #t "submitting output ~A\n"
+ (derivation-output-path output))
+ (submit-output coordinator-uri uuid password
+ build-id output-name
+ (derivation-output-path output)))
+
(simple-format #t "build ~A successful, reporting to coordinator\n"
build-id)
- (let ((output-details
- (map
- (match-lambda
- ((output-name . output)
- (simple-format #t "submitting output ~A\n"
- (derivation-output-path output))
- (submit-output coordinator-uri uuid password
- build-id output-name
- (derivation-output-path output))
-
- (let ((path-info (with-store store
- (query-path-info
- store
- (derivation-output-path output)))))
- `((name . ,output-name)
- (hash . ,(bytevector->nix-base32-string
- (path-info-hash path-info)))
- (size . ,(path-info-nar-size path-info))
- (references . ,(list->vector
- (map basename
- (path-info-references path-info))))))))
-
- (derivation-outputs (read-derivation-from-file derivation)))))
-
- (simple-format #t "finished submitting outputs, reporting result\n")
- (submit-build-result
- coordinator-uri uuid password build-id
- `((result . success)
- (end_time . ,(strftime "%F %T" end-time))
- (outputs . ,(list->vector output-details))))))
+
+ (for-each (match-lambda
+ ((output-name . output)
+ (submit-one-output output-name output)))
+ (derivation-outputs (read-derivation-from-file derivation)))
+
+ (simple-format #t "finished submitting outputs, reporting result\n")
+ (attempt-submit-build-result))