;;; Guix Build Coordinator
;;;
;;; Copyright © 2020 Christopher Baines
;;;
;;; This file is part of the guix-build-coordinator.
;;;
;;; The Guix Build Coordinator is free software; you can redistribute
;;; it and/or modify it under the terms of the GNU General Public
;;; License as published by the Free Software Foundation; either
;;; version 3 of the License, or (at your option) any later version.
;;;
;;; The Guix Build Coordinator is distributed in the hope that it will
;;; be useful, but WITHOUT ANY WARRANTY; without even the implied
;;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
;;; See the GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with the guix-data-service.  If not, see
;;; <http://www.gnu.org/licenses/>.

(define-module (guix-build-coordinator agent)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-11)
  #:use-module (ice-9 match)
  #:use-module (ice-9 futures)
  #:use-module (ice-9 atomic)
  #:use-module (ice-9 threads)
  #:use-module (ice-9 exceptions)
  #:use-module (rnrs bytevectors)
  #:use-module (web http)
  #:use-module (guix store)
  #:use-module (guix derivations)
  #:use-module (guix base32)
  #:use-module (guix-build-coordinator utils)
  #:use-module (guix-build-coordinator agent-messaging http)
  #:export (run-agent))

(define (run-agent uuid coordinator-uri password systems
                   max-parallel-builds
                   derivation-substitute-urls
                   non-derivation-substitute-urls)
  "Run the agent identified by UUID against the coordinator at
COORDINATOR-URI.  Report an idle status, queue the builds the coordinator
returns with that status, then loop forever, requesting more builds for
SYSTEMS whenever the work queue counts fewer than MAX-PARALLEL-BUILDS
jobs.  This procedure does not return."

  ;; Ask the coordinator for up to COUNT builds and return them as a list.
  (define (fetch-new-jobs count)
    (let ((received-builds
           (fetch-builds-for-agent coordinator-uri uuid password systems
                                   #:count count)))
      (simple-format #t "requested ~A builds, received ~A\n"
                     count (length received-builds))
      received-builds))

  ;; Handle a single BUILD (an alist with "uuid" and "derivation-name"
  ;; keys): run the pre-build setup, and either build and report the
  ;; outcome, or report the setup failure.
  (define (process-job build)
    (let ((build-id        (assoc-ref build "uuid"))
          (derivation-name (assoc-ref build "derivation-name")))
      (simple-format #t "~A: setting up to build: ~A\n"
                     build-id derivation-name)
      (with-store store
        (let ((pre-build-status (pre-build-process
                                 store
                                 derivation-substitute-urls
                                 non-derivation-substitute-urls
                                 derivation-name)))
          (if (eq? (assq-ref pre-build-status 'result) 'success)
              (begin
                (simple-format #t "~A: setup successful, building: ~A\n"
                               build-id derivation-name)
                (report-build-start coordinator-uri uuid password build-id)
                (let* ((result (perform-build store derivation-name))
                       ;; gmtime yields the broken-down time in UTC
                       ;; directly, without the temporary TZ override that
                       ;; (localtime time "UTC") relies on.
                       (end-time (gmtime (current-time))))
                  ;; The log file is uploaded before the result, for both
                  ;; successful and failed builds.
                  (agent-submit-log-file uuid coordinator-uri password
                                         build-id derivation-name)
                  ((if result post-build-success post-build-failure)
                   uuid coordinator-uri password
                   build-id derivation-name end-time)))
              (begin
                (simple-format #t "~A: failure: ~A\n"
                               build-id pre-build-status)
                (report-setup-failure coordinator-uri uuid password
                                      build-id pre-build-status)))))))

  (let-values (((process-job-with-queue count-jobs)
                (create-work-queue max-parallel-builds process-job)))

    ;; Queue the builds included in the coordinator's response to the
    ;; initial idle status report.
    (let* ((details (submit-status coordinator-uri uuid password 'idle))
           (builds  (vector->list (assoc-ref details "builds"))))
      (for-each process-job-with-queue builds))

    (while #t
      (let ((job-count (count-jobs)))
        (when (< job-count max-parallel-builds)
          (match (fetch-new-jobs (- max-parallel-builds job-count))
            (()
             ;; No new jobs available
             (sleep 30))
            ((jobs ...)
             (for-each process-job-with-queue jobs)))))
      (sleep 5))))

(define (agent-submit-log-file uuid coordinator-uri password
                               build-id derivation-name)
  "Upload the build log for DERIVATION-NAME to the coordinator as the log
of BUILD-ID.  A missing log file raises an exception inside the retried
thunk, so the whole lookup and upload is attempted via retry-on-error with
#:times 6 and #:delay 30."
  (retry-on-error
   (lambda ()
     (let ((log-file (derivation-log-file derivation-name)))
       (unless log-file
         (raise-exception
          (make-exception-with-message
           (simple-format #f "log file missing for ~A (~A)"
                          derivation-name build-id))))
       (simple-format #t "~A: uploading log file ~A\n"
                      build-id log-file)
       (submit-log-file coordinator-uri uuid password
                        build-id log-file)))
   #:times 6
   #:delay 30))

(define (pre-build-process store
                           derivation-substitute-urls
                           non-derivation-substitute-urls
                           derivation-name)
  "Prepare STORE for building DERIVATION-NAME: make sure the derivation
itself is present (substituting it from DERIVATION-SUBSTITUTE-URLS if
needed), delete any existing outputs, and fetch missing inputs from
NON-DERIVATION-SUBSTITUTE-URLS.

Return an alist.  On success it is ((result . success)); otherwise it has
(result . failure) plus a failure_reason entry of either missing_inputs
(with a missing_inputs vector) or could_not_delete_outputs."

  ;; Return the list of input outputs that are missing from the store and
  ;; could not be obtained, or '() when everything is available.
  ;; DERIVATION is currently unused.
  (define (find-missing-inputs derivation inputs)
    (let* ((output-paths
            (append-map derivation-input-output-paths inputs))
           (missing-paths
            (remove (lambda (path)
                      (valid-path? store path))
                    output-paths))
           ;; One boolean per element of MISSING-PATHS: #t only when both
           ;; the direct check and the daemon report a substitute.
           (path-substitutes
            (begin
              (set-build-options store
                                 #:substitute-urls
                                 non-derivation-substitute-urls)
              (map (lambda (file)
                     (and (has-substiutes-no-cache?
                           non-derivation-substitute-urls
                           file)
                          (if (has-substitutes? store file)
                              #t
                              (begin
                                (simple-format
                                 #t
                                 "warning: a substitute should be available for ~A, but the daemon claims it's not\n"
                                 file)
                                #f))))
                   missing-paths))))
      (if (null? missing-paths)
          '()
          (if (member #f path-substitutes)
              ;; At least one path has no substitute: report exactly the
              ;; paths without one, and do not attempt any download.
              (fold (lambda (file substitute-available? result)
                      (if substitute-available?
                          result
                          (cons file result)))
                    '()
                    missing-paths
                    path-substitutes)
              (begin
                (retry-on-error
                 (lambda ()
                   (with-timeout (* 1000 60 10) ; 10 minutes
                     (raise-exception
                      (make-exception-with-message
                       "timeout fetching inputs"))
                     (begin
                       ;; Download the substitutes
                       (set-build-options store
                                          #:substitute-urls
                                          non-derivation-substitute-urls)
                       (build-things store missing-paths))))
                 #:times 6
                 #:delay 60)

                ;; Double check everything is actually present.
                (let ((missing-files (remove (lambda (path)
                                               (valid-path? store path))
                                             output-paths)))
                  (if (null? missing-files)
                      '()
                      (begin
                        (simple-format
                         (current-error-port)
                         "warning: failed to fetch substitutes for: ~A\n"
                         missing-files)
                        (let ((unavailable-outputs
                               (delete-duplicates
                                (append-map
                                 (lambda (missing-output)
                                   (find-missing-substitutes-for-output
                                    store
                                    non-derivation-substitute-urls
                                    missing-output))
                                 missing-files))))
                          (simple-format
                           (current-error-port)
                           "warning: the following outputs are missing:\n~A\n"
                           ;; One output per line; string-join's default
                           ;; delimiter is a single space, which ran the
                           ;; list items together.
                           (string-join
                            (map (lambda (output)
                                   (string-append " - " output))
                                 unavailable-outputs)
                            "\n"))
                          unavailable-outputs)))))))))

  ;; Delete the outputs of DERIVATION if any of them exist as files.
  ;; Return #t when nothing needed deleting or deletion succeeded, and #f
  ;; when deletion threw.
  (define (delete-outputs derivation)
    (let* ((outputs (derivation-outputs derivation))
           (output-file-names
            (map derivation-output-path (map cdr outputs))))
      (if (any file-exists? output-file-names)
          (catch #t
            (lambda ()
              ;; There can be issues deleting links when collecting garbage
              ;; from multiple threads
              (monitor
               (with-store store
                 (delete-paths store output-file-names)))
              #t)
            ;; Throw handlers are called with the key followed by however
            ;; many arguments were thrown, so ARGS must be a rest
            ;; argument; a fixed two-parameter handler would itself fail
            ;; for throws that do not carry exactly one argument.
            (lambda (key . args)
              (simple-format (current-error-port)
                             "error: delete-outputs: ~A ~A\n"
                             key args)
              #f))
          #t)))

  (let ((derivation
         (if (valid-path? store derivation-name)
             (read-derivation-from-file derivation-name)
             (begin
               (retry-on-error
                (lambda ()
                  (substitute-derivation derivation-name
                                         #:substitute-urls
                                         derivation-substitute-urls))
                #:times 12
                #:delay 20)
               (read-derivation-from-file derivation-name)))))
    (match (delete-outputs derivation)
      (#t
       (let ((missing-inputs
              (find-missing-inputs derivation
                                   (derivation-inputs derivation))))
         (if (null? missing-inputs)
             '((result . success))
             `((result . failure)
               (failure_reason . missing_inputs)
               (missing_inputs . ,(list->vector missing-inputs))))))
      (failure
       '((result . failure)
         (failure_reason . could_not_delete_outputs))))))

(define (perform-build store derivation-name)
  "Build DERIVATION-NAME using STORE with substitutes disabled and the
build output discarded.  Return #t if build-things returns normally, and
#f (after logging the error) if it throws."
  (set-build-options store #:use-substitutes? #f)
  (parameterize ((current-build-output-port (%make-void-port "w")))
    (catch #t
      (lambda ()
        (build-things store (list derivation-name))
        #t)
      (lambda (key . args)
        (simple-format (current-error-port)
                       "error: build: ~A ~A\n"
                       key args)
        #f))))

(define (post-build-failure uuid coordinator-uri password
                            build-id derivation end-time)
  "Report to the coordinator that BUILD-ID failed, with END-TIME (a
broken-down time) formatted as %F %T.  DERIVATION is unused; it is
accepted so that this procedure has the same signature as
post-build-success."
  (simple-format #t "build ~A failed, reporting to coordinator\n" build-id)
  (submit-build-result coordinator-uri uuid password build-id
                       `((result . failure)
                         (end_time . ,(strftime "%F %T" end-time)))))

(define (post-build-success uuid coordinator-uri password
                            build-id derivation end-time)
  "Submit every output of DERIVATION (a derivation file name) for BUILD-ID
to the coordinator, then report the successful result together with
END-TIME and details of each output."

  ;; One alist per output: name, hash (nix-base32), NAR size and the
  ;; basenames of its references, all taken from the store's path info.
  (define output-details
    (map
     (match-lambda
       ((output-name . output)
        (let ((path-info
               (with-store store
                 (query-path-info store
                                  (derivation-output-path output)))))
          `((name . ,output-name)
            (hash . ,(bytevector->nix-base32-string
                      (path-info-hash path-info)))
            (size . ,(path-info-nar-size path-info))
            (references . ,(list->vector
                            (map basename
                                 (path-info-references path-info))))))))
     (derivation-outputs
      (read-derivation-from-file derivation))))

  ;; Submit the build result.  Errors reported by the coordinator are
  ;; handled as follows: an already processed build counts as done, a
  ;; missing log file or missing output is re-submitted before trying
  ;; again, and anything else is raised.
  (define (attempt-submit-build-result)
    (with-exception-handler
        (lambda (exn)
          ;; Only errors reported by the coordinator are handled here.
          (unless (agent-error-from-coordinator? exn)
            (raise-exception exn))

          (let ((details (agent-error-from-coordinator-details exn)))
            (if (string? details)
                (cond
                 ((string=? details "build_already_processed")
                  (simple-format
                   #t
                   "warning: coordinator indicates this build is already marked as processed\n")
                  #t)
                 ((string=? details "missing_build_log_file")
                  ;; Retry submitting the log file
                  (agent-submit-log-file uuid coordinator-uri password
                                         build-id derivation)
                  (attempt-submit-build-result))
                 (else
                  (raise-exception
                   (make-exception
                    (make-exception-with-message
                     "unrecognised error from coordinator")
                    (make-exception-with-irritants
                     details)))))
                (or
                 (and=>
                  (assoc-ref details "missing_output")
                  (lambda (missing-output-name)
                    (let ((missing-output
                           (any
                            (match-lambda
                              ((name . output)
                               (if (string=? name missing-output-name)
                                   output
                                   #f)))
                            (derivation-outputs
                             (read-derivation-from-file derivation)))))
                      (unless missing-output
                        (raise-exception
                         (make-exception
                          (make-exception-with-message
                           "unknown missing output")
                          (make-exception-with-irritants
                           missing-output-name))))

                      (submit-one-output missing-output-name missing-output))
                    (attempt-submit-build-result)))
                 (raise-exception
                  (make-exception
                   (make-exception-with-message
                    "unrecognised error from coordinator")
                   (make-exception-with-irritants
                    details)))))))
      (lambda ()
        (submit-build-result coordinator-uri uuid password build-id
                             `((result . success)
                               (end_time . ,(strftime "%F %T" end-time))
                               (outputs . ,(list->vector output-details)))))
      #:unwind? #t))

  ;; Upload the store item for OUTPUT, named OUTPUT-NAME, to the coordinator.
  (define (submit-one-output output-name output)
    (simple-format #t "submitting output ~A\n"
                   (derivation-output-path output))
    (submit-output coordinator-uri uuid password
                   build-id output-name
                   (derivation-output-path output)))

  (simple-format #t "build ~A successful, reporting to coordinator\n"
                 build-id)

  (for-each (match-lambda
              ((output-name . output)
               (submit-one-output output-name output)))
            (derivation-outputs
             (read-derivation-from-file derivation)))

  (simple-format #t "finished submitting outputs, reporting result\n")

  (attempt-submit-build-result))