;;; Guix Build Coordinator ;;; ;;; Copyright © 2020 Christopher Baines ;;; ;;; This file is part of the guix-build-coordinator. ;;; ;;; The Guix Build Coordinator is free software; you can redistribute ;;; it and/or modify it under the terms of the GNU General Public ;;; License as published by the Free Software Foundation; either ;;; version 3 of the License, or (at your option) any later version. ;;; ;;; The Guix Build Coordinator is distributed in the hope that it will ;;; be useful, but WITHOUT ANY WARRANTY; without even the implied ;;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ;;; See the GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with the guix-data-service. If not, see ;;; . (define-module (guix-build-coordinator agent) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (ice-9 match) #:use-module (ice-9 format) #:use-module (ice-9 futures) #:use-module (ice-9 atomic) #:use-module (ice-9 threads) #:use-module (ice-9 exceptions) #:use-module (rnrs bytevectors) #:use-module (web http) #:use-module (oop goops) #:use-module (logging logger) #:use-module (logging port-log) #:use-module (guix store) #:use-module (guix derivations) #:use-module (guix base32) #:use-module (guix-build-coordinator utils) #:use-module (guix-build-coordinator agent-messaging http) #:export (run-agent)) (define (run-agent uuid coordinator-uri password systems max-parallel-builds derivation-substitute-urls non-derivation-substitute-urls) (define lgr (make )) (define port-log (make #:port (current-output-port) #:formatter (lambda (lvl time str) (format #f "~a (~5a): ~a~%" (strftime "%F %H:%M:%S" (localtime time)) lvl str)))) (define (fetch-jobs current-count max-count) (let ((received-builds (fetch-builds-for-agent coordinator-uri uuid password systems max-count))) (log-msg lgr 'INFO "have " current-count " builds, max " max-count " builds, received " (- (length received-builds) current-count)) received-builds)) (define (process-job build) (let ((build-id (assoc-ref build "uuid")) (derivation-name (assoc-ref build "derivation-name"))) (log-msg lgr 'INFO build-id ": setting up to build: " derivation-name) (with-store store (let ((pre-build-status (pre-build-process lgr store build-id derivation-substitute-urls non-derivation-substitute-urls derivation-name))) (if (eq? (assq-ref pre-build-status 'result) 'success) (begin (log-msg lgr 'INFO build-id ": setup successful, building: " derivation-name) (report-build-start coordinator-uri uuid password build-id) (let* ((result (perform-build lgr store build-id derivation-name)) ;; TODO Check this handles timezones right (end-time (localtime (current-time) "UTC"))) (agent-submit-log-file lgr uuid coordinator-uri password build-id derivation-name) ((if result post-build-success post-build-failure) lgr uuid coordinator-uri password build-id derivation-name end-time))) (begin (log-msg lgr 'INFO build-id ": setup failure: " (assq-ref pre-build-status 'failure_reason)) (report-setup-failure coordinator-uri uuid password build-id pre-build-status))))))) (add-handler! lgr port-log) (open-log! lgr) (let-values (((process-job-with-queue count-jobs) (create-work-queue max-parallel-builds process-job))) (let ((details (submit-status coordinator-uri uuid password 'idle))) (let* ((builds (vector->list (assoc-ref details "builds"))) (initial-build-ids (map (lambda (build) (assoc-ref build "uuid")) builds))) (for-each (lambda (job-args) (process-job-with-queue job-args)) builds) (let loop ((build-ids initial-build-ids)) (let ((job-count (count-jobs))) (if (< job-count max-parallel-builds) (let* ((fetched-builds (fetch-jobs job-count max-parallel-builds)) (new-builds (remove (lambda (build) (member (assoc-ref build "uuid") build-ids)) fetched-builds))) (for-each (lambda (job-args) (process-job-with-queue job-args)) new-builds) (when (null? new-builds) (sleep 30)) (loop (map (lambda (build) (assoc-ref build "uuid")) fetched-builds))) (begin (sleep 3) (loop build-ids))))))))) (define (agent-submit-log-file lgr uuid coordinator-uri password build-id derivation-name) (retry-on-error (lambda () (let ((log-file (derivation-log-file derivation-name))) (unless log-file (raise-exception (make-exception-with-message (simple-format #f "log file missing for ~A (~A)" derivation-name build-id)))) (log-msg lgr 'INFO build-id ": uploading log file " log-file) (submit-log-file coordinator-uri uuid password build-id log-file))) #:times 6 #:delay 30)) (define (pre-build-process lgr store build-id derivation-substitute-urls non-derivation-substitute-urls derivation-name) (define (find-missing-inputs derivation inputs) (log-msg lgr 'DEBUG build-id ": checking the availability of build inputs") (let* ((output-paths (append-map derivation-input-output-paths inputs)) (missing-paths (remove (lambda (path) (valid-path? store path)) output-paths)) (path-substitutes (begin (when non-derivation-substitute-urls (set-build-options store #:substitute-urls non-derivation-substitute-urls)) (unless non-derivation-substitute-urls (log-msg lgr 'WARNING "non-derivation-substitute-urls unset, unable to query substitute servers without caching")) (map (lambda (file) (and (and (list? non-derivation-substitute-urls) (has-substiutes-no-cache? non-derivation-substitute-urls file)) (let ((log-port (open-output-string))) ;; TODO Do something with the logged output (parameterize ((current-build-output-port log-port)) (if (has-substitutes? store file) #t (begin (log-msg lgr 'WARNING "a substitute should be available for " file ", but the guix-daemon claims it's unavailable") #f)))))) missing-paths)))) (if (null? missing-paths) '() (if (member #f path-substitutes) (fold (lambda (file substitute-available? result) (if substitute-available? result (cons file result))) '() missing-paths path-substitutes) (begin (retry-on-error (lambda () (with-timeout (* 1000 60 10) ; 10 minutes (raise-exception (make-exception-with-message "timeout fetching inputs")) (begin ;; Download the substitutes (when non-derivation-substitute-urls (set-build-options store #:substitute-urls non-derivation-substitute-urls)) (let ((log-port (open-output-string))) (with-throw-handler #t (lambda () (parameterize ((current-build-output-port log-port)) (build-things store missing-paths))) (lambda (key . args) (display (get-output-string log-port)) (close-output-port log-port))))))) #:times 6 #:delay 60) ;; Double check everything is actually present. (let ((missing-files (remove (lambda (path) (valid-path? store path)) output-paths))) (if (null? missing-files) '() (begin (log-msg lgr 'WARNING "failed to fetch substitutes for " missing-files) (let ((unavailable-outputs (delete-duplicates (append-map (lambda (missing-output) (find-missing-substitutes-for-output store non-derivation-substitute-urls missing-output)) missing-files)))) (log-msg lgr 'WARNING "the following outputs are missing: " (string-join (map (lambda (output) (string-append " - " output)) unavailable-outputs))) unavailable-outputs))))))))) (define (delete-outputs derivation) (let* ((outputs (derivation-outputs derivation)) (output-file-names (map derivation-output-path (map cdr outputs)))) (if (any file-exists? output-file-names) (catch #t (lambda () (log-msg lgr 'DEBUG build-id ": deleting " (if (eq? (length output-file-names) 1) "output" "outputs")) ;; There can be issues deleting links when collecting garbage ;; from multiple threads (monitor (with-store store (delete-paths store output-file-names))) #t) (lambda (key args) (log-msg lgr 'ERROR "delete-outputs: " key args) #f)) #t))) (let ((derivation (if (valid-path? store derivation-name) (read-derivation-from-file derivation-name) (and (with-exception-handler (lambda (exn) (log-msg lgr 'ERROR "exception when reading/fetching derivation: " exn) #f) (lambda () (log-msg lgr 'DEBUG build-id ": substituting derivation") (retry-on-error (lambda () (substitute-derivation derivation-name #:substitute-urls derivation-substitute-urls)) #:times 12 #:delay 20) #t) #:unwind? #t) (read-derivation-from-file derivation-name))))) (if derivation (begin (log-msg lgr 'DEBUG build-id ": derivation read from file") (match (delete-outputs derivation) (#t (let ((missing-inputs (find-missing-inputs derivation (derivation-inputs derivation)))) (if (null? missing-inputs) '((result . success)) `((result . failure) (failure_reason . missing_inputs) (missing_inputs . ,(list->vector missing-inputs)))))) (failure '((result . failure) (failure_reason . could_not_delete_outputs))))) '((result . failure) (failure_reason . error_fetching_derivation))))) (define (perform-build lgr store build-id derivation-name) (set-build-options store #:use-substitutes? #f) (parameterize ((current-build-output-port (%make-void-port "w"))) (with-exception-handler (lambda (exn) (if (and (store-protocol-error? exn) (eq? (store-protocol-error-status exn) 100)) (log-msg lgr 'INFO build-id ": build failed") (log-msg lgr 'ERROR build-id ": unknown exception " exn)) #f) (lambda () (build-things store (list derivation-name)) #t) #:unwind? #t))) (define (post-build-failure lgr uuid coordinator-uri password build-id derivation end-time) (log-msg lgr 'INFO build-id ": build failed") (with-exception-handler (lambda (exn) (unless (agent-error-from-coordinator? exn) (raise-exception exn)) (let ((details (agent-error-from-coordinator-details exn))) (if (string? details) (cond ((string=? details "build_already_processed") (log-msg lgr 'WARNING build-id ": coordinator indicates this build is already marked as processed") #t) (else (raise-exception (make-exception (make-exception-with-message "unrecognised error from coordinator") (make-exception-with-irritants details))))) (raise-exception (make-exception (make-exception-with-message "unrecognised error from coordinator") (make-exception-with-irritants details)))))) (lambda () (submit-build-result coordinator-uri uuid password build-id `((result . failure) (end_time . ,(strftime "%F %T" end-time))))) #:unwind? #t)) (define (post-build-success lgr uuid coordinator-uri password build-id derivation end-time) (define output-details (map (match-lambda ((output-name . output) (let ((path-info (with-store store (query-path-info store (derivation-output-path output))))) `((name . ,output-name) (hash . ,(bytevector->nix-base32-string (path-info-hash path-info))) (size . ,(path-info-nar-size path-info)) (references . ,(list->vector (map basename (path-info-references path-info)))))))) (derivation-outputs (read-derivation-from-file derivation)))) (define (attempt-submit-build-result) (with-exception-handler (lambda (exn) (unless (agent-error-from-coordinator? exn) (raise-exception exn)) (let ((details (agent-error-from-coordinator-details exn))) (if (string? details) (cond ((string=? details "build_already_processed") (log-msg lgr 'WARNING build-id ": coordinator indicates this build is already marked as processed") #t) ((string=? details "missing_build_log_file") ;; Retry submitting the log file (agent-submit-log-file lgr uuid coordinator-uri password build-id derivation) (attempt-submit-build-result)) (else (raise-exception (make-exception (make-exception-with-message "unrecognised error from coordinator") (make-exception-with-irritants details))))) (or (and=> (assoc-ref details "missing_output") (lambda (missing-output-name) (let ((missing-output (any (match-lambda ((name . output) (if (string=? name missing-output-name) output #f))) (derivation-outputs (read-derivation-from-file derivation))))) (unless missing-output (raise-exception (make-exception (make-exception-with-message "unknown missing output") (make-exception-with-irritants missing-output-name)))) (submit-one-output missing-output-name missing-output)) (attempt-submit-build-result))) (raise-exception (make-exception (make-exception-with-message "unrecognised error from coordinator") (make-exception-with-irritants details))))))) (lambda () (submit-build-result coordinator-uri uuid password build-id `((result . success) (end_time . ,(strftime "%F %T" end-time)) (outputs . ,(list->vector output-details))))) #:unwind? #t)) (define (submit-one-output output-name output) (log-msg lgr 'INFO build-id ": submitting output " (derivation-output-path output)) (submit-output coordinator-uri uuid password build-id output-name (derivation-output-path output))) (log-msg lgr 'INFO build-id ": build successful, submitting outputs") (for-each (match-lambda ((output-name . output) (submit-one-output output-name output))) (derivation-outputs (read-derivation-from-file derivation))) (log-msg lgr 'INFO build-id ": finished submitting outputs, reporting build success") (attempt-submit-build-result))