diff options
Diffstat (limited to 'guix/scripts/offload.scm')
-rw-r--r-- | guix/scripts/offload.scm | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm new file mode 100644 index 0000000000..d919ede3c7 --- /dev/null +++ b/guix/scripts/offload.scm @@ -0,0 +1,380 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2014 Ludovic Courtès <ludo@gnu.org> +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. + +(define-module (guix scripts offload) + #:use-module (guix config) + #:use-module (guix records) + #:use-module (guix store) + #:use-module (guix derivations) + #:use-module (guix nar) + #:use-module (guix utils) + #:use-module ((guix build utils) #:select (which)) + #:use-module (guix ui) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) + #:use-module (srfi srfi-34) + #:use-module (srfi srfi-35) + #:use-module (ice-9 popen) + #:use-module (ice-9 rdelim) + #:use-module (ice-9 match) + #:use-module (ice-9 regex) + #:use-module (ice-9 format) + #:use-module (rnrs io ports) + #:export (build-machine + build-requirements + guix-offload)) + +;;; Commentary: +;;; +;;; Attempt to offload builds to the machines listed in +;;; /etc/guix/machines.scm, transferring missing dependencies over SSH, and +;;; retrieving the build output(s) over SSH upon success. +;;; +;;; This command should not be used directly; instead, it is called on-demand +;;; by the daemon, unless it was started with '--no-build-hook' or a client +;;; inhibited build hooks. +;;; +;;; Code: + + +(define-record-type* <build-machine> + build-machine make-build-machine + build-machine? + (name build-machine-name) ; string + (system build-machine-system) ; string + (user build-machine-user) ; string + (private-key build-machine-private-key ; file name + (default (user-lsh-private-key))) + (parallel-builds build-machine-parallel-builds ; number + (default 1)) + (speed build-machine-speed ; inexact real + (default 1.0)) + (features build-machine-features ; list of strings + (default '()))) + +(define-record-type* <build-requirements> + build-requirements make-build-requirements + build-requirements? + (system build-requirements-system) ; string + (features build-requirements-features ; list of strings + (default '()))) + +(define %machine-file + ;; File that lists machines available as build slaves. + (string-append %config-directory "/machines.scm")) + +(define %lsh-command + "lsh") + +(define %lshg-command + ;; FIXME: 'lshg' fails to pass large amounts of data, see + ;; <http://lists.lysator.liu.se/pipermail/lsh-bugs/2014q1/000639.html>. + "lsh") + +(define (user-lsh-private-key) + "Return the user's default lsh private key, or #f if it could not be +determined." + (and=> (getenv "HOME") + (cut string-append <> "/.lsh/identity"))) + +(define %user-module + ;; Module in which the machine description file is loaded. + (let ((module (make-fresh-user-module))) + (module-use! module (resolve-interface '(guix scripts offload))) + module)) + +(define* (build-machines #:optional (file %machine-file)) + "Read the list of build machines from FILE and return it." + (catch #t + (lambda () + ;; Avoid ABI incompatibility with the <build-machine> record. + (set! %fresh-auto-compile #t) + + (save-module-excursion + (lambda () + (set-current-module %user-module) + (primitive-load %machine-file)))) + (lambda args + (match args + (('system-error . _) + (let ((err (system-error-errno args))) + ;; Silently ignore missing file since this is a common case. + (if (= ENOENT err) + '() + (leave (_ "failed to open machine file '~a': ~a~%") + %machine-file (strerror err))))) + (_ + (leave (_ "failed to load machine file '~a': ~s~%") + %machine-file args)))))) + +(define (open-ssh-gateway machine) + "Initiate an SSH connection gateway to MACHINE, and return the PID of the +running lsh gateway upon success, or #f on failure." + (catch 'system-error + (lambda () + (let* ((port (open-pipe* OPEN_READ %lsh-command + "-l" (build-machine-user machine) + "-i" (build-machine-private-key machine) + ;; XXX: With lsh 2.1, passing '--write-pid' + ;; last causes the PID not to be printed. + "--write-pid" "--gateway" "--background" "-z" + (build-machine-name machine))) + (line (read-line port)) + (status (close-pipe port))) + (if (zero? status) + (let ((pid (string->number line))) + (if (integer? pid) + pid + (begin + (warning (_ "'~a' did not write its PID on stdout: ~s~%") + %lsh-command line) + #f))) + (begin + (warning (_ "failed to initiate SSH connection to '~a':\ + '~a' exited with ~a~%") + (build-machine-name machine) + %lsh-command + (status:exit-val status)) + #f)))) + (lambda args + (leave (_ "failed to execute '~a': ~a~%") + %lsh-command (strerror (system-error-errno args)))))) + +(define (remote-pipe machine mode command) + "Run COMMAND on MACHINE, assuming an lsh gateway has been set up." + (catch 'system-error + (lambda () + (apply open-pipe* mode %lshg-command + "-l" (build-machine-user machine) "-z" + (build-machine-name machine) + command)) + (lambda args + (warning (_ "failed to execute '~a': ~a~%") + %lshg-command (strerror (system-error-errno args))) + #f))) + +(define* (offload drv machine + #:key print-build-trace? (max-silent-time 3600) + (build-timeout 7200)) + "Perform DRV on MACHINE, assuming DRV and its prerequisites are available +there. Return a read pipe from where to read the build log." + (format (current-error-port) "offloading '~a' to '~a'...~%" + (derivation-file-name drv) (build-machine-name machine)) + (format (current-error-port) "@ build-remote ~a ~a~%" + (derivation-file-name drv) (build-machine-name machine)) + + ;; FIXME: Protect DRV from garbage collection on MACHINE. + (let ((pipe (remote-pipe machine OPEN_READ + `("guix" "build" + ;; FIXME: more options + ,(format #f "--max-silent-time=~a" + max-silent-time) + ,(derivation-file-name drv))))) + pipe)) + +(define (send-files files machine) + "Send the subset of FILES that's missing to MACHINE's store. Return #t on +success, #f otherwise." + (define (missing-files files) + ;; Return the subset of FILES not already on MACHINE. + (let* ((files (format #f "~{~a~%~}" files)) + (missing (filtered-port + (list (which %lshg-command) + "-l" (build-machine-user machine) + "-i" (build-machine-private-key machine) + (build-machine-name machine) + "guix" "archive" "--missing") + (open-input-string files)))) + (string-tokenize (get-string-all missing)))) + + (with-store store + (guard (c ((nix-protocol-error? c) + (warning (_ "failed to export files for '~a': ~s~%") + (build-machine-name machine) + c) + (false-if-exception (close-pipe pipe)) + #f)) + + ;; Compute the subset of FILES missing on MACHINE, and send them in + ;; topologically sorted order so that they can actually be imported. + (let ((files (missing-files (topologically-sorted store files))) + (pipe (remote-pipe machine OPEN_WRITE + '("guix" "archive" "--import")))) + (format #t (_ "sending ~a store files to '~a'...~%") + (length files) (build-machine-name machine)) + (catch 'system-error + (lambda () + (export-paths store files pipe)) + (lambda args + (warning (_ "failed while exporting files to '~a': ~a~%") + (build-machine-name machine) + (strerror (system-error-errno args))))) + (zero? (close-pipe pipe)))))) + +(define (retrieve-files files machine) + "Retrieve FILES from MACHINE's store, and import them." + (define host + (build-machine-name machine)) + + (let ((pipe (remote-pipe machine OPEN_READ + `("guix" "archive" "--export" ,@files)))) + (and pipe + (with-store store + (guard (c ((nix-protocol-error? c) + (warning (_ "failed to import files from '~a': ~s~%") + host c) + #f)) + (format (current-error-port) "retrieving ~a files from '~a'...~%" + (length files) host) + + ;; We cannot use the 'import-paths' RPC here because we already + ;; hold the locks for FILES. + (restore-file-set pipe + #:log-port (current-error-port) + #:lock? #f) + + (zero? (close-pipe pipe))))))) + +(define (machine-matches? machine requirements) + "Return #t if MACHINE matches REQUIREMENTS." + (and (string=? (build-requirements-system requirements) + (build-machine-system machine)) + (lset<= string=? + (build-requirements-features requirements) + (build-machine-features machine)))) + +(define (machine-faster? m1 m2) + "Return #t if M1 is faster than M2." + (> (build-machine-speed m1) (build-machine-speed m2))) + +(define (choose-build-machine requirements machines) + "Return the best machine among MACHINES fulfilling REQUIREMENTS, or #f." + ;; FIXME: Take machine load into account, and/or shuffle MACHINES. + (let ((machines (sort (filter (cut machine-matches? <> requirements) + machines) + machine-faster?))) + (match machines + ((head . _) + head) + (_ #f)))) + +(define* (process-request wants-local? system drv features + #:key + print-build-trace? (max-silent-time 3600) + (build-timeout 7200)) + "Process a request to build DRV." + (let* ((local? (and wants-local? (string=? system (%current-system)))) + (reqs (build-requirements + (system system) + (features features))) + (machine (choose-build-machine reqs (build-machines)))) + (if machine + (match (open-ssh-gateway machine) + ((? integer? pid) + (display "# accept\n") + (let ((inputs (string-tokenize (read-line))) + (outputs (string-tokenize (read-line)))) + (when (send-files (cons (derivation-file-name drv) inputs) + machine) + (let ((log (offload drv machine + #:print-build-trace? print-build-trace? + #:max-silent-time max-silent-time + #:build-timeout build-timeout))) + (let loop ((line (read-line log))) + (if (eof-object? line) + (close-pipe log) + (begin + (display line) (newline) + (loop (read-line log)))))) + (retrieve-files outputs machine))) + (format (current-error-port) "done with offloaded '~a'~%" + (derivation-file-name drv)) + (kill pid SIGTERM)) + (#f + (display "# decline\n"))) + (display "# decline\n")))) + +(define-syntax-rule (with-nar-error-handling body ...) + "Execute BODY with any &nar-error suitably reported to the user." + (guard (c ((nar-error? c) + (let ((file (nar-error-file c))) + (if (condition-has-type? c &message) + (leave (_ "while importing file '~a': ~a~%") + file (gettext (condition-message c))) + (leave (_ "failed to import file '~a'~%") + file))))) + body ...)) + + +;;; +;;; Entry point. +;;; + +(define (guix-offload . args) + (define request-line-rx + ;; The request format. See 'tryBuildHook' method in build.cc. + (make-regexp "([01]) ([a-z0-9_-]+) (/[[:graph:]]+.drv) ([[:graph:]]*)")) + + (define not-coma + (char-set-complement (char-set #\,))) + + ;; Make sure $HOME really corresponds to the current user. This is + ;; necessary since lsh uses that to determine the location of the yarrow + ;; seed file, and fails if it's owned by someone else. + (and=> (passwd:dir (getpw (getuid))) + (cut setenv "HOME" <>)) + + (match args + ((system max-silent-time print-build-trace? build-timeout) + (let ((max-silent-time (string->number max-silent-time)) + (build-timeout (string->number build-timeout)) + (print-build-trace? (string=? print-build-trace? "1"))) + (parameterize ((%current-system system)) + (let loop ((line (read-line))) + (unless (eof-object? line) + (cond ((regexp-exec request-line-rx line) + => + (lambda (match) + (with-nar-error-handling + (process-request (equal? (match:substring match 1) "1") + (match:substring match 2) ; system + (call-with-input-file + (match:substring match 3) + read-derivation) + (string-tokenize + (match:substring match 4) not-coma) + #:print-build-trace? print-build-trace? + #:max-silent-time max-silent-time + #:build-timeout build-timeout)))) + (else + (leave (_ "invalid request line: ~s~%") line))) + (loop (read-line))))))) + (("--version") + (show-version-and-exit "guix offload")) + (("--help") + (format #t (_ "Usage: guix offload SYSTEM PRINT-BUILD-TRACE +Process build offload requests written on the standard input, possibly +offloading builds to the machines listed in '~a'.~%") + %machine-file) + (display (_ " +This tool is meant to be used internally by 'guix-daemon'.\n")) + (show-bug-report-information)) + (x + (leave (_ "invalid arguments: ~{~s ~}~%") x)))) + +;;; offload.scm ends here |