aboutsummaryrefslogtreecommitdiff
path: root/guix/scripts/offload.scm
diff options
context:
space:
mode:
Diffstat (limited to 'guix/scripts/offload.scm')
-rw-r--r--guix/scripts/offload.scm380
1 files changed, 380 insertions, 0 deletions
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
new file mode 100644
index 0000000000..d919ede3c7
--- /dev/null
+++ b/guix/scripts/offload.scm
@@ -0,0 +1,380 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2014 Ludovic Courtès <ludo@gnu.org>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (guix scripts offload)
+ #:use-module (guix config)
+ #:use-module (guix records)
+ #:use-module (guix store)
+ #:use-module (guix derivations)
+ #:use-module (guix nar)
+ #:use-module (guix utils)
+ #:use-module ((guix build utils) #:select (which))
+ #:use-module (guix ui)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-26)
+ #:use-module (srfi srfi-34)
+ #:use-module (srfi srfi-35)
+ #:use-module (ice-9 popen)
+ #:use-module (ice-9 rdelim)
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 regex)
+ #:use-module (ice-9 format)
+ #:use-module (rnrs io ports)
+ #:export (build-machine
+ build-requirements
+ guix-offload))
+
+;;; Commentary:
+;;;
+;;; Attempt to offload builds to the machines listed in
+;;; /etc/guix/machines.scm, transferring missing dependencies over SSH, and
+;;; retrieving the build output(s) over SSH upon success.
+;;;
+;;; This command should not be used directly; instead, it is called on-demand
+;;; by the daemon, unless it was started with '--no-build-hook' or a client
+;;; inhibited build hooks.
+;;;
+;;; Code:
+
+
+(define-record-type* <build-machine>
+ build-machine make-build-machine
+ build-machine?
+ (name build-machine-name) ; string
+ (system build-machine-system) ; string
+ (user build-machine-user) ; string
+ (private-key build-machine-private-key ; file name
+ (default (user-lsh-private-key)))
+ (parallel-builds build-machine-parallel-builds ; number
+ (default 1))
+ (speed build-machine-speed ; inexact real
+ (default 1.0))
+ (features build-machine-features ; list of strings
+ (default '())))
+
+(define-record-type* <build-requirements>
+ build-requirements make-build-requirements
+ build-requirements?
+ (system build-requirements-system) ; string
+ (features build-requirements-features ; list of strings
+ (default '())))
+
+(define %machine-file
+ ;; File that lists machines available as build slaves.
+ (string-append %config-directory "/machines.scm"))
+
+(define %lsh-command
+ "lsh")
+
+(define %lshg-command
+ ;; FIXME: 'lshg' fails to pass large amounts of data, see
+ ;; <http://lists.lysator.liu.se/pipermail/lsh-bugs/2014q1/000639.html>.
+ "lsh")
+
+(define (user-lsh-private-key)
+ "Return the user's default lsh private key, or #f if it could not be
+determined."
+ (and=> (getenv "HOME")
+ (cut string-append <> "/.lsh/identity")))
+
+(define %user-module
+ ;; Module in which the machine description file is loaded.
+ (let ((module (make-fresh-user-module)))
+ (module-use! module (resolve-interface '(guix scripts offload)))
+ module))
+
+(define* (build-machines #:optional (file %machine-file))
+ "Read the list of build machines from FILE and return it."
+ (catch #t
+ (lambda ()
+ ;; Avoid ABI incompatibility with the <build-machine> record.
+ (set! %fresh-auto-compile #t)
+
+ (save-module-excursion
+ (lambda ()
+ (set-current-module %user-module)
+ (primitive-load %machine-file))))
+ (lambda args
+ (match args
+ (('system-error . _)
+ (let ((err (system-error-errno args)))
+ ;; Silently ignore missing file since this is a common case.
+ (if (= ENOENT err)
+ '()
+ (leave (_ "failed to open machine file '~a': ~a~%")
+ %machine-file (strerror err)))))
+ (_
+ (leave (_ "failed to load machine file '~a': ~s~%")
+ %machine-file args))))))
+
+(define (open-ssh-gateway machine)
+ "Initiate an SSH connection gateway to MACHINE, and return the PID of the
+running lsh gateway upon success, or #f on failure."
+ (catch 'system-error
+ (lambda ()
+ (let* ((port (open-pipe* OPEN_READ %lsh-command
+ "-l" (build-machine-user machine)
+ "-i" (build-machine-private-key machine)
+ ;; XXX: With lsh 2.1, passing '--write-pid'
+ ;; last causes the PID not to be printed.
+ "--write-pid" "--gateway" "--background" "-z"
+ (build-machine-name machine)))
+ (line (read-line port))
+ (status (close-pipe port)))
+ (if (zero? status)
+ (let ((pid (string->number line)))
+ (if (integer? pid)
+ pid
+ (begin
+ (warning (_ "'~a' did not write its PID on stdout: ~s~%")
+ %lsh-command line)
+ #f)))
+ (begin
+ (warning (_ "failed to initiate SSH connection to '~a':\
+ '~a' exited with ~a~%")
+ (build-machine-name machine)
+ %lsh-command
+ (status:exit-val status))
+ #f))))
+ (lambda args
+ (leave (_ "failed to execute '~a': ~a~%")
+ %lsh-command (strerror (system-error-errno args))))))
+
+(define (remote-pipe machine mode command)
+ "Run COMMAND on MACHINE, assuming an lsh gateway has been set up."
+ (catch 'system-error
+ (lambda ()
+ (apply open-pipe* mode %lshg-command
+ "-l" (build-machine-user machine) "-z"
+ (build-machine-name machine)
+ command))
+ (lambda args
+ (warning (_ "failed to execute '~a': ~a~%")
+ %lshg-command (strerror (system-error-errno args)))
+ #f)))
+
+(define* (offload drv machine
+ #:key print-build-trace? (max-silent-time 3600)
+ (build-timeout 7200))
+ "Perform DRV on MACHINE, assuming DRV and its prerequisites are available
+there. Return a read pipe from where to read the build log."
+ (format (current-error-port) "offloading '~a' to '~a'...~%"
+ (derivation-file-name drv) (build-machine-name machine))
+ (format (current-error-port) "@ build-remote ~a ~a~%"
+ (derivation-file-name drv) (build-machine-name machine))
+
+ ;; FIXME: Protect DRV from garbage collection on MACHINE.
+ (let ((pipe (remote-pipe machine OPEN_READ
+ `("guix" "build"
+ ;; FIXME: more options
+ ,(format #f "--max-silent-time=~a"
+ max-silent-time)
+ ,(derivation-file-name drv)))))
+ pipe))
+
+(define (send-files files machine)
+ "Send the subset of FILES that's missing to MACHINE's store. Return #t on
+success, #f otherwise."
+ (define (missing-files files)
+ ;; Return the subset of FILES not already on MACHINE.
+ (let* ((files (format #f "~{~a~%~}" files))
+ (missing (filtered-port
+ (list (which %lshg-command)
+ "-l" (build-machine-user machine)
+ "-i" (build-machine-private-key machine)
+ (build-machine-name machine)
+ "guix" "archive" "--missing")
+ (open-input-string files))))
+ (string-tokenize (get-string-all missing))))
+
+ (with-store store
+ (guard (c ((nix-protocol-error? c)
+ (warning (_ "failed to export files for '~a': ~s~%")
+ (build-machine-name machine)
+ c)
+ (false-if-exception (close-pipe pipe))
+ #f))
+
+ ;; Compute the subset of FILES missing on MACHINE, and send them in
+ ;; topologically sorted order so that they can actually be imported.
+ (let ((files (missing-files (topologically-sorted store files)))
+ (pipe (remote-pipe machine OPEN_WRITE
+ '("guix" "archive" "--import"))))
+ (format #t (_ "sending ~a store files to '~a'...~%")
+ (length files) (build-machine-name machine))
+ (catch 'system-error
+ (lambda ()
+ (export-paths store files pipe))
+ (lambda args
+ (warning (_ "failed while exporting files to '~a': ~a~%")
+ (build-machine-name machine)
+ (strerror (system-error-errno args)))))
+ (zero? (close-pipe pipe))))))
+
+(define (retrieve-files files machine)
+ "Retrieve FILES from MACHINE's store, and import them."
+ (define host
+ (build-machine-name machine))
+
+ (let ((pipe (remote-pipe machine OPEN_READ
+ `("guix" "archive" "--export" ,@files))))
+ (and pipe
+ (with-store store
+ (guard (c ((nix-protocol-error? c)
+ (warning (_ "failed to import files from '~a': ~s~%")
+ host c)
+ #f))
+ (format (current-error-port) "retrieving ~a files from '~a'...~%"
+ (length files) host)
+
+ ;; We cannot use the 'import-paths' RPC here because we already
+ ;; hold the locks for FILES.
+ (restore-file-set pipe
+ #:log-port (current-error-port)
+ #:lock? #f)
+
+ (zero? (close-pipe pipe)))))))
+
+(define (machine-matches? machine requirements)
+ "Return #t if MACHINE matches REQUIREMENTS."
+ (and (string=? (build-requirements-system requirements)
+ (build-machine-system machine))
+ (lset<= string=?
+ (build-requirements-features requirements)
+ (build-machine-features machine))))
+
+(define (machine-faster? m1 m2)
+ "Return #t if M1 is faster than M2."
+ (> (build-machine-speed m1) (build-machine-speed m2)))
+
+(define (choose-build-machine requirements machines)
+ "Return the best machine among MACHINES fulfilling REQUIREMENTS, or #f."
+ ;; FIXME: Take machine load into account, and/or shuffle MACHINES.
+ (let ((machines (sort (filter (cut machine-matches? <> requirements)
+ machines)
+ machine-faster?)))
+ (match machines
+ ((head . _)
+ head)
+ (_ #f))))
+
+(define* (process-request wants-local? system drv features
+ #:key
+ print-build-trace? (max-silent-time 3600)
+ (build-timeout 7200))
+ "Process a request to build DRV."
+ (let* ((local? (and wants-local? (string=? system (%current-system))))
+ (reqs (build-requirements
+ (system system)
+ (features features)))
+ (machine (choose-build-machine reqs (build-machines))))
+ (if machine
+ (match (open-ssh-gateway machine)
+ ((? integer? pid)
+ (display "# accept\n")
+ (let ((inputs (string-tokenize (read-line)))
+ (outputs (string-tokenize (read-line))))
+ (when (send-files (cons (derivation-file-name drv) inputs)
+ machine)
+ (let ((log (offload drv machine
+ #:print-build-trace? print-build-trace?
+ #:max-silent-time max-silent-time
+ #:build-timeout build-timeout)))
+ (let loop ((line (read-line log)))
+ (if (eof-object? line)
+ (close-pipe log)
+ (begin
+ (display line) (newline)
+ (loop (read-line log))))))
+ (retrieve-files outputs machine)))
+ (format (current-error-port) "done with offloaded '~a'~%"
+ (derivation-file-name drv))
+ (kill pid SIGTERM))
+ (#f
+ (display "# decline\n")))
+ (display "# decline\n"))))
+
+(define-syntax-rule (with-nar-error-handling body ...)
+ "Execute BODY with any &nar-error suitably reported to the user."
+ (guard (c ((nar-error? c)
+ (let ((file (nar-error-file c)))
+ (if (condition-has-type? c &message)
+ (leave (_ "while importing file '~a': ~a~%")
+ file (gettext (condition-message c)))
+ (leave (_ "failed to import file '~a'~%")
+ file)))))
+ body ...))
+
+
+;;;
+;;; Entry point.
+;;;
+
+(define (guix-offload . args)
+ (define request-line-rx
+ ;; The request format. See 'tryBuildHook' method in build.cc.
+ (make-regexp "([01]) ([a-z0-9_-]+) (/[[:graph:]]+.drv) ([[:graph:]]*)"))
+
+ (define not-coma
+ (char-set-complement (char-set #\,)))
+
+ ;; Make sure $HOME really corresponds to the current user. This is
+ ;; necessary since lsh uses that to determine the location of the yarrow
+ ;; seed file, and fails if it's owned by someone else.
+ (and=> (passwd:dir (getpw (getuid)))
+ (cut setenv "HOME" <>))
+
+ (match args
+ ((system max-silent-time print-build-trace? build-timeout)
+ (let ((max-silent-time (string->number max-silent-time))
+ (build-timeout (string->number build-timeout))
+ (print-build-trace? (string=? print-build-trace? "1")))
+ (parameterize ((%current-system system))
+ (let loop ((line (read-line)))
+ (unless (eof-object? line)
+ (cond ((regexp-exec request-line-rx line)
+ =>
+ (lambda (match)
+ (with-nar-error-handling
+ (process-request (equal? (match:substring match 1) "1")
+ (match:substring match 2) ; system
+ (call-with-input-file
+ (match:substring match 3)
+ read-derivation)
+ (string-tokenize
+ (match:substring match 4) not-coma)
+ #:print-build-trace? print-build-trace?
+ #:max-silent-time max-silent-time
+ #:build-timeout build-timeout))))
+ (else
+ (leave (_ "invalid request line: ~s~%") line)))
+ (loop (read-line)))))))
+ (("--version")
+ (show-version-and-exit "guix offload"))
+ (("--help")
+ (format #t (_ "Usage: guix offload SYSTEM PRINT-BUILD-TRACE
+Process build offload requests written on the standard input, possibly
+offloading builds to the machines listed in '~a'.~%")
+ %machine-file)
+ (display (_ "
+This tool is meant to be used internally by 'guix-daemon'.\n"))
+ (show-bug-report-information))
+ (x
+ (leave (_ "invalid arguments: ~{~s ~}~%") x))))
+
+;;; offload.scm ends here