From ed7b44370f71126087eb953f36aad8dc4c44109f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Mon, 24 Dec 2018 15:40:04 +0100 Subject: offload: Use (guix inferior) instead of (ssh dist node). Using inferiors and thus 'guix repl' simplifies setup on build machines (no need to worry about GUILE_LOAD_PATH etc.) Furthermore, the 'guix repl -t machine' protocol running in a remote pipe addresses several issues with the current implementation of nodes and RREPLs in Guile-SSH: fewer round trips, doesn't leave a 'guile --listen' process behind it, stateless (since a new process is started each time), more efficient (the SSH channel can be reused), more reliable (no 'pgrep', 'pkill', and shellology; see as an example.) * guix/ssh.scm (inferior-remote-eval): New procedure. (send-files): Use it instead of 'make-node' and 'node-eval'. * guix/scripts/offload.scm (node-guile-version): New procedure. (node-free-disk-space, transfer-and-offload, node-load) (choose-build-machine, assert-node-has-guix): Use 'remote-inferior' instead of 'make-node' and 'inferior-eval' instead of 'node-eval'. (assert-node-can-import, assert-node-can-export): Likewise, and add 'session' parameter. (check-machine-availability): Likewise, and add calls to 'close-inferior' and 'disconnect!'. (check-machine-status): Likewise. * doc/guix.texi (Daemon Offload Setup): Remove bit related to 'guile' in $PATH and $GUILE_LOAD_PATH; mention 'guix' alone. --- doc/guix.texi | 8 ++-- guix/scripts/offload.scm | 107 +++++++++++++++++++++++++---------------------- guix/ssh.scm | 34 ++++++++++----- 3 files changed, 83 insertions(+), 66 deletions(-) diff --git a/doc/guix.texi b/doc/guix.texi index f86a2885a7..c182995b2b 100644 --- a/doc/guix.texi +++ b/doc/guix.texi @@ -1051,13 +1051,11 @@ name, and they will be scheduled on matching build machines. @end table @end deftp -The @code{guile} command must be in the search path on the build -machines. In addition, the Guix modules must be in -@code{$GUILE_LOAD_PATH} on the build machine---you can check whether -this is the case by running: +The @command{guix} command must be in the search path on the build +machines. You can check whether this is the case by running: @example -ssh build-machine guile -c "'(use-modules (guix config))'" +ssh build-machine guix repl --version @end example There is one last thing to do once @file{machines.scm} is in place. As diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index bfdaa3c011..b472d202a9 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -23,13 +23,12 @@ #:use-module (ssh session) #:use-module (ssh channel) #:use-module (ssh popen) - #:use-module (ssh dist) - #:use-module (ssh dist node) #:use-module (ssh version) #:use-module (guix config) #:use-module (guix records) #:use-module (guix ssh) #:use-module (guix store) + #:use-module (guix inferior) #:use-module (guix derivations) #:use-module ((guix serialization) #:select (nar-error? nar-error-file)) @@ -321,12 +320,15 @@ hook." (set-port-revealed! port 1) port)) +(define (node-guile-version node) + (inferior-eval '(version) node)) + (define (node-free-disk-space node) "Return the free disk space, in bytes, in NODE's store." - (node-eval node - `(begin - (use-modules (guix build syscalls)) - (free-disk-space ,(%store-prefix))))) + (inferior-eval `(begin + (use-modules (guix build syscalls)) + (free-disk-space ,(%store-prefix))) + node)) (define* (transfer-and-offload drv machine #:key @@ -367,8 +369,12 @@ MACHINE." (derivation-file-name drv) (build-machine-name machine) (nix-protocol-error-message c)) - (let* ((space (false-if-exception - (node-free-disk-space (make-node session))))) + (let* ((inferior (false-if-exception (remote-inferior session))) + (space (false-if-exception + (node-free-disk-space inferior)))) + + (when inferior + (close-inferior inferior)) ;; Use exit code 100 for a permanent build failure. The daemon ;; interprets other non-zero codes as transient build failures. @@ -417,11 +423,11 @@ of free disk space on '~a'~%") (define (node-load node) "Return the load on NODE. Return +∞ if NODE is misbehaving." - (let ((line (node-eval node - '(begin - (use-modules (ice-9 rdelim)) - (call-with-input-file "/proc/loadavg" - read-string))))) + (let ((line (inferior-eval '(begin + (use-modules (ice-9 rdelim)) + (call-with-input-file "/proc/loadavg" + read-string)) + node))) (if (eof-object? line) +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded (match (string-tokenize line) @@ -508,9 +514,10 @@ slot (which must later be released with 'release-build-slot'), or #f and #f." ;; Note: We call 'node-load' only as a last resort because it is ;; too costly to call it once for every machine. (let* ((session (false-if-exception (open-ssh-session best))) - (node (and session (make-node session))) + (node (and session (remote-inferior session))) (load (and node (normalized-load best (node-load node)))) (space (and node (node-free-disk-space node)))) + (when node (close-inferior node)) (when session (disconnect! session)) (if (and node (< load 2.) (>= space %minimum-disk-space)) (match others @@ -613,18 +620,17 @@ If TIMEOUT is #f, simply evaluate EXP..." (#f (report-guile-error name)) ((? string? version) - ;; Note: The version string already contains the word "Guile". - (info (G_ "'~a' is running ~a~%") + (info (G_ "'~a' is running GNU Guile ~a~%") name (node-guile-version node))))) (define (assert-node-has-guix node name) "Bail out if NODE lacks the (guix) module, or if its daemon is not running." (catch 'node-repl-error (lambda () - (match (node-eval node - '(begin - (use-modules (guix)) - (and add-text-to-store 'alright))) + (match (inferior-eval '(begin + (use-modules (guix)) + (and add-text-to-store 'alright)) + node) ('alright #t) (_ (report-module-error name)))) (lambda (key . args) @@ -632,12 +638,12 @@ If TIMEOUT is #f, simply evaluate EXP..." (catch 'node-repl-error (lambda () - (match (node-eval node - '(begin - (use-modules (guix)) - (with-store store - (add-text-to-store store "test" - "Hello, build machine!")))) + (match (inferior-eval '(begin + (use-modules (guix)) + (with-store store + (add-text-to-store store "test" + "Hello, build machine!"))) + node) ((? string? str) (info (G_ "Guix is usable on '~a' (test returned ~s)~%") name str)) @@ -656,25 +662,23 @@ If TIMEOUT is #f, simply evaluate EXP..." (string-append name "-" (number->string (random 1000000 (force %random-state))))) -(define (assert-node-can-import node name daemon-socket) +(define (assert-node-can-import session node name daemon-socket) "Bail out if NODE refuses to import our archives." - (let ((session (node-session node))) - (with-store store - (let* ((item (add-text-to-store store "export-test" (nonce))) - (remote (connect-to-remote-daemon session daemon-socket))) - (with-store local - (send-files local (list item) remote)) - - (if (valid-path? remote item) - (info (G_ "'~a' successfully imported '~a'~%") - name item) - (leave (G_ "'~a' was not properly imported on '~a'~%") - item name)))))) - -(define (assert-node-can-export node name daemon-socket) + (with-store store + (let* ((item (add-text-to-store store "export-test" (nonce))) + (remote (connect-to-remote-daemon session daemon-socket))) + (with-store local + (send-files local (list item) remote)) + + (if (valid-path? remote item) + (info (G_ "'~a' successfully imported '~a'~%") + name item) + (leave (G_ "'~a' was not properly imported on '~a'~%") + item name))))) + +(define (assert-node-can-export session node name daemon-socket) "Bail out if we cannot import signed archives from NODE." - (let* ((session (node-session node)) - (remote (connect-to-remote-daemon session daemon-socket)) + (let* ((remote (connect-to-remote-daemon session daemon-socket)) (item (add-text-to-store remote "import-test" (nonce name)))) (with-store store (if (and (retrieve-files store (list item) remote) @@ -701,11 +705,13 @@ machine." (let* ((names (map build-machine-name machines)) (sockets (map build-machine-daemon-socket machines)) (sessions (map open-ssh-session machines)) - (nodes (map make-node sessions))) + (nodes (map remote-inferior sessions))) (for-each assert-node-repl nodes names) (for-each assert-node-has-guix nodes names) - (for-each assert-node-can-import nodes names sockets) - (for-each assert-node-can-export nodes names sockets)))) + (for-each assert-node-can-import sessions nodes names sockets) + (for-each assert-node-can-export sessions nodes names sockets) + (for-each close-inferior nodes) + (for-each disconnect! sessions)))) (define (check-machine-status machine-file pred) "Print the load of each machine matching PRED in MACHINE-FILE." @@ -722,10 +728,11 @@ machine." (length machines) machine-file) (for-each (lambda (machine) (let* ((session (open-ssh-session machine)) - (node (make-node session)) - (uts (node-eval node '(uname))) - (load (node-load node)) - (free (node-free-disk-space node))) + (inferior (remote-inferior session)) + (uts (inferior-eval '(uname) inferior)) + (load (node-load inferior)) + (free (node-free-disk-space inferior))) + (close-inferior inferior) (disconnect! session) (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%" diff --git a/guix/ssh.scm b/guix/ssh.scm index b8bea8028a..1ed8406633 100644 --- a/guix/ssh.scm +++ b/guix/ssh.scm @@ -27,8 +27,6 @@ #:use-module (ssh channel) #:use-module (ssh popen) #:use-module (ssh session) - #:use-module (ssh dist) - #:use-module (ssh dist node) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (srfi srfi-26) @@ -102,6 +100,20 @@ Throw an error on failure." "guix" "repl" "-t" "machine"))) (port->inferior pipe))) +(define (inferior-remote-eval exp session) + "Evaluate EXP in a new inferior running in SESSION, and close the inferior +right away." + (let ((inferior (remote-inferior session))) + (dynamic-wind + (const #t) + (lambda () + (inferior-eval exp inferior)) + (lambda () + ;; Close INFERIOR right away to prevent finalization from happening in + ;; another thread at the wrong time (see + ;; .) + (close-inferior inferior))))) + (define* (remote-daemon-channel session #:optional (socket-name @@ -277,15 +289,15 @@ Return the list of store items actually sent." ;; Compute the subset of FILES missing on SESSION and send them. (let* ((files (if recursive? (requisites local files) files)) (session (channel-get-session (nix-server-socket remote))) - (node (make-node session)) - (missing (node-eval node - `(begin - (use-modules (guix) - (srfi srfi-1) (srfi srfi-26)) - - (with-store store - (remove (cut valid-path? store <>) - ',files))))) + (missing (inferior-remote-eval + `(begin + (use-modules (guix) + (srfi srfi-1) (srfi srfi-26)) + + (with-store store + (remove (cut valid-path? store <>) + ',files))) + session)) (count (length missing)) (sizes (map (lambda (item) (path-info-nar-size (query-path-info local item))) -- cgit v1.2.3