aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2018-12-24 15:40:04 +0100
committerLudovic Courtès <ludo@gnu.org>2018-12-24 16:06:32 +0100
commited7b44370f71126087eb953f36aad8dc4c44109f (patch)
treec9d9e952f956e58b5878ed1cee70d8423b58b531
parentaf15fe13b69d27f9902353540fd8ad0001ce8311 (diff)
downloadpatches-ed7b44370f71126087eb953f36aad8dc4c44109f.tar
patches-ed7b44370f71126087eb953f36aad8dc4c44109f.tar.gz
offload: Use (guix inferior) instead of (ssh dist node).
Using inferiors and thus 'guix repl' simplifies setup on build machines (no need to worry about GUILE_LOAD_PATH etc.) Furthermore, the 'guix repl -t machine' protocol running in a remote pipe addresses several issues with the current implementation of nodes and RREPLs in Guile-SSH: fewer round trips, doesn't leave a 'guile --listen' process behind it, stateless (since a new process is started each time), more efficient (the SSH channel can be reused), more reliable (no 'pgrep', 'pkill', and shellology; see <https://github.com/artyom-poptsov/guile-ssh/issues/11> as an example.) * guix/ssh.scm (inferior-remote-eval): New procedure. (send-files): Use it instead of 'make-node' and 'node-eval'. * guix/scripts/offload.scm (node-guile-version): New procedure. (node-free-disk-space, transfer-and-offload, node-load) (choose-build-machine, assert-node-has-guix): Use 'remote-inferior' instead of 'make-node' and 'inferior-eval' instead of 'node-eval'. (assert-node-can-import, assert-node-can-export): Likewise, and add 'session' parameter. (check-machine-availability): Likewise, and add calls to 'close-inferior' and 'disconnect!'. (check-machine-status): Likewise. * doc/guix.texi (Daemon Offload Setup): Remove bit related to 'guile' in $PATH and $GUILE_LOAD_PATH; mention 'guix' alone.
-rw-r--r--doc/guix.texi8
-rw-r--r--guix/scripts/offload.scm107
-rw-r--r--guix/ssh.scm34
3 files changed, 83 insertions, 66 deletions
diff --git a/doc/guix.texi b/doc/guix.texi
index f86a2885a7..c182995b2b 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -1051,13 +1051,11 @@ name, and they will be scheduled on matching build machines.
@end table
@end deftp
-The @code{guile} command must be in the search path on the build
-machines. In addition, the Guix modules must be in
-@code{$GUILE_LOAD_PATH} on the build machine---you can check whether
-this is the case by running:
+The @command{guix} command must be in the search path on the build
+machines. You can check whether this is the case by running:
@example
-ssh build-machine guile -c "'(use-modules (guix config))'"
+ssh build-machine guix repl --version
@end example
There is one last thing to do once @file{machines.scm} is in place. As
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
index bfdaa3c011..b472d202a9 100644
--- a/guix/scripts/offload.scm
+++ b/guix/scripts/offload.scm
@@ -23,13 +23,12 @@
#:use-module (ssh session)
#:use-module (ssh channel)
#:use-module (ssh popen)
- #:use-module (ssh dist)
- #:use-module (ssh dist node)
#:use-module (ssh version)
#:use-module (guix config)
#:use-module (guix records)
#:use-module (guix ssh)
#:use-module (guix store)
+ #:use-module (guix inferior)
#:use-module (guix derivations)
#:use-module ((guix serialization)
#:select (nar-error? nar-error-file))
@@ -321,12 +320,15 @@ hook."
(set-port-revealed! port 1)
port))
+(define (node-guile-version node)
+ (inferior-eval '(version) node))
+
(define (node-free-disk-space node)
"Return the free disk space, in bytes, in NODE's store."
- (node-eval node
- `(begin
- (use-modules (guix build syscalls))
- (free-disk-space ,(%store-prefix)))))
+ (inferior-eval `(begin
+ (use-modules (guix build syscalls))
+ (free-disk-space ,(%store-prefix)))
+ node))
(define* (transfer-and-offload drv machine
#:key
@@ -367,8 +369,12 @@ MACHINE."
(derivation-file-name drv)
(build-machine-name machine)
(nix-protocol-error-message c))
- (let* ((space (false-if-exception
- (node-free-disk-space (make-node session)))))
+ (let* ((inferior (false-if-exception (remote-inferior session)))
+ (space (false-if-exception
+ (node-free-disk-space inferior))))
+
+ (when inferior
+ (close-inferior inferior))
;; Use exit code 100 for a permanent build failure. The daemon
;; interprets other non-zero codes as transient build failures.
@@ -417,11 +423,11 @@ of free disk space on '~a'~%")
(define (node-load node)
"Return the load on NODE. Return +∞ if NODE is misbehaving."
- (let ((line (node-eval node
- '(begin
- (use-modules (ice-9 rdelim))
- (call-with-input-file "/proc/loadavg"
- read-string)))))
+ (let ((line (inferior-eval '(begin
+ (use-modules (ice-9 rdelim))
+ (call-with-input-file "/proc/loadavg"
+ read-string))
+ node)))
(if (eof-object? line)
+inf.0 ;MACHINE does not respond, so assume it is infinitely loaded
(match (string-tokenize line)
@@ -508,9 +514,10 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
;; Note: We call 'node-load' only as a last resort because it is
;; too costly to call it once for every machine.
(let* ((session (false-if-exception (open-ssh-session best)))
- (node (and session (make-node session)))
+ (node (and session (remote-inferior session)))
(load (and node (normalized-load best (node-load node))))
(space (and node (node-free-disk-space node))))
+ (when node (close-inferior node))
(when session (disconnect! session))
(if (and node (< load 2.) (>= space %minimum-disk-space))
(match others
@@ -613,18 +620,17 @@ If TIMEOUT is #f, simply evaluate EXP..."
(#f
(report-guile-error name))
((? string? version)
- ;; Note: The version string already contains the word "Guile".
- (info (G_ "'~a' is running ~a~%")
+ (info (G_ "'~a' is running GNU Guile ~a~%")
name (node-guile-version node)))))
(define (assert-node-has-guix node name)
"Bail out if NODE lacks the (guix) module, or if its daemon is not running."
(catch 'node-repl-error
(lambda ()
- (match (node-eval node
- '(begin
- (use-modules (guix))
- (and add-text-to-store 'alright)))
+ (match (inferior-eval '(begin
+ (use-modules (guix))
+ (and add-text-to-store 'alright))
+ node)
('alright #t)
(_ (report-module-error name))))
(lambda (key . args)
@@ -632,12 +638,12 @@ If TIMEOUT is #f, simply evaluate EXP..."
(catch 'node-repl-error
(lambda ()
- (match (node-eval node
- '(begin
- (use-modules (guix))
- (with-store store
- (add-text-to-store store "test"
- "Hello, build machine!"))))
+ (match (inferior-eval '(begin
+ (use-modules (guix))
+ (with-store store
+ (add-text-to-store store "test"
+ "Hello, build machine!")))
+ node)
((? string? str)
(info (G_ "Guix is usable on '~a' (test returned ~s)~%")
name str))
@@ -656,25 +662,23 @@ If TIMEOUT is #f, simply evaluate EXP..."
(string-append name "-"
(number->string (random 1000000 (force %random-state)))))
-(define (assert-node-can-import node name daemon-socket)
+(define (assert-node-can-import session node name daemon-socket)
"Bail out if NODE refuses to import our archives."
- (let ((session (node-session node)))
- (with-store store
- (let* ((item (add-text-to-store store "export-test" (nonce)))
- (remote (connect-to-remote-daemon session daemon-socket)))
- (with-store local
- (send-files local (list item) remote))
-
- (if (valid-path? remote item)
- (info (G_ "'~a' successfully imported '~a'~%")
- name item)
- (leave (G_ "'~a' was not properly imported on '~a'~%")
- item name))))))
-
-(define (assert-node-can-export node name daemon-socket)
+ (with-store store
+ (let* ((item (add-text-to-store store "export-test" (nonce)))
+ (remote (connect-to-remote-daemon session daemon-socket)))
+ (with-store local
+ (send-files local (list item) remote))
+
+ (if (valid-path? remote item)
+ (info (G_ "'~a' successfully imported '~a'~%")
+ name item)
+ (leave (G_ "'~a' was not properly imported on '~a'~%")
+ item name)))))
+
+(define (assert-node-can-export session node name daemon-socket)
"Bail out if we cannot import signed archives from NODE."
- (let* ((session (node-session node))
- (remote (connect-to-remote-daemon session daemon-socket))
+ (let* ((remote (connect-to-remote-daemon session daemon-socket))
(item (add-text-to-store remote "import-test" (nonce name))))
(with-store store
(if (and (retrieve-files store (list item) remote)
@@ -701,11 +705,13 @@ machine."
(let* ((names (map build-machine-name machines))
(sockets (map build-machine-daemon-socket machines))
(sessions (map open-ssh-session machines))
- (nodes (map make-node sessions)))
+ (nodes (map remote-inferior sessions)))
(for-each assert-node-repl nodes names)
(for-each assert-node-has-guix nodes names)
- (for-each assert-node-can-import nodes names sockets)
- (for-each assert-node-can-export nodes names sockets))))
+ (for-each assert-node-can-import sessions nodes names sockets)
+ (for-each assert-node-can-export sessions nodes names sockets)
+ (for-each close-inferior nodes)
+ (for-each disconnect! sessions))))
(define (check-machine-status machine-file pred)
"Print the load of each machine matching PRED in MACHINE-FILE."
@@ -722,10 +728,11 @@ machine."
(length machines) machine-file)
(for-each (lambda (machine)
(let* ((session (open-ssh-session machine))
- (node (make-node session))
- (uts (node-eval node '(uname)))
- (load (node-load node))
- (free (node-free-disk-space node)))
+ (inferior (remote-inferior session))
+ (uts (inferior-eval '(uname) inferior))
+ (load (node-load inferior))
+ (free (node-free-disk-space inferior)))
+ (close-inferior inferior)
(disconnect! session)
(format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\
host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%"
diff --git a/guix/ssh.scm b/guix/ssh.scm
index b8bea8028a..1ed8406633 100644
--- a/guix/ssh.scm
+++ b/guix/ssh.scm
@@ -27,8 +27,6 @@
#:use-module (ssh channel)
#:use-module (ssh popen)
#:use-module (ssh session)
- #:use-module (ssh dist)
- #:use-module (ssh dist node)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
@@ -102,6 +100,20 @@ Throw an error on failure."
"guix" "repl" "-t" "machine")))
(port->inferior pipe)))
+(define (inferior-remote-eval exp session)
+ "Evaluate EXP in a new inferior running in SESSION, and close the inferior
+right away."
+ (let ((inferior (remote-inferior session)))
+ (dynamic-wind
+ (const #t)
+ (lambda ()
+ (inferior-eval exp inferior))
+ (lambda ()
+ ;; Close INFERIOR right away to prevent finalization from happening in
+ ;; another thread at the wrong time (see
+ ;; <https://bugs.gnu.org/26976>.)
+ (close-inferior inferior)))))
+
(define* (remote-daemon-channel session
#:optional
(socket-name
@@ -277,15 +289,15 @@ Return the list of store items actually sent."
;; Compute the subset of FILES missing on SESSION and send them.
(let* ((files (if recursive? (requisites local files) files))
(session (channel-get-session (nix-server-socket remote)))
- (node (make-node session))
- (missing (node-eval node
- `(begin
- (use-modules (guix)
- (srfi srfi-1) (srfi srfi-26))
-
- (with-store store
- (remove (cut valid-path? store <>)
- ',files)))))
+ (missing (inferior-remote-eval
+ `(begin
+ (use-modules (guix)
+ (srfi srfi-1) (srfi srfi-26))
+
+ (with-store store
+ (remove (cut valid-path? store <>)
+ ',files)))
+ session))
(count (length missing))
(sizes (map (lambda (item)
(path-info-nar-size (query-path-info local item)))