aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/guix.texi30
-rw-r--r--guix/scripts/offload.scm54
2 files changed, 62 insertions, 22 deletions
diff --git a/doc/guix.texi b/doc/guix.texi
index 3fc76c8670..553a3b8ae9 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -1081,7 +1081,28 @@ architecture natively supports it, via emulation (@pxref{Transparent
Emulation with QEMU}), or both. Missing prerequisites for the build are
copied over SSH to the target machine, which then proceeds with the
build; upon success the output(s) of the build are copied back to the
-initial machine.
+initial machine. The offload facility comes with a basic scheduler that
+attempts to select the best machine. The best machine is chosen among
+the available machines based on criteria such as:
+
+@enumerate
+@item
+The availability of a build slot. A build machine can have as many
+build slots (connections) as the value of the @code{parallel-builds}
+field of its @code{build-machine} object.
+
+@item
+Its relative speed, as defined via the @code{speed} field of its
+@code{build-machine} object.
+
+@item
+Its load. The normalized machine load must be lower than a threshold
+value, configurable via the @code{overload-threshold} field of its
+@code{build-machine} object.
+
+@item
+Disk space availability. More than a 100 MiB must be available.
+@end enumerate
The @file{/etc/guix/machines.scm} file typically looks like this:
@@ -1185,6 +1206,13 @@ when transferring files to and from build machines.
File name of the Unix-domain socket @command{guix-daemon} is listening
to on that machine.
+@item @code{overload-threshold} (default: @code{0.6})
+The load threshold above which a potential offload machine is
+disregarded by the offload scheduler. The value roughly translates to
+the total processor usage of the build machine, ranging from 0.0 (0%) to
+1.0 (100%). It can also be disabled by setting
+@code{overload-threshold} to @code{#f}.
+
@item @code{parallel-builds} (default: @code{1})
The number of builds that may run in parallel on the machine.
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
index 3dc8ccefcb..a5fe98b675 100644
--- a/guix/scripts/offload.scm
+++ b/guix/scripts/offload.scm
@@ -88,6 +88,10 @@
(default 3))
(daemon-socket build-machine-daemon-socket ; string
(default "/var/guix/daemon-socket/socket"))
+ ;; A #f value tells the offload scheduler to disregard the load of the build
+ ;; machine when selecting the best offload machine.
+ (overload-threshold build-machine-overload-threshold ; inexact real between
+ (default 0.6)) ; 0.0 and 1.0 | #f
(parallel-builds build-machine-parallel-builds ; number
(default 1))
(speed build-machine-speed ; inexact real
@@ -391,30 +395,34 @@ of free disk space on '~a'~%")
(* 100 (expt 2 20))) ;100 MiB
(define (node-load node)
- "Return the load on NODE. Return +∞ if NODE is misbehaving."
+ "Return the load on NODE, a normalized value between 0.0 and 1.0. The value
+is derived from /proc/loadavg and normalized according to the number of
+logical cores available, to give a rough estimation of CPU usage. Return
+1.0 (fully loaded) if NODE is misbehaving."
(let ((line (inferior-eval '(begin
(use-modules (ice-9 rdelim))
(call-with-input-file "/proc/loadavg"
read-string))
- node)))
- (if (eof-object? line)
- +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded
+ node))
+ (ncores (inferior-eval '(begin
+ (use-modules (ice-9 threads))
+ (current-processor-count))
+ node)))
+ (if (or (eof-object? line) (eof-object? ncores))
+ 1.0 ;MACHINE does not respond, so assume it is fully loaded
(match (string-tokenize line)
((one five fifteen . x)
- (string->number one))
+ (let ((load (/ (string->number one) ncores)))
+ (if (> load 1.0)
+ 1.0
+ load)))
(x
- +inf.0)))))
-
-(define (normalized-load machine load)
- "Divide LOAD by the number of parallel builds of MACHINE."
- (if (rational? load)
- (let* ((jobs (build-machine-parallel-builds machine))
- (normalized (/ load jobs)))
- (format (current-error-port) "load on machine '~a' is ~s\
- (normalized: ~s)~%"
- (build-machine-name machine) load normalized)
- normalized)
- load))
+ 1.0)))))
+
+(define (report-load machine load)
+ (format (current-error-port)
+ "normalized load on machine '~a' is ~,2f~%"
+ (build-machine-name machine) load))
(define (random-seed)
(logxor (getpid) (car (gettimeofday))))
@@ -472,11 +480,15 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
(let* ((session (false-if-exception (open-ssh-session best
%short-timeout)))
(node (and session (remote-inferior session)))
- (load (and node (normalized-load best (node-load node))))
+ (load (and node (node-load node)))
+ (threshold (build-machine-overload-threshold best))
(space (and node (node-free-disk-space node))))
+ (when load (report-load best load))
(when node (close-inferior node))
(when session (disconnect! session))
- (if (and node (< load 2.) (>= space %minimum-disk-space))
+ (if (and node
+ (or (not threshold) (< load threshold))
+ (>= space %minimum-disk-space))
(match others
(((machines slots) ...)
;; Release slots from the uninteresting machines.
@@ -708,13 +720,13 @@ machine."
(free (node-free-disk-space inferior)))
(close-inferior inferior)
(format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\
- host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%\
+ host name: ~a~% normalized load: ~,2f~% free disk space: ~,2f MiB~%\
time difference: ~a s~%"
(build-machine-name machine)
(utsname:sysname uts) (utsname:release uts)
(utsname:machine uts)
(utsname:nodename uts)
- (normalized-load machine load)
+ load
(/ free (expt 2 20) 1.)
(- time now))))))))