aboutsummaryrefslogtreecommitdiff
path: root/gnu/system/linux-container.scm
blob: c780b68fba471394811bfc3e4f98b34cce79ec3e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015 David Thompson <davet@gnu.org>
;;; Copyright © 2016-2017, 2019-2023 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2019 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2020 Google LLC
;;; Copyright © 2022 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2023 Pierre Langlois <pierre.langlois@gmx.com>
;;; Copyright © 2024 Leo Nikkilä <hello@lnikki.la>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (gnu system linux-container)
  #:use-module (ice-9 match)
  #:use-module (srfi srfi-1)
  #:use-module (guix config)
  #:use-module (guix store)
  #:use-module (guix gexp)
  #:use-module (guix derivations)
  #:use-module (guix monads)
  #:use-module (guix modules)
  #:use-module (gnu build linux-container)
  #:use-module (gnu services)
  #:use-module (gnu services base)
  #:use-module (gnu services networking)
  #:use-module (gnu services shepherd)
  #:use-module (gnu system)
  #:use-module (gnu system file-systems)
  #:export (system-container
            containerized-operating-system
            container-script
            eval/container))

(define* (container-essential-services os #:key shared-network?)
  "Return a list of essential services corresponding to OS, a
non-containerized OS.  This procedure essentially strips essential services
from OS that are needed on the bare metal and not in a container."
  (define base
    (remove (lambda (service)
              (memq (service-kind service)
                    (cons* (service-kind %linux-bare-metal-service)
                           firmware-service-type
                           system-service-type
                           (if shared-network?
                               (list hosts-service-type)
                               '()))))
            (operating-system-essential-services os)))

  (cons (service system-service-type
                 `(("locale" ,(operating-system-locale-directory os))))
        ;; If network is to be shared with the host, remove network
        ;; configuration files from etc-service.
        (if shared-network?
            (modify-services base
              (etc-service-type
               files => (remove
                         (match-lambda
                           ((filename _)
                            (member filename
                                    (map basename %network-configuration-files))))
                         files)))
            base)))

(define dummy-networking-service-type
  (shepherd-service-type
   'dummy-networking
   (const (shepherd-service
           (documentation "Provide loopback and networking without actually
doing anything.")
           (provision '(loopback networking))
           (start #~(const #t))))
   #f
   (description "Provide loopback and networking without actually doing
anything.  This service is used by guest systems running in containers, where
networking support is provided by the host.")))

(define %nscd-container-caches
  ;; Similar to %nscd-default-caches but with smaller cache sizes. This allows
  ;; many containers to coexist on the same machine without exhausting RAM.
  (map (lambda (cache)
         (nscd-cache
          (inherit cache)
          (max-database-size (expt 2 18)))) ;256KiB
       %nscd-default-caches))

(define* (containerized-operating-system os mappings
                                         #:key
                                         shared-network?
                                         (extra-file-systems '()))
  "Return an operating system based on OS for use in a Linux container
environment.  MAPPINGS is a list of <file-system-mapping> to realize in the
containerized OS.  EXTRA-FILE-SYSTEMS is a list of file systems to add to OS."
  (define user-file-systems
    (remove (lambda (fs)
              (let ((target (file-system-mount-point fs))
                    (source (file-system-device fs)))
                (or (string=? target (%store-prefix))
                    (string=? target "/")
                    (and (string? source)
                         (string-prefix? "/dev/" source))
                    (string-prefix? "/dev/" target)
                    (string-prefix? "/sys/" target))))
            (operating-system-file-systems os)))

  (define (mapping->fs fs)
    (file-system (inherit (file-system-mapping->bind-mount fs))
      (needed-for-boot? #t)))

  (define services-to-drop
    ;; Service types to filter from the original operating-system. Some of
    ;; these make no sense in a container (e.g., those that access
    ;; /dev/tty[0-9]), while others just need to be reinstantiated with
    ;; different configs that are better suited to containers.
    (append (list console-font-service-type
                  mingetty-service-type
                  agetty-service-type)
            (if shared-network?
                ;; Replace these with dummy-networking-service-type below.
                (list
                 static-networking-service-type
                 dhcp-client-service-type
                 network-manager-service-type
                 connman-service-type)
                (list))))

  (define services-to-add
    ;; Many Guix services depend on a 'networking' shepherd
    ;; service, so make sure to provide a dummy 'networking'
    ;; service when we are sure that networking is already set up
    ;; in the host and can be used.  That prevents double setup.
    (if shared-network?
        (list (service dummy-networking-service-type))
        '()))

  (define os-with-base-essential-services
    (operating-system
      (inherit os)
      (swap-devices '()) ; disable swap
      (services
       (append services-to-add
               (filter-map (lambda (s)
                             (cond ((memq (service-kind s) services-to-drop)
                                    #f)
                                   ((eq? nscd-service-type (service-kind s))
                                    (service nscd-service-type
                                             (nscd-configuration
                                              (inherit (service-value s))
                                              (caches %nscd-container-caches))))
                                   ((eq? guix-service-type (service-kind s))
                                    ;; Pass '--disable-chroot' so that
                                    ;; guix-daemon can build thing even in
                                    ;; Docker without '--privileged'.
                                    (service guix-service-type
                                             (guix-configuration
                                              (inherit (service-value s))
                                              (extra-options
                                               (cons "--disable-chroot"
                                                     (guix-configuration-extra-options
                                                      (service-value s)))))))
                                   (else s)))
                           (operating-system-user-services os))))
      (file-systems (append (map mapping->fs
                                 (if shared-network?
                                     (append %network-file-mappings mappings)
                                     mappings))
                            extra-file-systems
                            user-file-systems

                            ;; Provide a dummy root file system so we can create
                            ;; a 'boot-parameters' file.
                            (list (file-system
                                    (mount-point "/")
                                    (device "nothing")
                                    (type "dummy")))))))

  ;; `essential-services' is thunked, we need to evaluate it separately.
  (operating-system
    (inherit os-with-base-essential-services)
    (essential-services (container-essential-services
                         os-with-base-essential-services
                         #:shared-network? shared-network?))))

(define* (container-script os #:key (mappings '()) shared-network?)
  "Return a derivation of a script that runs OS as a Linux container.
MAPPINGS is a list of <file-system> objects that specify the files/directories
that will be shared with the host system."
  (define (mountable-file-system? file-system)
    ;; Return #t if FILE-SYSTEM should be mounted in the container.
    (and (not (string=? "/" (file-system-mount-point file-system)))
         (file-system-needed-for-boot? file-system)))

  (define (os-file-system-specs os)
    (map file-system->spec
         (filter mountable-file-system?
                 (operating-system-file-systems os))))

  (let* ((os (containerized-operating-system
              os (cons %store-mapping mappings)
              #:shared-network? shared-network?
              #:extra-file-systems %container-file-systems))
         (specs (os-file-system-specs os)))

    (define script
      (with-imported-modules (source-module-closure
                              '((guix build utils)
                                (gnu build linux-container)
                                (guix i18n)
                                (guix diagnostics)))
        #~(begin
            (use-modules (gnu build linux-container)
                         (gnu system file-systems) ;spec->file-system
                         (guix build utils)
                         (guix i18n)
                         (guix diagnostics)
                         (srfi srfi-1)
                         (srfi srfi-37)
                         (ice-9 match))

            (define (show-help)
              (display (G_ "Usage: run-container [OPTION ...]
Run the container with the given options."))
              (newline)
              (display (G_ "
      --share=SPEC       share host file system with read/write access
                         according to SPEC"))
              (display (G_ "
      --expose=SPEC      expose host file system directory as read-only
                         according to SPEC"))
              (newline)
              (display (G_ "
  -h, --help             display this help and exit"))
              (newline))

            (define %options
              ;; Specifications of the command-line options.
              (list (option '(#\h "help") #f #f
                            (lambda args
                              (show-help)
                              (exit 0)))
                    (option '("share") #t #f
                            (lambda (opt name arg result)
                              (alist-cons 'file-system-mapping
                                          (specification->file-system-mapping arg #t)
                                          result)))
                    (option '("expose") #t #f
                            (lambda (opt name arg result)
                              (alist-cons 'file-system-mapping
                                          (specification->file-system-mapping arg #f)
                                          result)))))

            (define (parse-options args options)
              (args-fold args options
                         (lambda (opt name arg . rest)
                           (report-error (G_ "~A: unrecognized option~%") name)
                           (exit 1))
                         (lambda (op res) (cons op res))
                         '()))

            (define (explain pid)
              ;; XXX: We can't quite call 'bindtextdomain' so there's actually
              ;; no i18n.
              ;; XXX: Should we really give both options? 'guix container exec'
              ;; is a more verbose command.  Hard to fail to enter the container
              ;; when we list two options.
              (info (G_ "system container is running as PID ~a~%") pid)
              (info (G_ "Run 'sudo guix container exec ~a /run/current-system/profile/bin/bash --login'\n")
                    pid)
              (info (G_ "or run 'sudo nsenter -a -t ~a' to get a shell into it.~%") pid)
              (newline (guix-warning-port)))

            (let* ((opts (parse-options (cdr (command-line)) %options))
                   (mappings (filter-map (match-lambda
                                           (('file-system-mapping . mapping) mapping)
                                           (_ #f))
                                         opts))
                   (file-systems
                    (filter-map (lambda (fs)
                                  (let ((flags (file-system-flags fs)))
                                    (and (or (not (memq 'bind-mount flags))
                                             (file-exists? (file-system-device fs)))
                                         fs)))
                                (append (map file-system-mapping->bind-mount mappings)
                                        (map spec->file-system '#$specs)))))
              (call-with-container file-systems
                (lambda ()
                  (setenv "HOME" "/root")
                  (setenv "TMPDIR" "/tmp")
                  (setenv "GUIX_NEW_SYSTEM" #$os)
                  (for-each mkdir-p '("/run" "/bin" "/etc" "/home" "/var"))
                  (primitive-load (string-append #$os "/boot")))
                ;; A range of 65536 uid/gids is used to cover 16 bits worth of
                ;; users and groups, which is sufficient for most cases.
                ;;
                ;; See: http://www.freedesktop.org/software/systemd/man/systemd-nspawn.html#--private-users=
                #:host-uids 65536
                #:namespaces (if #$shared-network?
                                 (delq 'net %namespaces)
                                 %namespaces)
                #:process-spawned-hook explain)))))

    (gexp->script "run-container" script)))

(define* (eval/container exp
                         #:key
                         (mappings '())
                         (namespaces %namespaces)
                         (guest-uid 0) (guest-gid 0))
  "Evaluate EXP, a gexp, in a new process executing in separate namespaces as
listed in NAMESPACES.  Add MAPPINGS, a list of <file-system-mapping>, to the
set of directories visible in the process's mount namespace.  Inside the
namespaces, run code as GUEST-UID and GUEST-GID.  Return the process' exit
status as a monadic value.

This is useful to implement processes that, unlike derivations, are not
entirely pure and need to access the outside world or to perform side
effects."
  (mlet %store-monad ((lowered (lower-gexp exp)))
    (define inputs
      (cons (lowered-gexp-guile lowered)
            (lowered-gexp-inputs lowered)))

    (define items
      (append (append-map derivation-input-output-paths inputs)
              (lowered-gexp-sources lowered)))

    (mbegin %store-monad
      (built-derivations inputs)
      (mlet %store-monad ((closure ((store-lift requisites) items)))
        (return (call-with-container (map file-system-mapping->bind-mount
                                          (append (map (lambda (item)
                                                         (file-system-mapping
                                                          (source item)
                                                          (target source)))
                                                       closure)
                                                  mappings))
                  (lambda ()
                    (apply execl
                           (string-append (derivation-input-output-path
                                           (lowered-gexp-guile lowered))
                                          "/bin/guile")
                           "guile"
                           (append (append-map (lambda (directory)
                                                 `("-L" ,directory))
                                               (lowered-gexp-load-path lowered))
                                   (append-map (lambda (directory)
                                                 `("-C" ,directory))
                                               (lowered-gexp-load-compiled-path
                                                lowered))
                                   (list "-c"
                                         (object->string
                                          (lowered-gexp-sexp lowered))))))
                  #:namespaces namespaces
                  #:guest-uid guest-uid
                  #:guest-gid guest-gid))))))