aboutsummaryrefslogtreecommitdiff
path: root/gnu/packages/aux-files/run-in-namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/aux-files/run-in-namespace.c')
-rw-r--r--gnu/packages/aux-files/run-in-namespace.c313
1 files changed, 313 insertions, 0 deletions
diff --git a/gnu/packages/aux-files/run-in-namespace.c b/gnu/packages/aux-files/run-in-namespace.c
new file mode 100644
index 0000000000..f0cff88552
--- /dev/null
+++ b/gnu/packages/aux-files/run-in-namespace.c
@@ -0,0 +1,313 @@
+/* GNU Guix --- Functional package management for GNU
+ Copyright (C) 2018 Ludovic Courtès <ludo@gnu.org>
+
+ This file is part of GNU Guix.
+
+ GNU Guix is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or (at
+ your option) any later version.
+
+ GNU Guix is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate
+ mount namespace where the store is mounted in its right place.
+
+ We would happily do that in Scheme using 'call-with-container'. However,
+ this very program needs to be relocatable, so it needs to be statically
+ linked, which complicates things (Guile's modules can hardly be "linked"
+ into a single executable.) */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <errno.h>
+#include <libgen.h>
+#include <limits.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/syscall.h>
+
+/* Return a newly allocated string made of DIRECTORY, a slash, and FILE.
+   The caller owns the result and must eventually free it.  Allocation
+   failure trips an assertion.  */
+static char *
+concat (const char *directory, const char *file)
+{
+  size_t directory_length = strlen (directory);
+  size_t file_length = strlen (file);
+
+  /* Room for both components, the separating slash, and the final NUL.  */
+  char *joined = malloc (directory_length + 1 + file_length + 1);
+  assert (joined != NULL);
+
+  memcpy (joined, directory, directory_length);
+  joined[directory_length] = '/';
+  /* Copy FILE together with its terminating NUL.  */
+  memcpy (joined + directory_length + 1, file, file_length + 1);
+  return joined;
+}
+
+/* Create DIRECTORY and all its missing parents, each with mode 0700, like
+   'mkdir -p'.  Components that already exist are left untouched; any other
+   'mkdir' failure aborts via 'assert_perror'.  */
+static void
+mkdir_p (const char *directory)
+{
+  /* 'dirname' may modify its argument, so hand it a private copy.  Use the
+     heap rather than 'strdupa' so recursion depth does not eat stack.  */
+  char *copy = strdup (directory);
+  assert (copy != NULL);
+
+  char *parent = dirname (copy);
+
+  /* Stop once a path is its own parent.  That covers "/" as before, and
+     also "." so that relative file names terminate instead of recursing
+     forever.  */
+  if (strcmp (parent, directory) != 0)
+    {
+      mkdir_p (parent);
+
+      int err = mkdir (directory, 0700);
+      if (err < 0 && errno != EEXIST)
+        assert_perror (errno);
+    }
+
+  free (copy);
+}
+
+/* Recursively delete DIRECTORY and everything below it, like 'rm -rf'.
+   A DIRECTORY that does not exist is not an error; any other failure
+   aborts via 'assert_perror'.  */
+static void
+rm_rf (const char *directory)
+{
+  DIR *stream = opendir (directory);
+
+  /* If DIRECTORY cannot be opened (for instance because it is already
+     gone), skip straight to 'rmdir' below, which tolerates ENOENT and
+     reports anything else, instead of handing NULL to 'readdir'.  */
+  if (stream != NULL)
+    {
+      for (struct dirent *entry = readdir (stream);
+           entry != NULL;
+           entry = readdir (stream))
+        {
+          if (strcmp (entry->d_name, ".") == 0
+              || strcmp (entry->d_name, "..") == 0)
+            continue;
+
+          char *full = concat (directory, entry->d_name);
+
+          int err = unlink (full);
+          if (err < 0)
+            {
+              if (errno == EISDIR)
+                /* Recurse (we expect a shallow directory structure so
+                   there's little risk of stack overflow.)  */
+                rm_rf (full);
+              else
+                assert_perror (errno);
+            }
+
+          free (full);
+        }
+
+      closedir (stream);
+    }
+
+  int err = rmdir (directory);
+  if (err < 0 && errno != ENOENT)
+    assert_perror (errno);
+}
+
+/* Bind mount all the top-level entries in SOURCE to TARGET.  Symbolic links
+   are re-created in TARGET rather than mounted; for every other entry a
+   mount point of the matching kind (directory or plain file) is created in
+   TARGET and the entry is bind-mounted onto it recursively.  Failures abort
+   via 'assert_perror' except where noted below.  */
+static void
+bind_mount (const char *source, const char *target)
+{
+  DIR *stream = opendir (source);
+  if (stream == NULL)
+    {
+      assert_perror (errno);
+      return;
+    }
+
+  for (struct dirent *entry = readdir (stream);
+       entry != NULL;
+       entry = readdir (stream))
+    {
+      /* XXX: Some file systems may not report a useful 'd_type'.  Ignore them
+         for now.  */
+      assert (entry->d_type != DT_UNKNOWN);
+
+      if (strcmp (entry->d_name, ".") == 0
+          || strcmp (entry->d_name, "..") == 0)
+        continue;
+
+      char *abs_source = concat (source, entry->d_name);
+      char *new_entry = concat (target, entry->d_name);
+
+      if (entry->d_type == DT_LNK)
+        {
+          /* Copy the link itself; unreadable or empty links are skipped.  */
+          char link_target[PATH_MAX];
+
+          ssize_t length = readlink (abs_source, link_target,
+                                     sizeof link_target - 1);
+          if (length > 0)
+            {
+              /* 'readlink' does not NUL-terminate its result.  */
+              link_target[length] = '\0';
+              int err = symlink (link_target, new_entry);
+              if (err < 0)
+                assert_perror (errno);
+            }
+        }
+      else
+        {
+          /* Create the mount point.  */
+          if (entry->d_type == DT_DIR)
+            {
+              int err = mkdir (new_entry, 0700);
+              if (err != 0)
+                assert_perror (errno);
+            }
+          else
+            {
+              /* O_CREAT requires an explicit mode argument.  If creation
+                 fails, the 'mount' call below fails in turn and reports
+                 the problem.  */
+              int fd = open (new_entry, O_WRONLY | O_CREAT, 0600);
+              if (fd >= 0)
+                close (fd);
+            }
+
+          int err = mount (abs_source, new_entry, "none",
+                           MS_BIND | MS_REC | MS_RDONLY, NULL);
+
+          /* It used to be that only directories could be bind-mounted.  Thus,
+             keep going if we fail to bind-mount a non-directory entry.
+             That's OK because regular files in the root file system are
+             usually uninteresting.
+
+             NOTE(review): the condition below does the opposite of what the
+             paragraph above describes: it aborts when mounting a
+             NON-directory fails and ignores failures for directories.  It
+             is kept as is to preserve behavior; confirm which one is
+             intended.  */
+          if (err != 0 && entry->d_type != DT_DIR)
+            assert_perror (errno);
+        }
+
+      /* Release the file names on every path, including for symlinks.  */
+      free (new_entry);
+      free (abs_source);
+    }
+
+  closedir (stream);
+}
+
+/* Write a one-line ID map to /proc/PID/FILE that maps ID to itself over a
+   range of length 1.  FILE is the name of the map file under /proc/PID; see
+   user_namespaces(7).  Failure to open or write the file aborts via
+   'assert_perror'.  */
+static void
+write_id_map (pid_t pid, const char *file, int id)
+{
+  char path[100];
+  char line[100];
+
+  snprintf (path, sizeof path, "/proc/%d/%s", pid, file);
+
+  /* A single entry: ID inside the namespace, ID outside, range length 1.  */
+  int length = snprintf (line, sizeof line, "%d %d 1\n", id, id);
+
+  int fd = open (path, O_WRONLY);
+  if (fd < 0)
+    assert_perror (errno);
+
+  if (write (fd, line, length) < 0)
+    assert_perror (errno);
+
+  close (fd);
+}
+
+/* Disallow setgroups(2) for PID by writing "deny" to /proc/PID/setgroups.
+   Failure to open or write the file aborts via 'assert_perror'.  */
+static void
+disallow_setgroups (pid_t pid)
+{
+  /* The terminating NUL is written along with the word, 5 bytes in all.  */
+  static const char verdict[] = "deny";
+
+  char path[100];
+  snprintf (path, sizeof path, "/proc/%d/setgroups", pid);
+
+  int fd = open (path, O_WRONLY);
+  if (fd < 0)
+    assert_perror (errno);
+
+  if (write (fd, verdict, sizeof verdict) < 0)
+    assert_perror (errno);
+
+  close (fd);
+}
+
+
+/* Entry point of the wrapper.  Work out where the store this executable
+   lives in is actually located.  If that is not @STORE_DIRECTORY@ and
+   @WRAPPED_PROGRAM@ is not reachable, spawn a child in new user and mount
+   namespaces where the store is bind-mounted at @STORE_DIRECTORY@, and let
+   the child execute @WRAPPED_PROGRAM@ with ARGV; the parent then waits,
+   cleans up, and exits with the child's exit code.  Otherwise execute
+   @WRAPPED_PROGRAM@ directly.  */
+int
+main (int argc, char *argv[])
+{
+  char self[PATH_MAX];
+  ssize_t size = readlink ("/proc/self/exe", self, sizeof self - 1);
+  assert (size > 0);
+
+  /* 'readlink' does not NUL-terminate its result; do it before treating
+     SELF as a string.  */
+  self[size] = '\0';
+
+  /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we
+     want to extract "/home/ludo/.local/gnu/store".  */
+  size_t self_length = strlen (self);
+  size_t wrapped_length = strlen ("@WRAPPED_PROGRAM@");
+
+  /* Guard the arithmetic below against wrap-around and out-of-bounds
+     writes.  */
+  assert (self_length >= wrapped_length);
+  size_t index = self_length - wrapped_length + strlen ("@STORE_DIRECTORY@");
+  assert (index <= self_length);
+
+  char *store = strdup (self);
+  assert (store != NULL);
+  store[index] = '\0';
+
+  struct stat statbuf;
+
+  /* If STORE is already at the "right" place, we can execute
+     @WRAPPED_PROGRAM@ right away.  This is not just an optimization: it's
+     needed when running one of these wrappers from within an unshare'd
+     namespace, because 'unshare' fails with EPERM in that context.  */
+  if (strcmp (store, "@STORE_DIRECTORY@") != 0
+      && lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0)
+    {
+      /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is
+         bind-mounted in the right place.  */
+      int err;
+
+      char *template = strdup ("/tmp/guix-exec-XXXXXX");
+      assert (template != NULL);
+      char *new_root = mkdtemp (template);
+      if (new_root == NULL)
+        assert_perror (errno);
+
+      char *new_store = concat (new_root, "@STORE_DIRECTORY@");
+      char *cwd = get_current_dir_name ();
+
+      /* Create a child with separate namespaces and set up bind-mounts from
+         there.  That way, bind-mounts automatically disappear when the child
+         exits, which simplifies cleanup for the parent.  Note: clone is more
+         convenient than fork + unshare since the parent can directly write
+         the child uid_map/gid_map files.  */
+      pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER,
+                             NULL, NULL, NULL);
+      switch (child)
+        {
+        case 0:
+          /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461>
+             we cannot make NEW_ROOT a tmpfs (which would have saved the need
+             for 'rm_rf'.)  */
+          bind_mount ("/", new_root);
+          mkdir_p (new_store);
+          err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY,
+                       NULL);
+          if (err < 0)
+            assert_perror (errno);
+
+          err = chdir (new_root);
+          if (err < 0)
+            assert_perror (errno);
+          err = chroot (new_root);
+          if (err < 0)
+            assert_perror (errno);
+
+          /* Change back to where we were before chroot'ing.  This is best
+             effort: on failure we simply stay in the new root.  */
+          chdir (cwd);
+          break;
+
+        case -1:
+          fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]);
+          fprintf (stderr, "\
+This may be because \"user namespaces\" are not supported on this system.\n\
+Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\
+unless you move it to the '@STORE_DIRECTORY@' directory.\n\
+\n\
+Please refer to the 'guix pack' documentation for more information.\n");
+          return EXIT_FAILURE;
+
+        default:
+          {
+            /* Map the current user/group ID in the child's namespace (the
+               default is to get the "overflow UID", i.e., the UID of
+               "nobody").  We must first disallow 'setgroups' for that
+               process.  */
+            disallow_setgroups (child);
+            write_id_map (child, "uid_map", getuid ());
+            write_id_map (child, "gid_map", getgid ());
+
+            int status = 0;
+            pid_t waited;
+            do
+              waited = waitpid (child, &status, 0);
+            while (waited < 0 && errno == EINTR);
+            if (waited < 0)
+              assert_perror (errno);
+
+            chdir ("/");                        /* avoid EBUSY */
+            rm_rf (new_root);
+            free (new_root);
+
+            /* STATUS is a wait status, not an exit code: passing it to
+               'exit' unchanged would turn, e.g., exit code 1 (status
+               0x100) into 0.  Decode it, using the shell's 128+N
+               convention when the child was killed by signal N.  */
+            int code = EXIT_FAILURE;
+            if (WIFEXITED (status))
+              code = WEXITSTATUS (status);
+            else if (WIFSIGNALED (status))
+              code = 128 + WTERMSIG (status);
+            exit (code);
+          }
+        }
+    }
+
+  /* The executable is available under @STORE_DIRECTORY@, so we can now
+     execute it.  */
+  int err = execv ("@WRAPPED_PROGRAM@", argv);
+  if (err < 0)
+    assert_perror (errno);
+
+  return EXIT_FAILURE;
+}