[systemd-commits] 4 commits - src/core src/nspawn units/kmod-static-nodes.service.in units/sys-kernel-config.mount units/sys-kernel-debug.mount units/systemd-tmpfiles-setup-dev.service.in

Lennart Poettering lennart at kemper.freedesktop.org
Thu Jul 3 18:24:48 PDT 2014


 src/core/main.c                             |    2 
 src/nspawn/nspawn.c                         |  158 ++++++++++++++++++++++++++--
 units/kmod-static-nodes.service.in          |    2 
 units/sys-kernel-config.mount               |    1 
 units/sys-kernel-debug.mount                |    1 
 units/systemd-tmpfiles-setup-dev.service.in |    2 
 6 files changed, 153 insertions(+), 13 deletions(-)

New commits:
commit 4d9f07b492ba1707d4a92cd937b87b8baf827f7d
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Jul 4 03:22:33 2014 +0200

    nspawn: add new --volatile switch for booting containers in volatile (ephemeral) mode
    
    Two modes are supported: --volatile=yes mounts only /usr into the
    container, and a tmpfs as root directory. --volatile=state mounts the
    full OS tree in, but overmounts /var with a tmpfs.
    
    --volatile=yes hence boots with an unpopulated /etc and /var, starting
    with pristine configuration and state.
    
    --volatile=state hence boots with an unpopulated /var, only starting
    with pristine state.

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 0d538c2..edad1cb 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -107,6 +107,12 @@ typedef enum LinkJournal {
         LINK_GUEST
 } LinkJournal;
 
+typedef enum Volatile {
+        VOLATILE_NO,
+        VOLATILE_YES,
+        VOLATILE_STATE,
+} Volatile;
+
 static char *arg_directory = NULL;
 static char *arg_user = NULL;
 static sd_id128_t arg_uuid = {};
@@ -159,6 +165,7 @@ static bool arg_network_veth = false;
 static const char *arg_network_bridge = NULL;
 static unsigned long arg_personality = 0xffffffffLU;
 static const char *arg_image = NULL;
+static Volatile arg_volatile = VOLATILE_NO;
 
 static int help(void) {
 
@@ -207,7 +214,8 @@ static int help(void) {
                "     --share-system         Share system namespaces with host\n"
                "     --register=BOOLEAN     Register container as machine\n"
                "     --keep-unit            Do not register a scope for the machine, reuse\n"
-               "                            the service unit nspawn is running in\n",
+               "                            the service unit nspawn is running in\n"
+               "     --volatile[=MODE]      Run the system in volatile mode\n",
                program_invocation_short_name);
 
         return 0;
@@ -235,6 +243,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_NETWORK_VETH,
                 ARG_NETWORK_BRIDGE,
                 ARG_PERSONALITY,
+                ARG_VOLATILE,
         };
 
         static const struct option options[] = {
@@ -267,6 +276,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE    },
                 { "personality",           required_argument, NULL, ARG_PERSONALITY       },
                 { "image",                 required_argument, NULL, 'i'                   },
+                { "volatile",              optional_argument, NULL, ARG_VOLATILE          },
                 {}
         };
 
@@ -559,6 +569,25 @@ static int parse_argv(int argc, char *argv[]) {
 
                         break;
 
+                case ARG_VOLATILE:
+
+                        if (!optarg)
+                                arg_volatile = VOLATILE_YES;
+                        else {
+                                r = parse_boolean(optarg);
+                                if (r < 0) {
+                                        if (streq(optarg, "state"))
+                                                arg_volatile = VOLATILE_STATE;
+                                        else {
+                                                log_error("Failed to parse --volatile= argument: %s", optarg);
+                                                return r;
+                                        }
+                                } else
+                                        arg_volatile = r ? VOLATILE_YES : VOLATILE_NO;
+                        }
+
+                        break;
+
                 case '?':
                         return -EINVAL;
 
@@ -585,6 +614,11 @@ static int parse_argv(int argc, char *argv[]) {
                 return -EINVAL;
         }
 
+        if (arg_volatile != VOLATILE_NO && arg_read_only) {
+                log_error("Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy.");
+                return -EINVAL;
+        }
+
         arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
 
         return 1;
@@ -712,7 +746,7 @@ static int mount_binds(const char *dest, char **l, bool ro) {
                 }
 
                 /* Create the mount point, but be conservative -- refuse to create block
-                * and char devices. */
+                 * and char devices. */
                 if (S_ISDIR(source_st.st_mode))
                         mkdir_label(where, 0755);
                 else if (S_ISFIFO(source_st.st_mode))
@@ -796,7 +830,6 @@ static int setup_timezone(const char *dest) {
                 if (!y)
                         y = path_startswith(q, "/usr/share/zoneinfo/");
 
-
                 /* Already pointing to the right place? Then do nothing .. */
                 if (y && streq(y, z))
                         return 0;
@@ -815,7 +848,9 @@ static int setup_timezone(const char *dest) {
         if (!what)
                 return log_oom();
 
+        mkdir_parents(where, 0755);
         unlink(where);
+
         if (symlink(what, where) < 0) {
                 log_error("Failed to correct timezone of container: %m");
                 return 0;
@@ -839,11 +874,105 @@ static int setup_resolv_conf(const char *dest) {
 
         /* We don't really care for the results of this really. If it
          * fails, it fails, but meh... */
+        mkdir_parents(where, 0755);
         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW, 0644);
 
         return 0;
 }
 
+static int setup_volatile_state(const char *directory) {
+        const char *p;
+        int r;
+
+        assert(directory);
+
+        if (arg_volatile != VOLATILE_STATE)
+                return 0;
+
+        /* --volatile=state means we simply overmount /var
+           with a tmpfs, and the rest read-only. */
+
+        r = bind_remount_recursive(directory, true);
+        if (r < 0) {
+                log_error("Failed to remount %s read-only: %s", directory, strerror(-r));
+                return r;
+        }
+
+        p = strappenda(directory, "/var");
+        mkdir(p, 0755);
+
+        if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, "mode=755") < 0) {
+                log_error("Failed to mount tmpfs to /var: %m");
+                return -errno;
+        }
+
+        return 0;
+}
+
+static int setup_volatile(const char *directory) {
+        bool tmpfs_mounted = false, bind_mounted = false;
+        char template[] = "/tmp/nspawn-volatile-XXXXXX";
+        const char *f, *t;
+        int r;
+
+        assert(directory);
+
+        if (arg_volatile != VOLATILE_YES)
+                return 0;
+
+        /* --volatile=yes means we mount a tmpfs to the root dir, and
+           the original /usr to use inside it, and that read-only. */
+
+        if (!mkdtemp(template)) {
+                log_error("Failed to create temporary directory: %m");
+                return -errno;
+        }
+
+        if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, "mode=755") < 0) {
+                log_error("Failed to mount tmpfs for root directory: %m");
+                r = -errno;
+                goto fail;
+        }
+
+        tmpfs_mounted = true;
+
+        f = strappenda(directory, "/usr");
+        t = strappenda(template, "/usr");
+
+        mkdir(t, 0755);
+        if (mount(f, t, "bind", MS_BIND|MS_REC, NULL) < 0) {
+                log_error("Failed to create /usr bind mount: %m");
+                r = -errno;
+                goto fail;
+        }
+
+        bind_mounted = true;
+
+        r = bind_remount_recursive(t, true);
+        if (r < 0) {
+                log_error("Failed to remount %s read-only: %s", t, strerror(-r));
+                goto fail;
+        }
+
+        if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
+                log_error("Failed to move root mount: %m");
+                r = -errno;
+                goto fail;
+        }
+
+        rmdir(template);
+
+        return 0;
+
+fail:
+        if (bind_mounted)
+                umount(t);
+        if (tmpfs_mounted)
+                umount(template);
+        rmdir(template);
+        return r;
+}
+
 static char* id128_format_as_uuid(sd_id128_t id, char s[37]) {
 
         snprintf(s, 37,
@@ -2868,7 +2997,11 @@ int main(int argc, char *argv[]) {
                         goto finish;
                 }
 
-                r = dissect_image(image_fd, &root_device, &root_device_rw, &home_device, &home_device_rw, &srv_device, &srv_device_rw, &secondary);
+                r = dissect_image(image_fd,
+                                  &root_device, &root_device_rw,
+                                  &home_device, &home_device_rw,
+                                  &srv_device, &srv_device_rw,
+                                  &secondary);
                 if (r < 0)
                         goto finish;
         }
@@ -3042,18 +3175,23 @@ int main(int argc, char *argv[]) {
                                           srv_device, srv_device_rw) < 0)
                                 goto child_fail;
 
-                        r = base_filesystem_create(arg_directory);
-                        if (r < 0) {
-                                log_error("Failed to create the base filesystem: %s", strerror(-r));
-                                goto child_fail;
-                        }
-
                         /* Turn directory into bind mount */
                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
                                 log_error("Failed to make bind mount: %m");
                                 goto child_fail;
                         }
 
+                        r = setup_volatile(arg_directory);
+                        if (r < 0)
+                                goto child_fail;
+
+                        if (setup_volatile_state(arg_directory) < 0)
+                                goto child_fail;
+
+                        r = base_filesystem_create(arg_directory);
+                        if (r < 0)
+                                goto child_fail;
+
                         if (arg_read_only) {
                                 k = bind_remount_recursive(arg_directory, true);
                                 if (k < 0) {

commit baa1bdf70f21848fbe01d3f383ae0f59d86a9bf3
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Jul 4 03:13:05 2014 +0200

    main: change check whether /etc is unpopulated to look for /etc/machine-id
    
    Previously, we checked whether /etc was completely empty. This makes it
    difficult though for container managers such as nspawn to install a
    small number of files (such as /etc/timezone), and have the system
    otherwise populate its own tree.
    
    Hence, change this by looking for /etc/machine-id, which should be a
    good sign whether /etc is populated or not.

diff --git a/src/core/main.c b/src/core/main.c
index 38835fc..1ca8999 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1543,7 +1543,7 @@ int main(int argc, char *argv[]) {
                 if (in_initrd())
                         log_info("Running in initial RAM disk.");
 
-                empty_etc = dir_is_empty("/etc") > 0;
+                empty_etc = access("/etc/machine-id", F_OK) < 0;
                 if (empty_etc)
                         log_info("Running with unpopulated /etc.");
         } else {

commit fa229d09281d435153b4cfd138a2a62fa66d889b
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Jul 4 03:10:09 2014 +0200

    units: conditionalize configfs and debugfs with CAP_SYS_RAWIO
    
    We really don't want these in containers, as they provide too low-level
    a view of the system.
    
    Conditionalize them with CAP_SYS_RAWIO since that's required to access
    /proc/kcore, /dev/kmem and similar, which feel similar in style. Also,
    nspawn containers lack that capability.

diff --git a/units/sys-kernel-config.mount b/units/sys-kernel-config.mount
index 020101c..21648ef 100644
--- a/units/sys-kernel-config.mount
+++ b/units/sys-kernel-config.mount
@@ -11,6 +11,7 @@ Documentation=https://www.kernel.org/doc/Documentation/filesystems/configfs/conf
 Documentation=http://www.freedesktop.org/wiki/Software/systemd/APIFileSystems
 DefaultDependencies=no
 ConditionPathExists=/sys/kernel/config
+ConditionCapability=CAP_SYS_RAWIO
 After=systemd-modules-load.service
 Before=sysinit.target
 
diff --git a/units/sys-kernel-debug.mount b/units/sys-kernel-debug.mount
index 5369728..1e94387 100644
--- a/units/sys-kernel-debug.mount
+++ b/units/sys-kernel-debug.mount
@@ -11,6 +11,7 @@ Documentation=https://www.kernel.org/doc/Documentation/filesystems/debugfs.txt
 Documentation=http://www.freedesktop.org/wiki/Software/systemd/APIFileSystems
 DefaultDependencies=no
 ConditionPathExists=/sys/kernel/debug
+ConditionCapability=CAP_SYS_RAWIO
 Before=sysinit.target
 
 [Mount]

commit e0c74691c41a204eba2fd5f39615049fc9ff1648
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Jul 4 03:07:20 2014 +0200

    units: conditionalize static device node logic on CAP_SYS_MODULE instead of CAP_MKNOD
    
    nspawn containers generally have CAP_MKNOD, since this is required
    to make PrivateDevices= work. Thus, it's not useful anymore to
    conditionalize the kmod static device node units on it.
    
    Use CAP_SYS_MODULE instead, which is not available to nspawn
    containers. The static device node logic exists only so that modules
    can be autoloaded through those nodes, so if we cannot load modules
    there is no point in running it.

diff --git a/units/kmod-static-nodes.service.in b/units/kmod-static-nodes.service.in
index 368f980..0934a87 100644
--- a/units/kmod-static-nodes.service.in
+++ b/units/kmod-static-nodes.service.in
@@ -9,7 +9,7 @@
 Description=Create list of required static device nodes for the current kernel
 DefaultDependencies=no
 Before=sysinit.target systemd-tmpfiles-setup-dev.service
-ConditionCapability=CAP_MKNOD
+ConditionCapability=CAP_SYS_MODULE
 ConditionPathExists=/lib/modules/%v/modules.devname
 
 [Service]
diff --git a/units/systemd-tmpfiles-setup-dev.service.in b/units/systemd-tmpfiles-setup-dev.service.in
index b9cfc53..06346d3 100644
--- a/units/systemd-tmpfiles-setup-dev.service.in
+++ b/units/systemd-tmpfiles-setup-dev.service.in
@@ -12,7 +12,7 @@ DefaultDependencies=no
 Conflicts=shutdown.target
 After=systemd-readahead-collect.service systemd-readahead-replay.service systemd-sysusers.service
 Before=sysinit.target local-fs-pre.target systemd-udevd.service shutdown.target
-ConditionCapability=CAP_MKNOD
+ConditionCapability=CAP_SYS_MODULE
 
 [Service]
 Type=oneshot



More information about the systemd-commits mailing list