[systemd-commits] 4 commits - src/core src/nspawn units/kmod-static-nodes.service.in units/sys-kernel-config.mount units/sys-kernel-debug.mount units/systemd-tmpfiles-setup-dev.service.in
Lennart Poettering
lennart at kemper.freedesktop.org
Thu Jul 3 18:24:48 PDT 2014
src/core/main.c | 2
src/nspawn/nspawn.c | 158 ++++++++++++++++++++++++++--
units/kmod-static-nodes.service.in | 2
units/sys-kernel-config.mount | 1
units/sys-kernel-debug.mount | 1
units/systemd-tmpfiles-setup-dev.service.in | 2
6 files changed, 153 insertions(+), 13 deletions(-)
New commits:
commit 4d9f07b492ba1707d4a92cd937b87b8baf827f7d
Author: Lennart Poettering <lennart at poettering.net>
Date: Fri Jul 4 03:22:33 2014 +0200
nspawn: add new --volatile switch for booting containers in volatile (ephemeral) mode
Two modes are supported: --volatile=yes mounts only /usr into the
container, and a tmpfs as root directory. --volatile=state mounts the
full OS tree in, but overmounts /var with a tmpfs.
--volatile=yes hence boots with an unpopulated /etc and /var, starting
with pristine configuration and state.
--volatile=state hence boots with an unpopulated /var, only starting
with pristine state.
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 0d538c2..edad1cb 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -107,6 +107,12 @@ typedef enum LinkJournal {
LINK_GUEST
} LinkJournal;
+typedef enum Volatile {
+ VOLATILE_NO,
+ VOLATILE_YES,
+ VOLATILE_STATE,
+} Volatile;
+
static char *arg_directory = NULL;
static char *arg_user = NULL;
static sd_id128_t arg_uuid = {};
@@ -159,6 +165,7 @@ static bool arg_network_veth = false;
static const char *arg_network_bridge = NULL;
static unsigned long arg_personality = 0xffffffffLU;
static const char *arg_image = NULL;
+static Volatile arg_volatile = VOLATILE_NO;
static int help(void) {
@@ -207,7 +214,8 @@ static int help(void) {
" --share-system Share system namespaces with host\n"
" --register=BOOLEAN Register container as machine\n"
" --keep-unit Do not register a scope for the machine, reuse\n"
- " the service unit nspawn is running in\n",
+ " the service unit nspawn is running in\n"
+ " --volatile[=MODE] Run the system in volatile mode\n",
program_invocation_short_name);
return 0;
@@ -235,6 +243,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_NETWORK_VETH,
ARG_NETWORK_BRIDGE,
ARG_PERSONALITY,
+ ARG_VOLATILE,
};
static const struct option options[] = {
@@ -267,6 +276,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
{ "personality", required_argument, NULL, ARG_PERSONALITY },
{ "image", required_argument, NULL, 'i' },
+ { "volatile", optional_argument, NULL, ARG_VOLATILE },
{}
};
@@ -559,6 +569,25 @@ static int parse_argv(int argc, char *argv[]) {
break;
+ case ARG_VOLATILE:
+
+ if (!optarg)
+ arg_volatile = VOLATILE_YES;
+ else {
+ r = parse_boolean(optarg);
+ if (r < 0) {
+ if (streq(optarg, "state"))
+ arg_volatile = VOLATILE_STATE;
+ else {
+ log_error("Failed to parse --volatile= argument: %s", optarg);
+ return r;
+ }
+ } else
+ arg_volatile = r ? VOLATILE_YES : VOLATILE_NO;
+ }
+
+ break;
+
case '?':
return -EINVAL;
@@ -585,6 +614,11 @@ static int parse_argv(int argc, char *argv[]) {
return -EINVAL;
}
+ if (arg_volatile != VOLATILE_NO && arg_read_only) {
+ log_error("Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy.");
+ return -EINVAL;
+ }
+
arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
return 1;
@@ -712,7 +746,7 @@ static int mount_binds(const char *dest, char **l, bool ro) {
}
/* Create the mount point, but be conservative -- refuse to create block
- * and char devices. */
+ * and char devices. */
if (S_ISDIR(source_st.st_mode))
mkdir_label(where, 0755);
else if (S_ISFIFO(source_st.st_mode))
@@ -796,7 +830,6 @@ static int setup_timezone(const char *dest) {
if (!y)
y = path_startswith(q, "/usr/share/zoneinfo/");
-
/* Already pointing to the right place? Then do nothing .. */
if (y && streq(y, z))
return 0;
@@ -815,7 +848,9 @@ static int setup_timezone(const char *dest) {
if (!what)
return log_oom();
+ mkdir_parents(where, 0755);
unlink(where);
+
if (symlink(what, where) < 0) {
log_error("Failed to correct timezone of container: %m");
return 0;
@@ -839,11 +874,105 @@ static int setup_resolv_conf(const char *dest) {
/* We don't really care for the results of this really. If it
* fails, it fails, but meh... */
+ mkdir_parents(where, 0755);
copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW, 0644);
return 0;
}
+static int setup_volatile_state(const char *directory) {
+ const char *p;
+ int r;
+
+ assert(directory);
+
+ if (arg_volatile != VOLATILE_STATE)
+ return 0;
+
+ /* --volatile=state means we simply overmount /var
+ with a tmpfs, and the rest read-only. */
+
+ r = bind_remount_recursive(directory, true);
+ if (r < 0) {
+ log_error("Failed to remount %s read-only: %s", directory, strerror(-r));
+ return r;
+ }
+
+ p = strappenda(directory, "/var");
+ mkdir(p, 0755);
+
+ if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, "mode=755") < 0) {
+ log_error("Failed to mount tmpfs to /var: %m");
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int setup_volatile(const char *directory) {
+ bool tmpfs_mounted = false, bind_mounted = false;
+ char template[] = "/tmp/nspawn-volatile-XXXXXX";
+ const char *f, *t;
+ int r;
+
+ assert(directory);
+
+ if (arg_volatile != VOLATILE_YES)
+ return 0;
+
+ /* --volatile=yes means we mount a tmpfs to the root dir, and
+ the original /usr to use inside it, and that read-only. */
+
+ if (!mkdtemp(template)) {
+ log_error("Failed to create temporary directory: %m");
+ return -errno;
+ }
+
+ if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, "mode=755") < 0) {
+ log_error("Failed to mount tmpfs for root directory: %m");
+ r = -errno;
+ goto fail;
+ }
+
+ tmpfs_mounted = true;
+
+ f = strappenda(directory, "/usr");
+ t = strappenda(template, "/usr");
+
+ mkdir(t, 0755);
+ if (mount(f, t, "bind", MS_BIND|MS_REC, NULL) < 0) {
+ log_error("Failed to create /usr bind mount: %m");
+ r = -errno;
+ goto fail;
+ }
+
+ bind_mounted = true;
+
+ r = bind_remount_recursive(t, true);
+ if (r < 0) {
+ log_error("Failed to remount %s read-only: %s", t, strerror(-r));
+ goto fail;
+ }
+
+ if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
+ log_error("Failed to move root mount: %m");
+ r = -errno;
+ goto fail;
+ }
+
+ rmdir(template);
+
+ return 0;
+
+fail:
+ if (bind_mounted)
+ umount(t);
+ if (tmpfs_mounted)
+ umount(template);
+ rmdir(template);
+ return r;
+}
+
static char* id128_format_as_uuid(sd_id128_t id, char s[37]) {
snprintf(s, 37,
@@ -2868,7 +2997,11 @@ int main(int argc, char *argv[]) {
goto finish;
}
- r = dissect_image(image_fd, &root_device, &root_device_rw, &home_device, &home_device_rw, &srv_device, &srv_device_rw, &secondary);
+ r = dissect_image(image_fd,
+ &root_device, &root_device_rw,
+ &home_device, &home_device_rw,
+ &srv_device, &srv_device_rw,
+ &secondary);
if (r < 0)
goto finish;
}
@@ -3042,18 +3175,23 @@ int main(int argc, char *argv[]) {
srv_device, srv_device_rw) < 0)
goto child_fail;
- r = base_filesystem_create(arg_directory);
- if (r < 0) {
- log_error("Failed to create the base filesystem: %s", strerror(-r));
- goto child_fail;
- }
-
/* Turn directory into bind mount */
if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
log_error("Failed to make bind mount: %m");
goto child_fail;
}
+ r = setup_volatile(arg_directory);
+ if (r < 0)
+ goto child_fail;
+
+ if (setup_volatile_state(arg_directory) < 0)
+ goto child_fail;
+
+ r = base_filesystem_create(arg_directory);
+ if (r < 0)
+ goto child_fail;
+
if (arg_read_only) {
k = bind_remount_recursive(arg_directory, true);
if (k < 0) {
commit baa1bdf70f21848fbe01d3f383ae0f59d86a9bf3
Author: Lennart Poettering <lennart at poettering.net>
Date: Fri Jul 4 03:13:05 2014 +0200
main: change check whether /etc is unpopulated to look for /etc/machine-id
Previously, we checked whether /etc was completely empty. This makes it
difficult though for container managers such as nspawn to install a
small number of files (such as /etc/timezone), and have the system
otherwise populate its own tree.
Hence, change this by looking for /etc/machine-id, which should be a
good sign whether /etc is populated or not.
diff --git a/src/core/main.c b/src/core/main.c
index 38835fc..1ca8999 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1543,7 +1543,7 @@ int main(int argc, char *argv[]) {
if (in_initrd())
log_info("Running in initial RAM disk.");
- empty_etc = dir_is_empty("/etc") > 0;
+ empty_etc = access("/etc/machine-id", F_OK) < 0;
if (empty_etc)
log_info("Running with unpopulated /etc.");
} else {
commit fa229d09281d435153b4cfd138a2a62fa66d889b
Author: Lennart Poettering <lennart at poettering.net>
Date: Fri Jul 4 03:10:09 2014 +0200
units: conditionalize configfs and debugfs with CAP_SYS_RAWIO
We really don't want these in containers, as they provide too low-level
a view of the system.
Conditionalize them with CAP_SYS_RAWIO since that's required to access
/proc/kcore, /dev/kmem and similar, which feel similar in style. Also,
nspawn containers lack that capability.
diff --git a/units/sys-kernel-config.mount b/units/sys-kernel-config.mount
index 020101c..21648ef 100644
--- a/units/sys-kernel-config.mount
+++ b/units/sys-kernel-config.mount
@@ -11,6 +11,7 @@ Documentation=https://www.kernel.org/doc/Documentation/filesystems/configfs/conf
Documentation=http://www.freedesktop.org/wiki/Software/systemd/APIFileSystems
DefaultDependencies=no
ConditionPathExists=/sys/kernel/config
+ConditionCapability=CAP_SYS_RAWIO
After=systemd-modules-load.service
Before=sysinit.target
diff --git a/units/sys-kernel-debug.mount b/units/sys-kernel-debug.mount
index 5369728..1e94387 100644
--- a/units/sys-kernel-debug.mount
+++ b/units/sys-kernel-debug.mount
@@ -11,6 +11,7 @@ Documentation=https://www.kernel.org/doc/Documentation/filesystems/debugfs.txt
Documentation=http://www.freedesktop.org/wiki/Software/systemd/APIFileSystems
DefaultDependencies=no
ConditionPathExists=/sys/kernel/debug
+ConditionCapability=CAP_SYS_RAWIO
Before=sysinit.target
[Mount]
commit e0c74691c41a204eba2fd5f39615049fc9ff1648
Author: Lennart Poettering <lennart at poettering.net>
Date: Fri Jul 4 03:07:20 2014 +0200
units: conditionalize static device node logic on CAP_SYS_MODULE instead of CAP_MKNOD
nspawn containers generally have CAP_MKNOD, since this is required
to make PrivateDevices= work. Thus, it's not useful anymore to
conditionalize the kmod static device node units on it.
Use CAP_SYS_MODULE instead, which is not available in nspawn
containers. The static device node logic exists only so that
modules can be autoloaded through it, and if we can't do that there's no
point in doing it.
diff --git a/units/kmod-static-nodes.service.in b/units/kmod-static-nodes.service.in
index 368f980..0934a87 100644
--- a/units/kmod-static-nodes.service.in
+++ b/units/kmod-static-nodes.service.in
@@ -9,7 +9,7 @@
Description=Create list of required static device nodes for the current kernel
DefaultDependencies=no
Before=sysinit.target systemd-tmpfiles-setup-dev.service
-ConditionCapability=CAP_MKNOD
+ConditionCapability=CAP_SYS_MODULE
ConditionPathExists=/lib/modules/%v/modules.devname
[Service]
diff --git a/units/systemd-tmpfiles-setup-dev.service.in b/units/systemd-tmpfiles-setup-dev.service.in
index b9cfc53..06346d3 100644
--- a/units/systemd-tmpfiles-setup-dev.service.in
+++ b/units/systemd-tmpfiles-setup-dev.service.in
@@ -12,7 +12,7 @@ DefaultDependencies=no
Conflicts=shutdown.target
After=systemd-readahead-collect.service systemd-readahead-replay.service systemd-sysusers.service
Before=sysinit.target local-fs-pre.target systemd-udevd.service shutdown.target
-ConditionCapability=CAP_MKNOD
+ConditionCapability=CAP_SYS_MODULE
[Service]
Type=oneshot
More information about the systemd-commits
mailing list