[systemd-commits] 7 commits - TODO man/systemd.exec.xml src/core src/nspawn src/shared units/systemd-machined.service.in
Lennart Poettering
lennart at kemper.freedesktop.org
Fri Jun 6 05:59:52 PDT 2014
TODO | 7 -
man/systemd.exec.xml | 14 +-
src/core/namespace.c | 42 ++++---
src/core/umount.c | 65 ++++-------
src/nspawn/nspawn.c | 25 ++--
src/shared/util.c | 211 ++++++++++++++++++++++++++++++++++++++
src/shared/util.h | 4
units/systemd-machined.service.in | 2
8 files changed, 292 insertions(+), 78 deletions(-)
New commits:
commit dc17bcef197a0d5ee798cce59c40e4f5e85c24f6
Author: Lennart Poettering <lennart at poettering.net>
Date: Fri Jun 6 14:51:07 2014 +0200
update TODO
diff --git a/TODO b/TODO
index caf565f..98a5947 100644
--- a/TODO
+++ b/TODO
@@ -181,15 +181,16 @@ Features:
ReadOnlyDirectories=... for whitelisting files for a service.
* sd-bus:
- - make AddMatch calls on dbus1 transports async?
- when kdbus does not take our message without memfds, try again with memfds
- systemd-bus-proxyd needs to enforce good old XML policy
- allow updating attach flags during runtime
- pid1: peek into activating message when activating a service
- introduce sd_bus_emit_object_added()/sd_bus_emit_object_removed() that automatically includes the build-in interfaces in the list
- constructors for bus messages should probably not be OK with a NULL bus pointer
+ - port to sd-resolve for connecting to TCP dbus servers
- see if we can drop more message validation on the sending side
- add API to clone sd_bus_message objects
+ - make AddMatch calls on dbus1 transports async?
- kdbus: matches against source or destination pids for an "strace -p"-like feel. Problem: The PID info needs to be available in userspace too...
- longer term: priority inheritance
- dbus spec updates:
@@ -200,7 +201,6 @@ Features:
- path escaping
- update systemd.special(7) to mention that dbus.socket is only about the compatibility socket now
- test bloom filter generation indexes
- - port to sd-resolve for connecting to TCP dbus servers
* sd-event
- allow multiple signal handlers per signal?
@@ -454,7 +454,6 @@ Features:
* currently x-systemd.timeout is lost in the initrd, since crypttab is copied into dracut, but fstab is not
* nspawn:
- - nspawn: --read-only is not applied recursively to submounts
- bind mount read-only the cgroup tree higher than nspawn
- nspawn: make it work for dwalsh and shared /usr containers -- tmpfs mounts as command line parameters
- refuses to boot containers without /etc/machine-id (OK?), and with empty /etc/machine-id (not OK).
@@ -490,8 +489,6 @@ Features:
* MountFlags=shared acts as MountFlags=slave right now.
-* ReadOnlyDirectories= is not applied recursively to submounts
-
* drop PID 1 reloading, only do reexecing (difficult: Reload()
currently is properly synchronous, Reexec() is weird, because we
cannot delay the response properly until we are back, so instead of
commit 051be1f71c9ab82a88da8f33e95882aa386a5541
Author: Lennart Poettering <lennart at poettering.net>
Date: Fri Jun 6 14:48:51 2014 +0200
namespace: cover /boot with ProtectSystem= again
Now that we properly exclude autofs mounts from ProtectSystem= we can
include it in the effect of ProtectSystem= again.
diff --git a/src/core/namespace.c b/src/core/namespace.c
index f11065e..3b5402b 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -367,7 +367,7 @@ int setup_namespace(
strv_length(inaccessible_dirs) +
private_dev +
(protect_home != PROTECT_HOME_NO ? 3 : 0) +
- (protect_system != PROTECT_SYSTEM_NO ? 1 : 0) +
+ (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
(protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
if (n > 0) {
@@ -409,7 +409,7 @@ int setup_namespace(
}
if (protect_system != PROTECT_SYSTEM_NO) {
- r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL ? STRV_MAKE("/usr", "/etc") : STRV_MAKE("/usr"), READONLY);
+ r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL ? STRV_MAKE("/usr", "-/boot", "/etc") : STRV_MAKE("/usr", "-/boot"), READONLY);
if (r < 0)
return r;
}
commit 3c52ad9237ead4fc26888348f3da74d896fb06ab
Author: Lennart Poettering <lennart at poettering.net>
Date: Fri Jun 6 14:30:09 2014 +0200
units: fix minor typo
diff --git a/units/systemd-machined.service.in b/units/systemd-machined.service.in
index e60ea32..3ae0847 100644
--- a/units/systemd-machined.service.in
+++ b/units/systemd-machined.service.in
@@ -20,5 +20,5 @@ WatchdogSec=1min
PrivateTmp=yes
PrivateDevices=yes
PrivateNetwork=yes
-PortectSystem=full
+ProtectSystem=full
ProtectHome=yes
commit d6797c920e9eb70f46a893c00fdd9ecb86d15f84
Author: Lennart Poettering <lennart at poettering.net>
Date: Fri Jun 6 11:42:25 2014 +0200
namespace: beef up read-only bind mount logic
Instead of blindly creating another bind mount for read-only mounts,
check if there's already one we can use, and if so, use it. Also,
recursively mark all submounts read-only too. Also, ignore autofs mounts
when remounting read-only unless they are already triggered.
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index c5bb55c..c419424 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -777,8 +777,8 @@
<term><varname>ReadOnlyDirectories=</varname></term>
<term><varname>InaccessibleDirectories=</varname></term>
- <listitem><para>Sets up a new
- file system namespace for executed
+ <listitem><para>Sets up a new file
+ system namespace for executed
processes. These options may be used
to limit access a process might have
to the main file system
@@ -799,16 +799,14 @@
processes inside the namespace. Note
that restricting access with these
options does not extend to submounts
- of a directory. You must list
- submounts separately in these settings
- to ensure the same limited
- access. These options may be specified
+ of a directory that are created later
+ on. These options may be specified
more than once in which case all
directories listed will have limited
access from within the namespace. If
the empty string is assigned to this
- option, the specific list is reset, and
- all prior assignments have no
+ option, the specific list is reset,
+ and all prior assignments have no
effect.</para>
<para>Paths in
<varname>ReadOnlyDirectories=</varname>
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 43b9045..f11065e 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -280,9 +280,6 @@ static int apply_mount(
switch (m->mode) {
- case PRIVATE_DEV:
- return mount_dev(m);
-
case INACCESSIBLE:
/* First, get rid of everything that is below if there
@@ -295,8 +292,9 @@ static int apply_mount(
case READONLY:
case READWRITE:
- what = m->path;
- break;
+ /* Nothing to mount here, we just later toggle the
+ * MS_RDONLY bit for the mount point */
+ return 0;
case PRIVATE_TMP:
what = tmp_dir;
@@ -306,6 +304,9 @@ static int apply_mount(
what = var_tmp_dir;
break;
+ case PRIVATE_DEV:
+ return mount_dev(m);
+
default:
assert_not_reached("Unknown mode");
}
@@ -316,7 +317,7 @@ static int apply_mount(
if (r >= 0)
log_debug("Successfully mounted %s to %s", what, m->path);
else if (m->ignore && errno == ENOENT)
- r = 0;
+ return 0;
return r;
}
@@ -326,14 +327,17 @@ static int make_read_only(BindMount *m) {
assert(m);
- if (m->mode != INACCESSIBLE && m->mode != READONLY)
- return 0;
+ if (IN_SET(m->mode, INACCESSIBLE, READONLY))
+ r = bind_remount_recursive(m->path, true);
+ else if (m->mode == READWRITE)
+ r = bind_remount_recursive(m->path, false);
+ else
+ r = 0;
- r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
- if (r < 0 && !(m->ignore && errno == ENOENT))
- return -errno;
+ if (m->ignore && r == -ENOENT)
+ return 0;
- return 0;
+ return r;
}
int setup_namespace(
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index a1d7724..19fb086 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -635,7 +635,7 @@ static int mount_all(const char *dest) {
return r;
}
-static int mount_binds(const char *dest, char **l, unsigned long flags) {
+static int mount_binds(const char *dest, char **l, bool ro) {
char **x, **y;
STRV_FOREACH_PAIR(x, y, l) {
@@ -686,9 +686,12 @@ static int mount_binds(const char *dest, char **l, unsigned long flags) {
return -errno;
}
- if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
- log_error("mount(%s) failed: %m", where);
- return -errno;
+ if (ro) {
+ r = bind_remount_recursive(where, true);
+ if (r < 0) {
+ log_error("Read-Only bind mount failed: %s", strerror(-r));
+ return r;
+ }
}
}
@@ -2941,15 +2944,17 @@ int main(int argc, char *argv[]) {
/* Turn directory into bind mount */
if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
- log_error("Failed to make bind mount.");
+ log_error("Failed to make bind mount: %m");
goto child_fail;
}
- if (arg_read_only)
- if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
- log_error("Failed to make read-only.");
+ if (arg_read_only) {
+ k = bind_remount_recursive(arg_directory, true);
+ if (k < 0) {
+ log_error("Failed to make tree read-only: %s", strerror(-k));
goto child_fail;
}
+ }
if (mount_all(arg_directory) < 0)
goto child_fail;
@@ -2985,10 +2990,10 @@ int main(int argc, char *argv[]) {
if (setup_journal(arg_directory) < 0)
goto child_fail;
- if (mount_binds(arg_directory, arg_bind, 0) < 0)
+ if (mount_binds(arg_directory, arg_bind, false) < 0)
goto child_fail;
- if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
+ if (mount_binds(arg_directory, arg_bind_ro, true) < 0)
goto child_fail;
if (setup_kdbus(arg_directory, kdbus_domain) < 0)
diff --git a/src/shared/util.c b/src/shared/util.c
index 011fb36..7fa3742 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -6540,7 +6540,6 @@ int umount_recursive(const char *prefix, int flags) {
"%*s" /* (11) mount options 2 */
"%*[^\n]", /* some rubbish at the end */
&path);
-
if (k != 1) {
if (k == EOF)
break;
@@ -6570,3 +6569,147 @@ int umount_recursive(const char *prefix, int flags) {
return r ? r : n;
}
+
+int bind_remount_recursive(const char *prefix, bool ro) {
+ _cleanup_set_free_free_ Set *done = NULL;
+ _cleanup_free_ char *cleaned = NULL;
+ int r;
+
+ /* Recursively remount a directory (and all its submounts)
+ * read-only or read-write. If the directory is already
+ * mounted, we reuse the mount and simply mark it
+ * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+ * operation). If it isn't we first make it one. Afterwards we
+ * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
+ * submounts we can access, too. When mounts are stacked on
+ * the same mount point we only care for each individual
+ * "top-level" mount on each point, as we cannot
+ * influence/access the underlying mounts anyway. We do not
+ * have any effect on future submounts that might get
+ * propagated, they migt be writable. This includes future
+ * submounts that have been triggered via autofs. */
+
+ cleaned = strdup(prefix);
+ if (!cleaned)
+ return -ENOMEM;
+
+ path_kill_slashes(cleaned);
+
+ done = set_new(string_hash_func, string_compare_func);
+ if (!done)
+ return -ENOMEM;
+
+ for (;;) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ _cleanup_set_free_free_ Set *todo = NULL;
+ bool top_autofs = false;
+ char *x;
+
+ todo = set_new(string_hash_func, string_compare_func);
+ if (!todo)
+ return -ENOMEM;
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
+ int k;
+
+ k = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) mount options (superblock) */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%ms " /* (9) file system type */
+ "%*s" /* (10) mount source */
+ "%*s" /* (11) mount options (bind mount) */
+ "%*[^\n]", /* some rubbish at the end */
+ &path,
+ &type);
+ if (k != 2) {
+ if (k == EOF)
+ break;
+
+ continue;
+ }
+
+ p = cunescape(path);
+ if (!p)
+ return -ENOMEM;
+
+ /* Let's ignore autofs mounts. If they aren't
+ * triggered yet, we want to avoid triggering
+ * them, as we don't make any guarantees for
+ * future submounts anyway. If they are
+ * already triggered, then we will find
+ * another entry for this. */
+ if (streq(type, "autofs")) {
+ top_autofs = top_autofs || path_equal(cleaned, p);
+ continue;
+ }
+
+ if (path_startswith(p, cleaned) &&
+ !set_contains(done, p)) {
+
+ r = set_consume(todo, p);
+ p = NULL;
+
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* If we have no submounts to process anymore and if
+ * the root is either already done, or an autofs, we
+ * are done */
+ if (set_isempty(todo) &&
+ (top_autofs || set_contains(done, cleaned)))
+ return 0;
+
+ if (!set_contains(done, cleaned) &&
+ !set_contains(todo, cleaned)) {
+ /* The prefix directory itself is not yet a
+ * mount, make it one. */
+ if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
+ return -errno;
+
+ if (mount(NULL, prefix, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+ return -errno;
+
+ x = strdup(cleaned);
+ if (!x)
+ return -ENOMEM;
+
+ r = set_consume(done, x);
+ if (r < 0)
+ return r;
+ }
+
+ while ((x = set_steal_first(todo))) {
+
+ r = set_consume(done, x);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+
+ if (mount(NULL, x, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
+
+ /* Deal with mount points that are
+ * obstructed by a later mount */
+
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ }
+ }
+}
diff --git a/src/shared/util.h b/src/shared/util.h
index ac851fa..952239e 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -944,3 +944,5 @@ union file_handle_union {
int update_reboot_param_file(const char *param);
int umount_recursive(const char *target, int flags);
+
+int bind_remount_recursive(const char *prefix, bool ro);
commit c8835999c33c0443bf91e1a8fa6dd716a8ff0b0f
Author: Lennart Poettering <lennart at poettering.net>
Date: Thu Jun 5 21:37:40 2014 +0200
namespace: also include /root in ProtectHome=
/root can't really be autofs, and is also a home directory, so cover it
with ProtectHome=.
diff --git a/src/core/namespace.c b/src/core/namespace.c
index fcbfd87..43b9045 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -362,7 +362,7 @@ int setup_namespace(
strv_length(read_only_dirs) +
strv_length(inaccessible_dirs) +
private_dev +
- (protect_home != PROTECT_HOME_NO ? 2 : 0) +
+ (protect_home != PROTECT_HOME_NO ? 3 : 0) +
(protect_system != PROTECT_SYSTEM_NO ? 1 : 0) +
(protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
@@ -399,7 +399,7 @@ int setup_namespace(
}
if (protect_home != PROTECT_HOME_NO) {
- r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user"), protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+ r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user", "-/root"), protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
if (r < 0)
return r;
}
commit 6d313367d9ef780560e117e886502a99fa220eac
Author: Lennart Poettering <lennart at poettering.net>
Date: Thu Jun 5 21:35:35 2014 +0200
namespace: when setting up an inaccessible mount point, unmount everything below
This has the benefit of not triggering any autofs mount points
unnecessarily.
diff --git a/src/core/namespace.c b/src/core/namespace.c
index b6deab7..fcbfd87 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -284,6 +284,12 @@ static int apply_mount(
return mount_dev(m);
case INACCESSIBLE:
+
+ /* First, get rid of everything that is below if there
+ * is anything... Then, overmount it with an
+ * inaccessible directory. */
+ umount_recursive(m->path, 0);
+
what = "/run/systemd/inaccessible";
break;
diff --git a/src/shared/util.c b/src/shared/util.c
index 49185a9..011fb36 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -54,6 +54,7 @@
#include <grp.h>
#include <sys/mman.h>
#include <sys/vfs.h>
+#include <sys/mount.h>
#include <linux/magic.h>
#include <limits.h>
#include <langinfo.h>
@@ -6502,3 +6503,70 @@ int update_reboot_param_file(const char *param) {
return r;
}
+
+int umount_recursive(const char *prefix, int flags) {
+ bool again;
+ int n = 0, r;
+
+ /* Try to umount everything recursively below a
+ * directory. Also, take care of stacked mounts, and keep
+ * unmounting them until they are gone. */
+
+ do {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+
+ again = false;
+ r = 0;
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *path = NULL, *p = NULL;
+ int k;
+
+ k = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) mount options */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%*s " /* (9) file system type */
+ "%*s" /* (10) mount source */
+ "%*s" /* (11) mount options 2 */
+ "%*[^\n]", /* some rubbish at the end */
+ &path);
+
+ if (k != 1) {
+ if (k == EOF)
+ break;
+
+ continue;
+ }
+
+ p = cunescape(path);
+ if (!p)
+ return -ENOMEM;
+
+ if (!path_startswith(p, prefix))
+ continue;
+
+ if (umount2(p, flags) < 0) {
+ r = -errno;
+ continue;
+ }
+
+ again = true;
+ n++;
+
+ break;
+ }
+
+ } while (again);
+
+ return r ? r : n;
+}
diff --git a/src/shared/util.h b/src/shared/util.h
index 62eb604..ac851fa 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -942,3 +942,5 @@ union file_handle_union {
};
int update_reboot_param_file(const char *param);
+
+int umount_recursive(const char *target, int flags);
commit c3544e8d2c2d870a2aff0944aff4ab7824b9ae6b
Author: Lennart Poettering <lennart at poettering.net>
Date: Thu Jun 5 21:35:15 2014 +0200
umount: modernizations
diff --git a/src/core/umount.c b/src/core/umount.c
index d1258f0..a30f674 100644
--- a/src/core/umount.c
+++ b/src/core/umount.c
@@ -61,52 +61,46 @@ static void mount_points_list_free(MountPoint **head) {
}
static int mount_points_list_get(MountPoint **head) {
- FILE *proc_self_mountinfo;
- char *path, *p;
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
unsigned int i;
- int r;
assert(head);
- if (!(proc_self_mountinfo = fopen("/proc/self/mountinfo", "re")))
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
return -errno;
for (i = 1;; i++) {
- int k;
+ _cleanup_free_ char *path = NULL;
+ char *p = NULL;
MountPoint *m;
+ int k;
- path = p = NULL;
-
- if ((k = fscanf(proc_self_mountinfo,
- "%*s " /* (1) mount id */
- "%*s " /* (2) parent id */
- "%*s " /* (3) major:minor */
- "%*s " /* (4) root */
- "%ms " /* (5) mount point */
- "%*s" /* (6) mount options */
- "%*[^-]" /* (7) optional fields */
- "- " /* (8) separator */
- "%*s " /* (9) file system type */
- "%*s" /* (10) mount source */
- "%*s" /* (11) mount options 2 */
- "%*[^\n]", /* some rubbish at the end */
- &path)) != 1) {
+ k = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) mount options */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%*s " /* (9) file system type */
+ "%*s" /* (10) mount source */
+ "%*s" /* (11) mount options 2 */
+ "%*[^\n]", /* some rubbish at the end */
+ &path);
+ if (k != 1) {
if (k == EOF)
break;
log_warning("Failed to parse /proc/self/mountinfo:%u.", i);
-
- free(path);
continue;
}
p = cunescape(path);
- free(path);
-
- if (!p) {
- r = -ENOMEM;
- goto finish;
- }
+ if (!p)
+ return -ENOMEM;
/* Ignore mount points we can't unmount because they
* are API or because we are keeping them open (like
@@ -118,22 +112,17 @@ static int mount_points_list_get(MountPoint **head) {
continue;
}
- if (!(m = new0(MountPoint, 1))) {
+ m = new0(MountPoint, 1);
+ if (!m) {
free(p);
- r = -ENOMEM;
- goto finish;
+ return -ENOMEM;
}
m->path = p;
LIST_PREPEND(mount_point, *head, m);
}
- r = 0;
-
-finish:
- fclose(proc_self_mountinfo);
-
- return r;
+ return 0;
}
static int swap_list_get(MountPoint **head) {
More information about the systemd-commits
mailing list