[systemd-commits] man/systemd.exec.xml src/core src/test units/systemd-hostnamed.service.in units/systemd-journal-gatewayd.service.in units/systemd-journald.service.in units/systemd-localed.service.in units/systemd-logind.service.in units/systemd-machined.service.in units/systemd-networkd.service.in units/systemd-resolved.service.in units/systemd-timedated.service.in units/systemd-timesyncd.service.in units/systemd-udevd.service.in
Lennart Poettering
lennart at kemper.freedesktop.org
Tue Jun 3 15:06:07 PDT 2014
man/systemd.exec.xml | 61 +++++++++++++++++++++++++++++-
src/core/dbus-execute.c | 5 ++
src/core/execute.c | 11 ++++-
src/core/execute.h | 3 +
src/core/load-fragment-gperf.gperf.m4 | 2
src/core/load-fragment.c | 43 +++++++++++++++++++++
src/core/load-fragment.h | 1
src/core/namespace.c | 26 ++++++++++++
src/core/namespace.h | 15 +++++++
src/test/test-ns.c | 2
units/systemd-hostnamed.service.in | 2
units/systemd-journal-gatewayd.service.in | 2
units/systemd-journald.service.in | 2
units/systemd-localed.service.in | 2
units/systemd-logind.service.in | 2
units/systemd-machined.service.in | 2
units/systemd-networkd.service.in | 2
units/systemd-resolved.service.in | 2
units/systemd-timedated.service.in | 2
units/systemd-timesyncd.service.in | 2
units/systemd-udevd.service.in | 2
21 files changed, 187 insertions(+), 4 deletions(-)
New commits:
commit 417116f23432073162ebfcb286a7800846482eed
Author: Lennart Poettering <lennart at poettering.net>
Date: Tue Jun 3 23:41:44 2014 +0200
core: add new ReadOnlySystem= and ProtectedHome= settings for service units
ReadOnlySystem= uses fs namespaces to mount /usr and /boot read-only for
a service.
ProtectedHome= uses fs namespaces to mount /home and /run/user
inaccessible or read-only for a service.
This patch also enables these settings for all our long-running services.
Together they should be a good building block for a minimal service
sandbox, removing the ability for services to modify the operating
system or access the user's private data.
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 3f27d13..3664303 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -764,7 +764,7 @@
capability sets as documented in
<citerefentry><refentrytitle>cap_from_text</refentrytitle><manvolnum>3</manvolnum></citerefentry>.
Note that these capability sets are
- usually influenced by the capabilities
+ usually influenced (and filtered) by the capabilities
attached to the executed file. Due to
that
<varname>CapabilityBoundingSet=</varname>
@@ -935,6 +935,63 @@
</varlistentry>
<varlistentry>
+ <term><varname>ReadOnlySystem=</varname></term>
+
+ <listitem><para>Takes a boolean
+ argument. If true, mounts the
+ <filename>/usr</filename> and
+ <filename>/boot</filename> directories
+ read-only for processes invoked by
+ this unit. This setting ensures that
+ any modification of the vendor
+ supplied operating system is
+ prohibited for the service. It is
+ recommended to enable this setting for
+ all long-running services, unless they
+ are involved with system updates or
+ need to modify the operating system in
+ other ways. Note however, that
+ processes retaining the CAP_SYS_ADMIN
+ capability can undo the effect of this
+ setting. This setting is hence
+ particularly useful for daemons which
+ have this capability removed, for
+ example with
+ <varname>CapabilityBoundingSet=</varname>. Defaults
+ to off.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>ProtectedHome=</varname></term>
+
+ <listitem><para>Takes a boolean
+ argument or
+ <literal>read-only</literal>. If true,
+ the directories
+ <filename>/home</filename> and
+ <filename>/run/user</filename> are
+ made inaccessible and empty for
+ processes invoked by this unit. If set
+ to <literal>read-only</literal> the
+ two directories are made read-only
+ instead. It is recommended to enable
+ this setting for all long-running
+ services (in particular network-facing
+ ones), to ensure they cannot get access
+ to private user data, unless the
+ services actually require access to
+ the user's private data. Note however,
+ that processes retaining the
+ CAP_SYS_ADMIN capability can undo the
+ effect of this setting. This setting
+ is hence particularly useful for
+ daemons which have this capability
+ removed, for example with
+ <varname>CapabilityBoundingSet=</varname>. Defaults
+ to off.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>MountFlags=</varname></term>
<listitem><para>Takes a mount
@@ -968,6 +1025,8 @@
namespace related options
(<varname>PrivateTmp=</varname>,
<varname>PrivateDevices=</varname>,
+ <varname>ReadOnlySystem=</varname>,
+ <varname>ProtectedHome=</varname>,
<varname>ReadOnlyDirectories=</varname>,
<varname>InaccessibleDirectories=</varname>
and
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 13b3d0d..2aa08c1 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -35,6 +35,7 @@
#include "capability.h"
#include "env-util.h"
#include "af-list.h"
+#include "namespace.h"
#ifdef HAVE_SECCOMP
#include "seccomp-util.h"
@@ -44,6 +45,8 @@ BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutp
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInput);
+static BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_protected_home, protected_home, ProtectedHome);
+
static int property_get_environment_files(
sd_bus *bus,
const char *path,
@@ -626,6 +629,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectedHome", "s", bus_property_get_protected_home, offsetof(ExecContext, protected_home), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadOnlySystem", "b", bus_property_get_bool, offsetof(ExecContext, read_only_system), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SELinuxContext", "(bs)", property_get_selinux_context, 0, SD_BUS_VTABLE_PROPERTY_CONST),
diff --git a/src/core/execute.c b/src/core/execute.c
index af8e7c7..ce8b9bc 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1569,7 +1569,9 @@ int exec_spawn(ExecCommand *command,
!strv_isempty(context->inaccessible_dirs) ||
context->mount_flags != 0 ||
(context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
- context->private_devices) {
+ context->private_devices ||
+ context->read_only_system ||
+ context->protected_home != PROTECTED_HOME_NO) {
char *tmp = NULL, *var = NULL;
@@ -1593,8 +1595,9 @@ int exec_spawn(ExecCommand *command,
tmp,
var,
context->private_devices,
+ context->protected_home,
+ context->read_only_system,
context->mount_flags);
-
if (err < 0) {
r = EXIT_NAMESPACE;
goto fail_child;
@@ -2111,6 +2114,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sPrivateTmp: %s\n"
"%sPrivateNetwork: %s\n"
"%sPrivateDevices: %s\n"
+ "%sProtectedHome: %s\n"
+ "%sReadOnlySystem: %s\n"
"%sIgnoreSIGPIPE: %s\n",
prefix, c->umask,
prefix, c->working_directory ? c->working_directory : "/",
@@ -2119,6 +2124,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->private_tmp),
prefix, yes_no(c->private_network),
prefix, yes_no(c->private_devices),
+ prefix, protected_home_to_string(c->protected_home),
+ prefix, yes_no(c->read_only_system),
prefix, yes_no(c->ignore_sigpipe));
STRV_FOREACH(e, c->environment)
diff --git a/src/core/execute.h b/src/core/execute.h
index c9e29ff..3d6f77c 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -39,6 +39,7 @@ typedef struct ExecRuntime ExecRuntime;
#include "set.h"
#include "fdset.h"
#include "missing.h"
+#include "namespace.h"
typedef enum ExecInput {
EXEC_INPUT_NULL,
@@ -156,6 +157,8 @@ struct ExecContext {
bool private_tmp;
bool private_network;
bool private_devices;
+ bool read_only_system;
+ ProtectedHome protected_home;
bool no_new_privileges;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 4c066a8..97382d4 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -80,6 +80,8 @@ $1.InaccessibleDirectories, config_parse_namespace_path_strv, 0,
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
+$1.ReadOnlySystem, config_parse_bool, 0, offsetof($1, exec_context.read_only_system)
+$1.ProtectedHome, config_parse_protected_home, 0, offsetof($1, exec_context)
$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context)
$1.Personality, config_parse_personality, 0, offsetof($1, exec_context.personality)
$1.RuntimeDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.runtime_directory_mode)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 6403e41..9df7808 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -3044,6 +3044,49 @@ int config_parse_no_new_privileges(
return 0;
}
+int config_parse_protected_home(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int k;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* Our enum shall be a superset of booleans, hence first try
+ * to parse as a boolean, and then as an enum */
+
+ k = parse_boolean(rvalue);
+ if (k > 0)
+ c->protected_home = PROTECTED_HOME_YES;
+ else if (k == 0)
+ c->protected_home = PROTECTED_HOME_NO;
+ else {
+ ProtectedHome h;
+
+ h = protected_home_from_string(rvalue);
+ if (h < 0){
+ log_syntax(unit, LOG_ERR, filename, line, -h, "Failed to parse protected home value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->protected_home = h;
+ }
+
+ return 0;
+}
+
#define FOLLOW_MAX 8
static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 9ef9caa..279efa9 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -97,6 +97,7 @@ int config_parse_set_status(const char *unit, const char *filename, unsigned lin
int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_no_new_privileges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_protected_home(const char* unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 9f15211..de09e9f 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -331,6 +331,8 @@ int setup_namespace(
char* tmp_dir,
char* var_tmp_dir,
bool private_dev,
+ ProtectedHome protected_home,
+ bool read_only_system,
unsigned mount_flags) {
BindMount *m, *mounts = NULL;
@@ -347,7 +349,9 @@ int setup_namespace(
strv_length(read_write_dirs) +
strv_length(read_only_dirs) +
strv_length(inaccessible_dirs) +
- private_dev;
+ private_dev +
+ (protected_home != PROTECTED_HOME_NO ? 2 : 0) +
+ (read_only_system ? 2 : 0);
if (n > 0) {
m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
@@ -381,6 +385,18 @@ int setup_namespace(
m++;
}
+ if (protected_home != PROTECTED_HOME_NO) {
+ r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user"), protected_home == PROTECTED_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+ if (r < 0)
+ return r;
+ }
+
+ if (read_only_system) {
+ r = append_mounts(&m, STRV_MAKE("/usr", "-/boot"), READONLY);
+ if (r < 0)
+ return r;
+ }
+
assert(mounts + n == m);
qsort(mounts, n, sizeof(BindMount), mount_path_compare);
@@ -581,3 +597,11 @@ fail:
return r;
}
+
+static const char *const protected_home_table[_PROTECTED_HOME_MAX] = {
+ [PROTECTED_HOME_NO] = "no",
+ [PROTECTED_HOME_YES] = "yes",
+ [PROTECTED_HOME_READ_ONLY] = "read-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(protected_home, ProtectedHome);
diff --git a/src/core/namespace.h b/src/core/namespace.h
index fb1fc6e..b985bdf 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -23,12 +23,24 @@
#include <stdbool.h>
+#include "macro.h"
+
+typedef enum ProtectedHome {
+ PROTECTED_HOME_NO,
+ PROTECTED_HOME_YES,
+ PROTECTED_HOME_READ_ONLY,
+ _PROTECTED_HOME_MAX,
+ _PROTECTED_HOME_INVALID = -1
+} ProtectedHome;
+
int setup_namespace(char **read_write_dirs,
char **read_only_dirs,
char **inaccessible_dirs,
char *tmp_dir,
char *var_tmp_dir,
bool private_dev,
+ ProtectedHome protected_home,
+ bool read_only_system,
unsigned mount_flags);
int setup_tmp_dirs(const char *id,
@@ -36,3 +48,6 @@ int setup_tmp_dirs(const char *id,
char **var_tmp_dir);
int setup_netns(int netns_storage_socket[2]);
+
+const char* protected_home_to_string(ProtectedHome p) _const_;
+ProtectedHome protected_home_from_string(const char *s) _pure_;
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index ad0d041..7158193 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -60,6 +60,8 @@ int main(int argc, char *argv[]) {
tmp_dir,
var_tmp_dir,
true,
+ PROTECTED_HOME_NO,
+ false,
0);
if (r < 0) {
log_error("Failed to setup namespace: %s", strerror(-r));
diff --git a/units/systemd-hostnamed.service.in b/units/systemd-hostnamed.service.in
index 79e22c1..497b8d9 100644
--- a/units/systemd-hostnamed.service.in
+++ b/units/systemd-hostnamed.service.in
@@ -18,3 +18,5 @@ WatchdogSec=1min
PrivateTmp=yes
PrivateDevices=yes
PrivateNetwork=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
diff --git a/units/systemd-journal-gatewayd.service.in b/units/systemd-journal-gatewayd.service.in
index e8e571e..3695240 100644
--- a/units/systemd-journal-gatewayd.service.in
+++ b/units/systemd-journal-gatewayd.service.in
@@ -17,6 +17,8 @@ SupplementaryGroups=systemd-journal
PrivateTmp=yes
PrivateDevices=yes
PrivateNetwork=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
[Install]
Also=systemd-journal-gatewayd.socket
diff --git a/units/systemd-journald.service.in b/units/systemd-journald.service.in
index de93879..ba3f847 100644
--- a/units/systemd-journald.service.in
+++ b/units/systemd-journald.service.in
@@ -20,6 +20,8 @@ RestartSec=0
NotifyAccess=all
StandardOutput=null
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE CAP_SYSLOG CAP_AUDIT_CONTROL CAP_CHOWN CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SETUID CAP_SETGID
+ReadOnlySystem=yes
+ProtectedHome=yes
WatchdogSec=1min
# Increase the default a bit in order to allow many simultaneous
diff --git a/units/systemd-localed.service.in b/units/systemd-localed.service.in
index ae1c5e5..e1792d6 100644
--- a/units/systemd-localed.service.in
+++ b/units/systemd-localed.service.in
@@ -18,3 +18,5 @@ WatchdogSec=1min
PrivateTmp=yes
PrivateDevices=yes
PrivateNetwork=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
diff --git a/units/systemd-logind.service.in b/units/systemd-logind.service.in
index c6cbd1c..68803fb 100644
--- a/units/systemd-logind.service.in
+++ b/units/systemd-logind.service.in
@@ -25,6 +25,8 @@ RestartSec=0
BusName=org.freedesktop.login1
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_AUDIT_CONTROL CAP_CHOWN CAP_KILL CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_SYS_TTY_CONFIG
WatchdogSec=1min
+ReadOnlySystem=yes
+ProtectedHome=yes
# Increase the default a bit in order to allow many simultaneous
# logins since we keep one fd open per session.
diff --git a/units/systemd-machined.service.in b/units/systemd-machined.service.in
index 1a27c6e..07522e1 100644
--- a/units/systemd-machined.service.in
+++ b/units/systemd-machined.service.in
@@ -20,3 +20,5 @@ WatchdogSec=1min
PrivateTmp=yes
PrivateDevices=yes
PrivateNetwork=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
diff --git a/units/systemd-networkd.service.in b/units/systemd-networkd.service.in
index 3538295..a928999 100644
--- a/units/systemd-networkd.service.in
+++ b/units/systemd-networkd.service.in
@@ -20,6 +20,8 @@ Restart=always
RestartSec=0
ExecStart=@rootlibexecdir@/systemd-networkd
CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_NET_RAW CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
+ReadOnlySystem=yes
+ProtectedHome=yes
WatchdogSec=1min
[Install]
diff --git a/units/systemd-resolved.service.in b/units/systemd-resolved.service.in
index 9d422ca..787fde2 100644
--- a/units/systemd-resolved.service.in
+++ b/units/systemd-resolved.service.in
@@ -16,6 +16,8 @@ Restart=always
RestartSec=0
ExecStart=@rootlibexecdir@/systemd-resolved
CapabilityBoundingSet=CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
+ReadOnlySystem=yes
+ProtectedHome=yes
[Install]
WantedBy=multi-user.target
diff --git a/units/systemd-timedated.service.in b/units/systemd-timedated.service.in
index 5c90290..9658149 100644
--- a/units/systemd-timedated.service.in
+++ b/units/systemd-timedated.service.in
@@ -16,3 +16,5 @@ BusName=org.freedesktop.timedate1
CapabilityBoundingSet=CAP_SYS_TIME
WatchdogSec=1min
PrivateTmp=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
diff --git a/units/systemd-timesyncd.service.in b/units/systemd-timesyncd.service.in
index cbde3ff..030e4a0 100644
--- a/units/systemd-timesyncd.service.in
+++ b/units/systemd-timesyncd.service.in
@@ -23,6 +23,8 @@ ExecStart=@rootlibexecdir@/systemd-timesyncd
CapabilityBoundingSet=CAP_SYS_TIME CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
PrivateTmp=yes
PrivateDevices=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
WatchdogSec=1min
[Install]
diff --git a/units/systemd-udevd.service.in b/units/systemd-udevd.service.in
index ddee015..82275f0 100644
--- a/units/systemd-udevd.service.in
+++ b/units/systemd-udevd.service.in
@@ -22,3 +22,5 @@ Restart=always
RestartSec=0
ExecStart=@rootlibexecdir@/systemd-udevd
MountFlags=slave
+ReadOnlySystem=yes
+ProtectedHome=yes
More information about the systemd-commits
mailing list