[systemd-commits] 6 commits - Makefile.am TODO man/systemd.exec.xml shell-completion/Makefile shell-completion/bash shell-completion/zsh src/core src/nspawn src/shared

Lennart Poettering lennart at kemper.freedesktop.org
Tue Feb 25 17:29:18 PST 2014


 Makefile.am                           |   30 +++++-
 TODO                                  |    4 
 man/systemd.exec.xml                  |  107 ++++++++++++++++-----
 shell-completion/Makefile             |    1 
 shell-completion/bash/Makefile        |    1 
 shell-completion/zsh/Makefile         |    1 
 src/core/dbus-execute.c               |   50 ++++++++++
 src/core/execute.c                    |  166 ++++++++++++++++++++++++++++++----
 src/core/execute.h                    |    3 
 src/core/load-fragment-gperf.gperf.m4 |    6 -
 src/core/load-fragment.c              |   78 +++++++++++++++
 src/core/load-fragment.h              |    1 
 src/core/unit.c                       |    8 +
 src/nspawn/nspawn.c                   |    2 
 src/shared/.gitignore                 |    4 
 src/shared/af-list.c                  |   58 +++++++++++
 src/shared/af-list.h                  |   27 +++++
 src/shared/exit-status.c              |    3 
 src/shared/exit-status.h              |    3 
 19 files changed, 501 insertions(+), 52 deletions(-)

New commits:
commit f513e420c8b1a1d4c13092cd378f048b69793497
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Feb 26 02:28:52 2014 +0100

    exec: imply NoNewPrivileges= only when seccomp filters are used in user mode

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 413d81d..9224f1e 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1010,8 +1010,8 @@
                         <varlistentry>
                                 <term><varname>SystemCallFilter=</varname></term>
 
-                                <listitem><para>Takes a space-separated
-                                list of system call
+                                <listitem><para>Takes a
+                                space-separated list of system call
                                 names. If this setting is used, all
                                 system calls executed by the unit
                                 processes except for the listed ones
@@ -1023,12 +1023,13 @@
                                 the effect is inverted: only the
                                 listed system calls will result in
                                 immediate process termination
-                                (blacklisting). If this option is used,
+                                (blacklisting). If running in user
+                                mode and this option is used,
                                 <varname>NoNewPrivileges=yes</varname>
-                                is implied. This feature makes use of
-                                the Secure Computing Mode 2 interfaces
-                                of the kernel ('seccomp filtering')
-                                and is useful for enforcing a minimal
+                                is implied. This feature makes use of the
+                                Secure Computing Mode 2 interfaces of
+                                the kernel ('seccomp filtering') and
+                                is useful for enforcing a minimal
                                 sandboxing environment. Note that the
                                 <function>execve</function>,
                                 <function>rt_sigreturn</function>,
@@ -1096,28 +1097,31 @@
                                 <constant>x86</constant>,
                                 <constant>x86-64</constant>,
                                 <constant>x32</constant>,
-                                <constant>arm</constant> as well as the
-                                special identifier
-                                <constant>native</constant>. Only system
-                                calls of the specified architectures
-                                will be permitted to processes of this
-                                unit. This is an effective way to
-                                disable compatibility with non-native
-                                architectures for processes, for
-                                example to prohibit execution of
-                                32-bit x86 binaries on 64-bit x86-64
-                                systems. The special
+                                <constant>arm</constant> as well as
+                                the special identifier
+                                <constant>native</constant>. Only
+                                system calls of the specified
+                                architectures will be permitted to
+                                processes of this unit. This is an
+                                effective way to disable compatibility
+                                with non-native architectures for
+                                processes, for example to prohibit
+                                execution of 32-bit x86 binaries on
+                                64-bit x86-64 systems. The special
                                 <constant>native</constant> identifier
                                 implicitly maps to the native
                                 architecture of the system (or more
                                 strictly: to the architecture the
-                                system manager is compiled for). Note
-                                that setting this option to a
-                                non-empty list implies that
-                                <constant>native</constant> is included
-                                too. By default, this option is set to
-                                the empty list, i.e. no architecture
-                                system call filtering is
+                                system manager is compiled for). If
+                                running in user mode and this option
+                                is used,
+                                <varname>NoNewPrivileges=yes</varname>
+                                is implied. Note that setting this
+                                option to a non-empty list implies
+                                that <constant>native</constant> is
+                                included too. By default, this option
+                                is set to the empty list, i.e. no
+                                architecture system call filtering is
                                 applied.</para></listitem>
                         </varlistentry>
 
@@ -1149,7 +1153,10 @@
                                 sockets only) are unaffected. Note
                                 that this option has no effect on
                                 32bit x86 and is ignored (but works
-                                correctly on x86-64). By default no
+                                correctly on x86-64). If running in user
+                                mode and this option is used,
+                                <varname>NoNewPrivileges=yes</varname>
+                                is implied. By default no
                                 restriction applies, all address
                                 families are accessible to
                                 processes. If assigned the empty
diff --git a/src/core/execute.c b/src/core/execute.c
index fff25c2..9de6e87 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1706,7 +1706,8 @@ int exec_spawn(ExecCommand *command,
                                 }
 
 #ifdef HAVE_SECCOMP
-                        if (context->address_families) {
+                        if (context->address_families_whitelist ||
+                            !set_isempty(context->address_families)) {
                                 err = apply_address_families(context);
                                 if (err < 0) {
                                         r = EXIT_ADDRESS_FAMILIES;
@@ -1714,7 +1715,9 @@ int exec_spawn(ExecCommand *command,
                                 }
                         }
 
-                        if (context->syscall_filter || context->syscall_archs) {
+                        if (context->syscall_whitelist ||
+                            !set_isempty(context->syscall_filter) ||
+                            !set_isempty(context->syscall_archs)) {
                                 err = apply_seccomp(context);
                                 if (err < 0) {
                                         r = EXIT_SECCOMP;
diff --git a/src/core/unit.c b/src/core/unit.c
index 9d54147..0547073 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -2817,6 +2817,14 @@ int unit_exec_context_patch_defaults(Unit *u, ExecContext *c) {
                         return r;
         }
 
+        if (u->manager->running_as == SYSTEMD_USER &&
+            (c->syscall_whitelist ||
+             !set_isempty(c->syscall_filter) ||
+             !set_isempty(c->syscall_archs) ||
+             c->address_families_whitelist ||
+             !set_isempty(c->address_families)))
+                c->no_new_privileges = true;
+
         return 0;
 }
 

commit 9c423fbf2a11bf9c936017c0f1e06ea2e4e82a40
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Feb 26 02:19:17 2014 +0100

    update TODO

diff --git a/TODO b/TODO
index d8a3a3b..6cac3e2 100644
--- a/TODO
+++ b/TODO
@@ -74,10 +74,6 @@ Features:
   "systemctl status" without args to output this state along with a
   selection of other data, such as the uptime or so.
 
-* Add a seccomp-based filter for socket() calls to limit services to
-  specific address families (for example: AF_UNIX), inspired by
-  Android's sandboxing
-
 * implement Distribute= in socket units to allow running multiple
   service instances processing the listening socket, and open this up
   for ReusePort=

commit 538b08707ab7d34fac5b8c2753d3bf9ac12c2ebf
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Feb 26 02:16:46 2014 +0100

    build-sys: add missing makefile symlinks

diff --git a/shell-completion/Makefile b/shell-completion/Makefile
new file mode 120000
index 0000000..bd10475
--- /dev/null
+++ b/shell-completion/Makefile
@@ -0,0 +1 @@
+../src/Makefile
\ No newline at end of file
diff --git a/shell-completion/bash/Makefile b/shell-completion/bash/Makefile
new file mode 120000
index 0000000..d0b0e8e
--- /dev/null
+++ b/shell-completion/bash/Makefile
@@ -0,0 +1 @@
+../Makefile
\ No newline at end of file
diff --git a/shell-completion/zsh/Makefile b/shell-completion/zsh/Makefile
new file mode 120000
index 0000000..d0b0e8e
--- /dev/null
+++ b/shell-completion/zsh/Makefile
@@ -0,0 +1 @@
+../Makefile
\ No newline at end of file

commit 4298d0b5128326621c8f537107c4c8b459490721
Author: Lennart Poettering <lennart at poettering.net>
Date:   Tue Feb 25 20:37:03 2014 +0100

    core: add new RestrictAddressFamilies= switch
    
    This new unit setting allows restricting which address families are
    available to processes. This is an effective way to minimize the attack
    surface of services, by turning off entire network stacks for them.
    
    This is based on seccomp, and does not work on x86-32, since seccomp
    cannot filter socketcall() syscalls on that platform.

diff --git a/Makefile.am b/Makefile.am
index 529b525..dd067f6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -764,6 +764,8 @@ libsystemd_shared_la_SOURCES = \
 	src/shared/net-util.h \
 	src/shared/errno-list.c \
 	src/shared/errno-list.h \
+	src/shared/af-list.c \
+	src/shared/af-list.h \
 	src/shared/audit.c \
 	src/shared/audit.h \
 	src/shared/xml.c \
@@ -775,7 +777,9 @@ libsystemd_shared_la_SOURCES = \
 
 nodist_libsystemd_shared_la_SOURCES = \
 	src/shared/errno-from-name.h \
-	src/shared/errno-to-name.h
+	src/shared/errno-to-name.h \
+	src/shared/af-from-name.h \
+	src/shared/af-to-name.h
 
 libsystemd_shared_la_CFLAGS = \
 	$(AM_CFLAGS) \
@@ -1059,11 +1063,15 @@ CLEANFILES += \
 	src/core/load-fragment-gperf.c \
 	src/core/load-fragment-gperf-nulstr.c \
 	src/shared/errno-list.txt \
-	src/shared/errno-from-name.gperf
+	src/shared/errno-from-name.gperf \
+	src/shared/af-list.txt \
+	src/shared/af-from-name.gperf
 
 BUILT_SOURCES += \
 	src/shared/errno-from-name.h \
-	src/shared/errno-to-name.h
+	src/shared/errno-to-name.h \
+	src/shared/af-from-name.h \
+	src/shared/af-to-name.h
 
 src/shared/errno-list.txt:
 	$(AM_V_at)$(MKDIR_P) $(dir $@)
@@ -1081,6 +1089,22 @@ src/shared/errno-to-name.h: src/shared/errno-list.txt
 	$(AM_V_at)$(MKDIR_P) $(dir $@)
 	$(AM_V_GEN)$(AWK) 'BEGIN{ print "static const char* const errno_names[] = { "} { printf "[%s] = \"%s\",\n", $$1, $$1 } END{print "};"}' < $< > $@
 
+src/shared/af-list.txt:
+	$(AM_V_at)$(MKDIR_P) $(dir $@)
+	$(AM_V_GEN)$(CPP) $(CFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) -dM -include sys/socket.h - < /dev/null | grep -v AF_UNSPEC | grep -v AF_MAX | $(AWK) '/^#define[ \t]+AF_[^ \t]+[ \t]+PF_[^ \t]/ { print $$2; }'  > $@
+
+src/shared/af-from-name.gperf: src/shared/af-list.txt
+	$(AM_V_at)$(MKDIR_P) $(dir $@)
+	$(AM_V_GEN)$(AWK) 'BEGIN{ print "struct af_name { const char* name; int id; };"; print "%null-strings"; print "%%";} { printf "%s, %s\n", $$1, $$1 }' < $< > $@
+
+src/shared/af-from-name.h: src/shared/af-from-name.gperf
+	$(AM_V_at)$(MKDIR_P) $(dir $@)
+	$(AM_V_GPERF)$(GPERF) -L ANSI-C -t --ignore-case -N lookup_af -H hash_af_name -p -C < $< > $@
+
+src/shared/af-to-name.h: src/shared/af-list.txt
+	$(AM_V_at)$(MKDIR_P) $(dir $@)
+	$(AM_V_GEN)$(AWK) 'BEGIN{ print "static const char* const af_names[] = { "} !/AF_FILE/ && !/AF_ROUTE/ && !/AF_LOCAL/ { printf "[%s] = \"%s\",\n", $$1, $$1 } END{print "};"}' < $< > $@
+
 # ------------------------------------------------------------------------------
 systemd_SOURCES = \
 	src/core/main.c
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 1983993..413d81d 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1122,6 +1122,55 @@
                         </varlistentry>
 
                         <varlistentry>
+                                <term><varname>RestrictAddressFamilies=</varname></term>
+
+                                <listitem><para>Restricts the set of
+                                socket address families accessible to
+                                the processes of this unit. Takes a
+                                space-separated list of address family
+                                names to whitelist, such as
+                                <constant>AF_UNIX</constant>,
+                                <constant>AF_INET</constant> or
+                                <constant>AF_INET6</constant>. When
+                                prefixed with <constant>~</constant>
+                                the listed address families will be
+                                applied as blacklist, otherwise as
+                                whitelist. Note that this restricts
+                                access to the
+                                <citerefentry><refentrytitle>socket</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+                                system call only. Sockets passed into
+                                the process by other means (for
+                                example, by using socket activation
+                                with socket units, see
+                                <citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry>)
+                                are unaffected. Also, sockets created
+                                with <function>socketpair()</function>
+                                (which creates connected AF_UNIX
+                                sockets only) are unaffected. Note
+                                that this option has no effect on
+                                32bit x86 and is ignored (but works
+                                correctly on x86-64). By default no
+                                restriction applies, all address
+                                families are accessible to
+                                processes. If assigned the empty
+                                string any previous list changes are
+                                undone.</para>
+
+                                <para>Use this option to limit
+                                exposure of processes to remote
+                                systems, in particular via exotic
+                                network protocols. Note that in most
+                                cases the local
+                                <constant>AF_UNIX</constant> address
+                                family should be included in the
+                                configured whitelist as it is
+                                frequently used for local
+                                communication, including for
+                                <citerefentry><refentrytitle>syslog</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+                                logging.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><varname>Personality=</varname></term>
 
                                 <listitem><para>Controls which
@@ -1138,6 +1187,7 @@
                                 host system's
                                 kernel.</para></listitem>
                         </varlistentry>
+
                 </variablelist>
         </refsect1>
 
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 935c62b..02e2a6d 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -34,6 +34,7 @@
 #include "dbus-execute.h"
 #include "capability.h"
 #include "env-util.h"
+#include "af-list.h"
 
 #ifdef HAVE_SECCOMP
 #include "seccomp-util.h"
@@ -518,6 +519,54 @@ static int property_get_personality(
         return sd_bus_message_append(reply, "s", personality_to_string(c->personality));
 }
 
+static int property_get_address_families(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        ExecContext *c = userdata;
+        _cleanup_strv_free_ char **l = NULL;
+        Iterator i;
+        void *af;
+        int r;
+
+        assert(bus);
+        assert(reply);
+        assert(c);
+
+        r = sd_bus_message_open_container(reply, 'r', "bas");
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_append(reply, "b", c->address_families_whitelist);
+        if (r < 0)
+                return r;
+
+        SET_FOREACH(af, c->address_families, i) {
+                const char *name;
+
+                name = af_to_name(PTR_TO_INT(af));
+                if (!name)
+                        continue;
+
+                r = strv_extend(&l, name);
+                if (r < 0)
+                        return -ENOMEM;
+        }
+
+        strv_sort(l);
+
+        r = sd_bus_message_append_strv(reply, l);
+        if (r < 0)
+                return r;
+
+        return sd_bus_message_close_container(reply);
+}
+
 const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -585,6 +634,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_VTABLE_END
 };
 
diff --git a/src/core/execute.c b/src/core/execute.c
index aeddd2e..fff25c2 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -81,6 +81,7 @@
 #include "async.h"
 #include "selinux-util.h"
 #include "errno-list.h"
+#include "af-list.h"
 #include "apparmor-util.h"
 
 #ifdef HAVE_SECCOMP
@@ -994,9 +995,130 @@ static int apply_seccomp(ExecContext *c) {
 
 finish:
         seccomp_release(seccomp);
+        return r;
+}
+
+static int apply_address_families(ExecContext *c) {
+        scmp_filter_ctx *seccomp;
+        Iterator i;
+        int r;
+
+        assert(c);
+
+        seccomp = seccomp_init(SCMP_ACT_ALLOW);
+        if (!seccomp)
+                return -ENOMEM;
+
+        r = seccomp_add_secondary_archs(seccomp);
+        if (r < 0)
+                goto finish;
+
+        if (c->address_families_whitelist) {
+                int af, first = 0, last = 0;
+                void *afp;
+
+                /* If this is a whitelist, we first block the address
+                 * families that are out of range and then everything
+                 * that is not in the set. First, we find the lowest
+                 * and highest address family in the set. */
+
+                SET_FOREACH(afp, c->address_families, i) {
+                        af = PTR_TO_INT(afp);
 
+                        if (af <= 0 || af >= af_max())
+                                continue;
+
+                        if (first == 0 || af < first)
+                                first = af;
+
+                        if (last == 0 || af > last)
+                                last = af;
+                }
+
+                assert((first == 0) == (last == 0));
+
+                if (first == 0) {
+
+                        /* No entries in the valid range, block everything */
+                        r = seccomp_rule_add(
+                                        seccomp,
+                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                        SCMP_SYS(socket),
+                                        0);
+                        if (r < 0)
+                                goto finish;
+
+                } else {
+
+                        /* Block everything below the first entry */
+                        r = seccomp_rule_add(
+                                        seccomp,
+                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                        SCMP_SYS(socket),
+                                        1,
+                                        SCMP_A0(SCMP_CMP_LT, first));
+                        if (r < 0)
+                                goto finish;
+
+                        /* Block everything above the last entry */
+                        r = seccomp_rule_add(
+                                        seccomp,
+                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                        SCMP_SYS(socket),
+                                        1,
+                                        SCMP_A0(SCMP_CMP_GT, last));
+                        if (r < 0)
+                                goto finish;
+
+                        /* Block everything between the first and last
+                         * entry */
+                        for (af = 1; af < af_max(); af++) {
+
+                                if (set_contains(c->address_families, INT_TO_PTR(af)))
+                                        continue;
+
+                                r = seccomp_rule_add(
+                                                seccomp,
+                                                SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                                SCMP_SYS(socket),
+                                                1,
+                                                SCMP_A0(SCMP_CMP_EQ, af));
+                                if (r < 0)
+                                        goto finish;
+                        }
+                }
+
+        } else {
+                void *af;
+
+                /* If this is a blacklist, then generate one rule for
+                 * each address family that are then combined in OR
+                 * checks. */
+
+                SET_FOREACH(af, c->address_families, i) {
+
+                        r = seccomp_rule_add(
+                                        seccomp,
+                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                        SCMP_SYS(socket),
+                                        1,
+                                        SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
+                        if (r < 0)
+                                goto finish;
+                }
+        }
+
+        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+        if (r < 0)
+                goto finish;
+
+        r = seccomp_load(seccomp);
+
+finish:
+        seccomp_release(seccomp);
         return r;
 }
+
 #endif
 
 static void do_idle_pipe_dance(int idle_pipe[4]) {
@@ -1584,6 +1706,14 @@ int exec_spawn(ExecCommand *command,
                                 }
 
 #ifdef HAVE_SECCOMP
+                        if (context->address_families) {
+                                err = apply_address_families(context);
+                                if (err < 0) {
+                                        r = EXIT_ADDRESS_FAMILIES;
+                                        goto fail_child;
+                                }
+                        }
+
                         if (context->syscall_filter || context->syscall_archs) {
                                 err = apply_seccomp(context);
                                 if (err < 0) {
@@ -1777,13 +1907,14 @@ void exec_context_done(ExecContext *c) {
         free(c->apparmor_profile);
         c->apparmor_profile = NULL;
 
-#ifdef HAVE_SECCOMP
         set_free(c->syscall_filter);
         c->syscall_filter = NULL;
 
         set_free(c->syscall_archs);
         c->syscall_archs = NULL;
-#endif
+
+        set_free(c->address_families);
+        c->address_families = NULL;
 }
 
 void exec_command_done(ExecCommand *c) {
diff --git a/src/core/execute.h b/src/core/execute.h
index 2bfe227..3c905ce 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -178,6 +178,9 @@ struct ExecContext {
         int syscall_errno;
         bool syscall_whitelist:1;
 
+        Set *address_families;
+        bool address_families_whitelist:1;
+
         bool oom_score_adjust_set:1;
         bool nice_set:1;
         bool ioprio_set:1;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 26146b1..beff290 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -52,10 +52,12 @@ $1.NoNewPrivileges,              config_parse_bool,                  0,
 m4_ifdef(`HAVE_SECCOMP',
 `$1.SystemCallFilter,            config_parse_syscall_filter,        0,                             offsetof($1, exec_context)
 $1.SystemCallArchitectures,      config_parse_syscall_archs,         0,                             offsetof($1, exec_context.syscall_archs)
-$1.SystemCallErrorNumber,        config_parse_syscall_errno,         0,                             offsetof($1, exec_context)',
+$1.SystemCallErrorNumber,        config_parse_syscall_errno,         0,                             offsetof($1, exec_context)
+$1.RestrictAddressFamilies,      config_parse_address_families,      0,                             offsetof($1, exec_context)',
 `$1.SystemCallFilter,            config_parse_warn_compat,           0,                             0
 $1.SystemCallArchitectures,      config_parse_warn_compat,           0,                             0
-$1.SystemCallErrorNumber,        config_parse_warn_compat,           0,                             0')
+$1.SystemCallErrorNumber,        config_parse_warn_compat,           0,                             0
+$1.RestrictAddressFamilies,      config_parse_warn_compat,           0,                             0')
 $1.LimitCPU,                     config_parse_limit,                 RLIMIT_CPU,                    offsetof($1, exec_context.rlimit)
 $1.LimitFSIZE,                   config_parse_limit,                 RLIMIT_FSIZE,                  offsetof($1, exec_context.rlimit)
 $1.LimitDATA,                    config_parse_limit,                 RLIMIT_DATA,                   offsetof($1, exec_context.rlimit)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 82aed1e..478d22c 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -56,6 +56,7 @@
 #include "bus-util.h"
 #include "bus-error.h"
 #include "errno-list.h"
+#include "af-list.h"
 
 #ifdef HAVE_SECCOMP
 #include "seccomp-util.h"
@@ -2216,6 +2217,81 @@ int config_parse_syscall_errno(
         c->syscall_errno = e;
         return 0;
 }
+
+int config_parse_address_families(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        ExecContext *c = data;
+        Unit *u = userdata;
+        bool invert = false;
+        char *w, *state;
+        size_t l;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(u);
+
+        if (isempty(rvalue)) {
+                /* Empty assignment resets the list */
+                set_free(c->address_families);
+                c->address_families = NULL;
+                c->address_families_whitelist = false;
+                return 0;
+        }
+
+        if (rvalue[0] == '~') {
+                invert = true;
+                rvalue++;
+        }
+
+        if (!c->address_families) {
+                c->address_families = set_new(trivial_hash_func, trivial_compare_func);
+                if (!c->address_families)
+                        return log_oom();
+
+                c->address_families_whitelist = !invert;
+        }
+
+        FOREACH_WORD_QUOTED(w, l, rvalue, state) {
+                _cleanup_free_ char *t = NULL;
+                int af;
+
+                t = strndup(w, l);
+                if (!t)
+                        return log_oom();
+
+                af = af_from_name(t);
+                if (af <= 0)  {
+                        log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse address family, ignoring: %s", t);
+                        continue;
+                }
+
+                /* If we previously wanted to forbid an address family and now
+                 * we want to allow it, then remove it from the list
+                 */
+                if (!invert == c->address_families_whitelist)  {
+                        r = set_put(c->address_families, INT_TO_PTR(af));
+                        if (r == -EEXIST)
+                                continue;
+                        if (r < 0)
+                                return log_oom();
+                } else
+                        set_remove(c->address_families, INT_TO_PTR(af));
+        }
+
+        return 0;
+}
 #endif
 
 int config_parse_unit_slice(
@@ -3024,6 +3100,7 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_syscall_filter,        "SYSCALLS" },
                 { config_parse_syscall_archs,         "ARCHS" },
                 { config_parse_syscall_errno,         "ERRNO" },
+                { config_parse_address_families,      "FAMILIES" },
 #endif
                 { config_parse_cpu_shares,            "SHARES" },
                 { config_parse_memory_limit,          "LIMIT" },
@@ -3039,6 +3116,7 @@ void unit_dump_config_items(FILE *f) {
 #endif
                 { config_parse_job_mode,              "MODE" },
                 { config_parse_job_mode_isolate,      "BOOLEAN" },
+                { config_parse_personality,           "PERSONALITY" },
         };
 
         const char *prev = NULL;
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 4a5ec35..c5dbe61 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -90,6 +90,7 @@ int config_parse_job_mode_isolate(const char *unit, const char *filename, unsign
 int config_parse_exec_selinux_context(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_personality(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_exec_apparmor_profile(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_address_families(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 
 /* gperf prototypes */
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
diff --git a/src/shared/.gitignore b/src/shared/.gitignore
index c9b5f81..9f4ec9f 100644
--- a/src/shared/.gitignore
+++ b/src/shared/.gitignore
@@ -2,3 +2,7 @@
 /errno-from-name.h
 /errno-list.txt
 /errno-to-name.h
+/af-from-name.gperf
+/af-from-name.h
+/af-list.txt
+/af-to-name.h
diff --git a/src/shared/af-list.c b/src/shared/af-list.c
new file mode 100644
index 0000000..f396115
--- /dev/null
+++ b/src/shared/af-list.c
@@ -0,0 +1,58 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/socket.h>
+#include <string.h>
+
+#include "util.h"
+#include "af-list.h"
+
+static const struct af_name* lookup_af(register const char *str, register unsigned int len);
+
+#include "af-to-name.h"
+#include "af-from-name.h"
+
+const char *af_to_name(int id) {
+
+        if (id <= 0)
+                return NULL;
+
+        if (id >= (int) ELEMENTSOF(af_names))
+                return NULL;
+
+        return af_names[id];
+}
+
+int af_from_name(const char *name) {
+        const struct af_name *sc;
+
+        assert(name);
+
+        sc = lookup_af(name, strlen(name));
+        if (!sc)
+                return AF_UNSPEC;
+
+        return sc->id;
+}
+
+int af_max(void) {
+        return ELEMENTSOF(af_names);
+}
diff --git a/src/shared/af-list.h b/src/shared/af-list.h
new file mode 100644
index 0000000..e346ab8
--- /dev/null
+++ b/src/shared/af-list.h
@@ -0,0 +1,27 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+const char *af_to_name(int id);
+int af_from_name(const char *name);
+
+int af_max(void);
diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c
index 902f55a..c1b04a3 100644
--- a/src/shared/exit-status.c
+++ b/src/shared/exit-status.c
@@ -139,6 +139,9 @@ const char* exit_status_to_string(ExitStatus status, ExitStatusLevel level) {
 
                 case EXIT_APPARMOR_PROFILE:
                         return "APPARMOR";
+
+                case EXIT_ADDRESS_FAMILIES:
+                        return "ADDRESS_FAMILIES";
                 }
         }
 
diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h
index de379f1..e7f1203 100644
--- a/src/shared/exit-status.h
+++ b/src/shared/exit-status.h
@@ -70,7 +70,8 @@ typedef enum ExitStatus {
         EXIT_SECCOMP,
         EXIT_SELINUX_CONTEXT,
         EXIT_PERSONALITY,  /* 230 */
-        EXIT_APPARMOR_PROFILE
+        EXIT_APPARMOR_PROFILE,
+        EXIT_ADDRESS_FAMILIES,
 } ExitStatus;
 
 typedef enum ExitStatusLevel {

commit 9875fd7875d433eea5c6e3319916e1be18722086
Author: Lennart Poettering <lennart at poettering.net>
Date:   Tue Feb 25 20:33:17 2014 +0100

    nspawn: no need for duplicate checks against EEXIST

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 1a02935..1fe641b 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -1738,7 +1738,7 @@ static int audit_still_doesnt_work_in_containers(void) {
                 return log_oom();
 
         r = seccomp_add_secondary_archs(seccomp);
-        if (r < 0 && r != -EEXIST) {
+        if (r < 0) {
                 log_error("Failed to add secondary archs to seccomp filter: %s", strerror(-r));
                 goto finish;
         }

commit 7c66bae2ff5cb674612f84637cb98f9478ed26a8
Author: Lennart Poettering <lennart at poettering.net>
Date:   Tue Feb 25 20:32:27 2014 +0100

    seccomp: we should control NO_NEW_PRIVS on our own, not let seccomp do this for us

diff --git a/src/core/execute.c b/src/core/execute.c
index a328fc2..aeddd2e 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -969,30 +969,30 @@ static int apply_seccomp(ExecContext *c) {
                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
                         if (r == -EEXIST)
                                 continue;
-                        if (r < 0) {
-                                seccomp_release(seccomp);
-                                return r;
-                        }
+                        if (r < 0)
+                                goto finish;
                 }
-        } else {
 
+        } else {
                 r = seccomp_add_secondary_archs(seccomp);
-                if (r < 0) {
-                        seccomp_release(seccomp);
-                        return r;
-                }
+                if (r < 0)
+                        goto finish;
         }
 
         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
         SET_FOREACH(id, c->syscall_filter, i) {
                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
-                if (r < 0) {
-                        seccomp_release(seccomp);
-                        return r;
-                }
+                if (r < 0)
+                        goto finish;
         }
 
+        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+        if (r < 0)
+                goto finish;
+
         r = seccomp_load(seccomp);
+
+finish:
         seccomp_release(seccomp);
 
         return r;



More information about the systemd-commits mailing list