[systemd-devel] [PATCH v3 2/2] syscallfilter: port to libseccomp

Lennart Poettering lennart at poettering.net
Wed Feb 12 09:32:21 PST 2014


On Wed, 12.02.14 01:29, Ronny Chevalier (chevalier.ronny at gmail.com) wrote:

Committed this one! Thanks!

(I added another commit on top, to add SystemCallErrorNumber= to
optionally return an error from blacklisted syscalls instead of aborting
the process)

> ---
> OK, so I found the problem. I forgot that SET_FOREACH has a condition that stops the iteration when e is 0 (and the read syscall has the id 0).
> 
> Here is the new patch fixing this. Sorry again.
> 
>  Makefile.am                    | 30 ++-----------
>  README                         |  1 +
>  TODO                           |  1 -
>  configure.ac                   | 14 ++++++
>  man/systemd.exec.xml           | 18 +++++++-
>  src/core/build.h               |  8 +++-
>  src/core/dbus-execute.c        |  8 +---
>  src/core/execute.c             | 88 ++++++++++++++++++--------------------
>  src/core/execute.h             | 12 +++++-
>  src/core/load-fragment.c       | 96 +++++++++++++++++++++++++++---------------
>  src/shared/.gitignore          |  4 --
>  src/shared/linux/seccomp-bpf.h | 76 ---------------------------------
>  src/shared/linux/seccomp.h     | 47 ---------------------
>  src/shared/syscall-list.c      | 56 ------------------------
>  src/shared/syscall-list.h      | 41 ------------------
>  src/test/test-tables.c         |  3 --
>  16 files changed, 157 insertions(+), 346 deletions(-)
>  delete mode 100644 src/shared/linux/seccomp-bpf.h
>  delete mode 100644 src/shared/linux/seccomp.h
>  delete mode 100644 src/shared/syscall-list.c
>  delete mode 100644 src/shared/syscall-list.h
> 
> diff --git a/Makefile.am b/Makefile.am
> index 6a3fd48..1b978e5 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -653,8 +653,6 @@ noinst_LTLIBRARIES += \
>  libsystemd_shared_la_SOURCES = \
>  	src/shared/linux/auto_dev-ioctl.h \
>  	src/shared/linux/fanotify.h \
> -	src/shared/linux/seccomp.h \
> -	src/shared/linux/seccomp-bpf.h \
>  	src/shared/ioprio.h \
>  	src/shared/missing.h \
>  	src/shared/initreq.h \
> @@ -762,8 +760,6 @@ libsystemd_shared_la_SOURCES = \
>  	src/shared/net-util.h \
>  	src/shared/errno-list.c \
>  	src/shared/errno-list.h \
> -	src/shared/syscall-list.c \
> -	src/shared/syscall-list.h \
>  	src/shared/audit.c \
>  	src/shared/audit.h \
>  	src/shared/xml.c \
> @@ -771,9 +767,7 @@ libsystemd_shared_la_SOURCES = \
>  
>  nodist_libsystemd_shared_la_SOURCES = \
>  	src/shared/errno-from-name.h \
> -	src/shared/errno-to-name.h \
> -	src/shared/syscall-from-name.h \
> -	src/shared/syscall-to-name.h
> +	src/shared/errno-to-name.h
>  
>  # ------------------------------------------------------------------------------
>  noinst_LTLIBRARIES += \
> @@ -998,6 +992,7 @@ libsystemd_core_la_CFLAGS = \
>  	$(PAM_CFLAGS) \
>  	$(AUDIT_CFLAGS) \
>  	$(KMOD_CFLAGS) \
> +	$(SECCOMP_CFLAGS) \
>  	-pthread
>  
>  libsystemd_core_la_LIBADD = \
> @@ -1012,6 +1007,7 @@ libsystemd_core_la_LIBADD = \
>  	$(PAM_LIBS) \
>  	$(AUDIT_LIBS) \
>  	$(CAP_LIBS) \
> +	$(SECCOMP_LIBS) \
>  	$(KMOD_LIBS)
>  
>  src/core/load-fragment-gperf-nulstr.c: src/core/load-fragment-gperf.gperf
> @@ -1025,33 +1021,13 @@ CLEANFILES += \
>  	src/core/load-fragment-gperf.gperf \
>  	src/core/load-fragment-gperf.c \
>  	src/core/load-fragment-gperf-nulstr.c \
> -	src/shared/syscall-list.txt \
> -	src/shared/syscall-from-name.gperf \
>  	src/shared/errno-list.txt \
>  	src/shared/errno-from-name.gperf
>  
>  BUILT_SOURCES += \
> -	src/shared/syscall-from-name.h \
> -	src/shared/syscall-to-name.h \
>  	src/shared/errno-from-name.h \
>  	src/shared/errno-to-name.h
>  
> -src/shared/syscall-list.txt:
> -	$(AM_V_at)$(MKDIR_P) $(dir $@)
> -	$(AM_V_GEN)$(CPP) $(CFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) -dM -include sys/syscall.h - < /dev/null | $(AWK) '/^#define[ \t]+__NR_[^ ]+[ \t]+[0-9(]/ { sub(/__NR_/, "", $$2); if ($$2 !~ /SYSCALL_BASE/) print $$2; }' > $@
> -
> -src/shared/syscall-from-name.gperf: src/shared/syscall-list.txt
> -	$(AM_V_at)$(MKDIR_P) $(dir $@)
> -	$(AM_V_GEN)$(AWK) 'BEGIN{ print "struct syscall_name { const char* name; int id; };"; print "%null-strings"; print "%%";} { printf "%s, __NR_%s\n", $$1, $$1 }' < $< > $@
> -
> -src/shared/syscall-from-name.h: src/shared/syscall-from-name.gperf
> -	$(AM_V_at)$(MKDIR_P) $(dir $@)
> -	$(AM_V_GPERF)$(GPERF) -L ANSI-C -t --ignore-case -N lookup_syscall -H hash_syscall_name -p -C < $< > $@
> -
> -src/shared/syscall-to-name.h: src/shared/syscall-list.txt
> -	$(AM_V_at)$(MKDIR_P) $(dir $@)
> -	$(AM_V_GEN)$(AWK) 'BEGIN{ print "static const char* const syscall_names[] = { "} { printf "[SYSCALL_TO_INDEX(__NR_%s)] = \"%s\",\n", $$1, $$1 } END{print "};"}' < $< > $@
> -
>  src/shared/errno-list.txt:
>  	$(AM_V_at)$(MKDIR_P) $(dir $@)
>  	$(AM_V_GEN)$(CPP) $(CFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) -dM -include errno.h - < /dev/null | $(AWK) '/^#define[ \t]+E[^ _]+[ \t]+[0-9]/ { print $$2; }'  > $@
> diff --git a/README b/README
> index 509b45f..a237672 100644
> --- a/README
> +++ b/README
> @@ -92,6 +92,7 @@ REQUIREMENTS:
>  
>          glibc >= 2.14
>          libcap
> +        libseccomp >= 1.0.0 (optional)
>          libblkid >= 2.20 (from util-linux) (optional)
>          libkmod >= 15 (optional)
>          PAM >= 1.1.2 (optional)
> diff --git a/TODO b/TODO
> index fd2cfcb..220b6ee 100644
> --- a/TODO
> +++ b/TODO
> @@ -503,7 +503,6 @@ Features:
>    - syscall filter: add knowledge about compat syscalls
>    - syscall filter: don't enforce no new privs?
>    - syscall filter: option to return EPERM rather than SIGSYS?
> -  - syscall filter: port to libseccomp
>    - system-wide seccomp filter
>  
>  * load-fragment: when loading a unit file via a chain of symlinks
> diff --git a/configure.ac b/configure.ac
> index d92f1fe..940ae9f 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -321,6 +321,19 @@ fi
>  AM_CONDITIONAL(HAVE_BLKID, [test "$have_blkid" = "yes"])
>  
>  # ------------------------------------------------------------------------------
> +have_seccomp=no
> +AC_ARG_ENABLE(seccomp, AS_HELP_STRING([--disable-seccomp], [Disable optional SECCOMP support]))
> +if test "x$enable_seccomp" != "xno"; then
> +        PKG_CHECK_MODULES(SECCOMP, [libseccomp >= 1.0.0],
> +               [AC_DEFINE(HAVE_SECCOMP, 1, [Define if seccomp is available]) have_seccomp=yes],
> +               [have_seccomp=no])
> +        if test "x$have_seccomp" = "xno" -a "x$enable_seccomp" = "xyes"; then
> +                AC_MSG_ERROR([*** seccomp support requested but libraries not found])
> +        fi
> +fi
> +AM_CONDITIONAL(HAVE_SECCOMP, [test "$have_seccomp" = "yes"])
> +
> +# ------------------------------------------------------------------------------
>  have_ima=yes
>  AC_ARG_ENABLE([ima], AS_HELP_STRING([--disable-ima],[Disable optional IMA support]),
>                  [case "${enableval}" in
> @@ -1088,6 +1101,7 @@ AC_MSG_RESULT([
>          AUDIT:                   ${have_audit}
>          IMA:                     ${have_ima}
>          SELinux:                 ${have_selinux}
> +        SECCOMP:                 ${have_seccomp}
>          SMACK:                   ${have_smack}
>          XZ:                      ${have_xz}
>          ACL:                     ${have_acl}
> diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
> index f4caccd..0c6ca5a 100644
> --- a/man/systemd.exec.xml
> +++ b/man/systemd.exec.xml
> @@ -1029,7 +1029,23 @@
>                                  merged. If the empty string is
>                                  assigned, the filter is reset, all
>                                  prior assignments will have no
> -                                effect.</para></listitem>
> +                                effect.</para>
> +
> +                                <para>If you specify both types of this option
> +                                (i.e. whitelisting and blacklisting) the first
> +                                encountered will take precedence and will
> +                                dictate the default action (termination
> +                                or approval of a system call). Then the
> +                                next occurrences of this option will add or
> +                                delete the listed system calls from the set
> +                                of the filtered system calls, depending on
> +                                its type and the default action (e.g. You
> +                                have started with a whitelisting of <function>
> +                                read</function> and <function>write</function>
> +                                and right after it add a blacklisting of
> +                                <function>write</function>, then <function>
> +                                write</function> will be removed from the set)
> +                                </para></listitem>
>                          </varlistentry>
>  
>                  </variablelist>
> diff --git a/src/core/build.h b/src/core/build.h
> index 4513a0b..f04f03f 100644
> --- a/src/core/build.h
> +++ b/src/core/build.h
> @@ -81,4 +81,10 @@
>  #define _XZ_FEATURE_ "-XZ"
>  #endif
>  
> -#define SYSTEMD_FEATURES _PAM_FEATURE_ " " _LIBWRAP_FEATURE_ " " _AUDIT_FEATURE_ " " _SELINUX_FEATURE_ " " _IMA_FEATURE_ " " _SYSVINIT_FEATURE_ " " _LIBCRYPTSETUP_FEATURE_ " " _GCRYPT_FEATURE_ " " _ACL_FEATURE_ " " _XZ_FEATURE_
> +#ifdef HAVE_SECCOMP
> +#define _SECCOMP_FEATURE_ "+SECCOMP"
> +#else
> +#define _SECCOMP_FEATURE_ "-SECCOMP"
> +#endif
> +
> +#define SYSTEMD_FEATURES _PAM_FEATURE_ " " _LIBWRAP_FEATURE_ " " _AUDIT_FEATURE_ " " _SELINUX_FEATURE_ " " _IMA_FEATURE_ " " _SYSVINIT_FEATURE_ " " _LIBCRYPTSETUP_FEATURE_ " " _GCRYPT_FEATURE_ " " _ACL_FEATURE_ " " _XZ_FEATURE_ _SECCOMP_FEATURE_
> diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
> index db16990..2ed7a3c 100644
> --- a/src/core/dbus-execute.c
> +++ b/src/core/dbus-execute.c
> @@ -25,7 +25,6 @@
>  #include "missing.h"
>  #include "ioprio.h"
>  #include "strv.h"
> -#include "syscall-list.h"
>  #include "fileio.h"
>  #include "execute.h"
>  #include "dbus-execute.h"
> @@ -354,10 +353,7 @@ static int property_get_syscall_filter(
>          assert(reply);
>          assert(c);
>  
> -        if (c->syscall_filter)
> -                return sd_bus_message_append_array(reply, 'u', c->syscall_filter, (syscall_max() + 31) >> 4);
> -        else
> -                return sd_bus_message_append_array(reply, 'u', NULL, 0);
> +        return sd_bus_message_append(reply, "s", c->syscall_filter_string);
>  }
>  
>  const sd_bus_vtable bus_exec_vtable[] = {
> @@ -422,7 +418,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
>          SD_BUS_PROPERTY("SELinuxContext", "s", NULL, offsetof(ExecContext, selinux_context), SD_BUS_VTABLE_PROPERTY_CONST),
>          SD_BUS_PROPERTY("IgnoreSIGPIPE", "b", bus_property_get_bool, offsetof(ExecContext, ignore_sigpipe), SD_BUS_VTABLE_PROPERTY_CONST),
>          SD_BUS_PROPERTY("NoNewPrivileges", "b", bus_property_get_bool, offsetof(ExecContext, no_new_privileges), SD_BUS_VTABLE_PROPERTY_CONST),
> -        SD_BUS_PROPERTY("SystemCallFilter", "au", property_get_syscall_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST),
> +        SD_BUS_PROPERTY("SystemCallFilter", "s", property_get_syscall_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST),
>          SD_BUS_VTABLE_END
>  };
>  
> diff --git a/src/core/execute.c b/src/core/execute.c
> index b941a02..d2e5b74 100644
> --- a/src/core/execute.c
> +++ b/src/core/execute.c
> @@ -38,9 +38,13 @@
>  #include <linux/fs.h>
>  #include <linux/oom.h>
>  #include <sys/poll.h>
> -#include <linux/seccomp-bpf.h>
>  #include <glob.h>
>  #include <libgen.h>
> +#ifdef HAVE_SECCOMP
> +#include <seccomp.h>
> +
> +#include "set.h"
> +#endif
>  #undef basename
>  
>  #ifdef HAVE_PAM
> @@ -67,7 +71,6 @@
>  #include "utmp-wtmp.h"
>  #include "def.h"
>  #include "path-util.h"
> -#include "syscall-list.h"
>  #include "env-util.h"
>  #include "fileio.h"
>  #include "unit.h"
> @@ -933,57 +936,32 @@ static void rename_process_from_path(const char *path) {
>          rename_process(process_name);
>  }
>  
> -static int apply_seccomp(uint32_t *syscall_filter) {
> -        static const struct sock_filter header[] = {
> -                VALIDATE_ARCHITECTURE,
> -                EXAMINE_SYSCALL
> -        };
> -        static const struct sock_filter footer[] = {
> -                _KILL_PROCESS
> -        };
> -
> -        int i;
> -        unsigned n;
> -        struct sock_filter *f;
> -        struct sock_fprog prog = {};
> -
> -        assert(syscall_filter);
> +#ifdef HAVE_SECCOMP
> +static int apply_seccomp(ExecContext *c) {
> +        uint32_t action = SCMP_ACT_ALLOW;
> +        Iterator i;
> +        void *id;
>  
> -        /* First: count the syscalls to check for */
> -        for (i = 0, n = 0; i < syscall_max(); i++)
> -                if (syscall_filter[i >> 4] & (1 << (i & 31)))
> -                        n++;
> -
> -        /* Second: build the filter program from a header the syscall
> -         * matches and the footer */
> -        f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
> -        memcpy(f, header, sizeof(header));
> -
> -        for (i = 0, n = 0; i < syscall_max(); i++)
> -                if (syscall_filter[i >> 4] & (1 << (i & 31))) {
> -                        struct sock_filter item[] = {
> -                                BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
> -                                BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
> -                        };
> +        assert(c);
>  
> -                        assert_cc(ELEMENTSOF(item) == 2);
> +        c->syscall_filter = seccomp_init(c->syscall_filter_default_action);
> +        if (!c->syscall_filter)
> +                return -1;
>  
> -                        f[ELEMENTSOF(header) + 2*n]  = item[0];
> -                        f[ELEMENTSOF(header) + 2*n+1] = item[1];
> +        if (c->syscall_filter_default_action == SCMP_ACT_ALLOW)
> +                action = SCMP_ACT_KILL;
>  
> -                        n++;
> +        SET_FOREACH(id, c->filtered_syscalls, i) {
> +                int r = seccomp_rule_add(c->syscall_filter, action, PTR_TO_INT(id) - 1, 0);
> +                if (r < 0) {
> +                        log_error("Failed to add syscall filter");
> +                        return r;
>                  }
> +        }
>  
> -        memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
> -
> -        /* Third: install the filter */
> -        prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
> -        prog.filter = f;
> -        if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
> -                return -errno;
> -
> -        return 0;
> +        return seccomp_load(c->syscall_filter);
>  }
> +#endif
>  
>  static void do_idle_pipe_dance(int idle_pipe[4]) {
>          assert(idle_pipe);
> @@ -1562,13 +1540,15 @@ int exec_spawn(ExecCommand *command,
>                                          goto fail_child;
>                                  }
>  
> -                        if (context->syscall_filter) {
> -                                err = apply_seccomp(context->syscall_filter);
> +#ifdef HAVE_SECCOMP
> +                        if (context->filtered_syscalls) {
> +                                err = apply_seccomp(context);
>                                  if (err < 0) {
>                                          r = EXIT_SECCOMP;
>                                          goto fail_child;
>                                  }
>                          }
> +#endif
>  #ifdef HAVE_SELINUX
>                          if (context->selinux_context && use_selinux()) {
>                                  bool ignore;
> @@ -1751,6 +1731,18 @@ void exec_context_done(ExecContext *c) {
>  
>          free(c->syscall_filter);
>          c->syscall_filter = NULL;
> +
> +        free(c->syscall_filter_string);
> +        c->syscall_filter_string = NULL;
> +
> +#ifdef HAVE_SECCOMP
> +        if (c->syscall_filter) {
> +                seccomp_release(c->syscall_filter);
> +                c->syscall_filter = NULL;
> +        }
> +        set_free(c->filtered_syscalls);
> +        c->filtered_syscalls = NULL;
> +#endif
>  }
>  
>  void exec_command_done(ExecCommand *c) {
> diff --git a/src/core/execute.h b/src/core/execute.h
> index be811a9..b2d70d7 100644
> --- a/src/core/execute.h
> +++ b/src/core/execute.h
> @@ -33,6 +33,11 @@ typedef struct ExecRuntime ExecRuntime;
>  #include <stdbool.h>
>  #include <stdio.h>
>  #include <sched.h>
> +#ifdef HAVE_SECCOMP
> +#include <seccomp.h>
> +
> +#include "set.h"
> +#endif
>  
>  #include "list.h"
>  #include "util.h"
> @@ -162,7 +167,12 @@ struct ExecContext {
>           * don't enter a trigger loop. */
>          bool same_pgrp;
>  
> -        uint32_t *syscall_filter;
> +#ifdef HAVE_SECCOMP
> +        scmp_filter_ctx syscall_filter;
> +        Set *filtered_syscalls;
> +        uint32_t syscall_filter_default_action;
> +#endif
> +        char *syscall_filter_string;
>  
>          bool oom_score_adjust_set:1;
>          bool nice_set:1;
> diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
> index 7a2d32d..06ff18b 100644
> --- a/src/core/load-fragment.c
> +++ b/src/core/load-fragment.c
> @@ -33,6 +33,11 @@
>  #include <sys/stat.h>
>  #include <sys/time.h>
>  #include <sys/resource.h>
> +#ifdef HAVE_SECCOMP
> +#include <seccomp.h>
> +
> +#include "set.h"
> +#endif
>  
>  #include "sd-messages.h"
>  #include "unit.h"
> @@ -47,13 +52,12 @@
>  #include "unit-printf.h"
>  #include "utf8.h"
>  #include "path-util.h"
> -#include "syscall-list.h"
>  #include "env-util.h"
>  #include "cgroup.h"
>  #include "bus-util.h"
>  #include "bus-error.h"
>  
> -#ifndef HAVE_SYSV_COMPAT
> +#if !defined(HAVE_SYSV_COMPAT) || !defined(HAVE_SECCOMP)
>  int config_parse_warn_compat(const char *unit,
>                               const char *filename,
>                               unsigned line,
> @@ -1916,16 +1920,7 @@ int config_parse_documentation(const char *unit,
>          return r;
>  }
>  
> -static void syscall_set(uint32_t *p, int nr) {
> -        nr = SYSCALL_TO_INDEX(nr);
> -        p[nr >> 4] |= 1 << (nr & 31);
> -}
> -
> -static void syscall_unset(uint32_t *p, int nr) {
> -        nr = SYSCALL_TO_INDEX(nr);
> -        p[nr >> 4] &= ~(1 << (nr & 31));
> -}
> -
> +#ifdef HAVE_SECCOMP
>  int config_parse_syscall_filter(const char *unit,
>                                  const char *filename,
>                                  unsigned line,
> @@ -1936,13 +1931,23 @@ int config_parse_syscall_filter(const char *unit,
>                                  const char *rvalue,
>                                  void *data,
>                                  void *userdata) {
> -
>          ExecContext *c = data;
>          Unit *u = userdata;
>          bool invert = false;
>          char *w;
>          size_t l;
>          char *state;
> +        _cleanup_strv_free_ char **syscalls = strv_new(NULL, NULL);
> +        _cleanup_free_ char *sorted_syscalls = NULL;
> +        uint32_t action = SCMP_ACT_ALLOW;
> +        Iterator i;
> +        void *e;
> +        static char const *default_syscalls[] = {"execve",
> +                                                 "exit",
> +                                                 "exit_group",
> +                                                 "rt_sigreturn",
> +                                                 "sigreturn",
> +                                                 NULL};
>  
>          assert(filename);
>          assert(lvalue);
> @@ -1951,34 +1956,37 @@ int config_parse_syscall_filter(const char *unit,
>  
>          if (isempty(rvalue)) {
>                  /* Empty assignment resets the list */
> -                free(c->syscall_filter);
> -                c->syscall_filter = NULL;
> +                set_free(c->filtered_syscalls);
> +                c->filtered_syscalls= NULL;
> +                free(c->syscall_filter_string);
> +                c->syscall_filter_string = NULL;
>                  return 0;
>          }
>  
>          if (rvalue[0] == '~') {
>                  invert = true;
> +                action = SCMP_ACT_KILL;
>                  rvalue++;
>          }
>  
> -        if (!c->syscall_filter) {
> -                size_t n;
> +        if (!c->filtered_syscalls) {
> +                c->filtered_syscalls = set_new(trivial_hash_func, trivial_compare_func);
> +                if (invert)
> +                        c->syscall_filter_default_action = SCMP_ACT_ALLOW;
> +                else {
> +                        char const **syscall;
>  
> -                n = (syscall_max() + 31) >> 4;
> -                c->syscall_filter = new(uint32_t, n);
> -                if (!c->syscall_filter)
> -                        return log_oom();
> +                        c->syscall_filter_default_action = SCMP_ACT_KILL;
>  
> -                memset(c->syscall_filter, invert ? 0xFF : 0, n * sizeof(uint32_t));
> +                        /* accept default syscalls if we are on a whitelist */
> +                        STRV_FOREACH(syscall, default_syscalls) {
> +                                int id = seccomp_syscall_resolve_name(*syscall);
> +                                if (id < 0)
> +                                        continue;
>  
> -                /* Add these by default */
> -                syscall_set(c->syscall_filter, __NR_execve);
> -                syscall_set(c->syscall_filter, __NR_rt_sigreturn);
> -#ifdef __NR_sigreturn
> -                syscall_set(c->syscall_filter, __NR_sigreturn);
> -#endif
> -                syscall_set(c->syscall_filter, __NR_exit_group);
> -                syscall_set(c->syscall_filter, __NR_exit);
> +                                set_replace(c->filtered_syscalls, INT_TO_PTR(id + 1));
> +                        }
> +                }
>          }
>  
>          FOREACH_WORD_QUOTED(w, l, rvalue, state) {
> @@ -1989,23 +1997,39 @@ int config_parse_syscall_filter(const char *unit,
>                  if (!t)
>                          return log_oom();
>  
> -                id = syscall_from_name(t);
> +                id = seccomp_syscall_resolve_name(t);
>                  if (id < 0)  {
>                          log_syntax(unit, LOG_ERR, filename, line, EINVAL,
>                                     "Failed to parse syscall, ignoring: %s", t);
>                          continue;
>                  }
>  
> -                if (invert)
> -                        syscall_unset(c->syscall_filter, id);
> +                /* If we previously wanted to forbid a syscall
> +                 * and now we want to allow it, then remove it from the list
> +                 * libseccomp will also return -EPERM if we try to add
> +                 * a rule with the same action as the default
> +                 */
> +                if (action == c->syscall_filter_default_action)
> +                        set_remove(c->filtered_syscalls, INT_TO_PTR(id + 1));
>                  else
> -                        syscall_set(c->syscall_filter, id);
> +                        set_replace(c->filtered_syscalls, INT_TO_PTR(id + 1));
> +        }
> +
> +        SET_FOREACH(e, c->filtered_syscalls, i) {
> +                char *name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(e) - 1);
> +                strv_push(&syscalls, name);
>          }
>  
> +        sorted_syscalls = strv_join(strv_sort(syscalls), " ");
> +        if (invert)
> +                c->syscall_filter_string = strv_join(STRV_MAKE("~", sorted_syscalls, NULL), "");
> +        else
> +                c->syscall_filter_string = strdup(sorted_syscalls);
>          c->no_new_privileges = true;
>  
>          return 0;
>  }
> +#endif
>  
>  int config_parse_unit_slice(
>                  const char *unit,
> @@ -2778,7 +2802,11 @@ void unit_dump_config_items(FILE *f) {
>                  { config_parse_set_status,            "STATUS" },
>                  { config_parse_service_sockets,       "SOCKETS" },
>                  { config_parse_environ,               "ENVIRON" },
> +#ifdef HAVE_SECCOMP
>                  { config_parse_syscall_filter,        "SYSCALL" },
> +#else
> +                { config_parse_warn_compat,           "NOTSUPPORTED" },
> +#endif
>                  { config_parse_cpu_shares,            "SHARES" },
>                  { config_parse_memory_limit,          "LIMIT" },
>                  { config_parse_device_allow,          "DEVICE" },
> diff --git a/src/shared/.gitignore b/src/shared/.gitignore
> index 3820d19..c9b5f81 100644
> --- a/src/shared/.gitignore
> +++ b/src/shared/.gitignore
> @@ -1,7 +1,3 @@
> -/syscall-from-name.gperf
> -/syscall-from-name.h
> -/syscall-list.txt
> -/syscall-to-name.h
>  /errno-from-name.gperf
>  /errno-from-name.h
>  /errno-list.txt
> diff --git a/src/shared/linux/seccomp-bpf.h b/src/shared/linux/seccomp-bpf.h
> deleted file mode 100644
> index 1e3d136..0000000
> --- a/src/shared/linux/seccomp-bpf.h
> +++ /dev/null
> @@ -1,76 +0,0 @@
> -/*
> - * seccomp example for x86 (32-bit and 64-bit) with BPF macros
> - *
> - * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev at chromium.org>
> - * Authors:
> - *  Will Drewry <wad at chromium.org>
> - *  Kees Cook <keescook at chromium.org>
> - *
> - * The code may be used by anyone for any purpose, and can serve as a
> - * starting point for developing applications using mode 2 seccomp.
> - */
> -#ifndef _SECCOMP_BPF_H_
> -#define _SECCOMP_BPF_H_
> -
> -#include <stdio.h>
> -#include <stddef.h>
> -#include <stdlib.h>
> -#include <errno.h>
> -#include <signal.h>
> -#include <string.h>
> -#include <unistd.h>
> -
> -#include <sys/prctl.h>
> -
> -#include <linux/unistd.h>
> -#include <linux/audit.h>
> -#include <linux/filter.h>
> -#include <linux/seccomp.h>
> -
> -#ifndef SECCOMP_MODE_FILTER
> -# define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
> -# define SECCOMP_RET_KILL	0x00000000U /* kill the task immediately */
> -# define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
> -# define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
> -struct seccomp_data {
> -    int nr;
> -    __u32 arch;
> -    __u64 instruction_pointer;
> -    __u64 args[6];
> -};
> -#endif
> -#ifndef SYS_SECCOMP
> -# define SYS_SECCOMP 1
> -#endif
> -
> -#define syscall_nr (offsetof(struct seccomp_data, nr))
> -#define arch_nr (offsetof(struct seccomp_data, arch))
> -
> -#if defined(__i386__)
> -# define REG_SYSCALL	REG_EAX
> -# define ARCH_NR	AUDIT_ARCH_I386
> -#elif defined(__x86_64__)
> -# define REG_SYSCALL	REG_RAX
> -# define ARCH_NR	AUDIT_ARCH_X86_64
> -#else
> -# warning "Platform does not support seccomp filter yet"
> -# define REG_SYSCALL	0
> -# define ARCH_NR	0
> -#endif
> -
> -#define VALIDATE_ARCHITECTURE \
> -	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, arch_nr), \
> -	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0), \
> -	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
> -
> -#define EXAMINE_SYSCALL \
> -	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr)
> -
> -#define ALLOW_SYSCALL(name) \
> -	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
> -	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
> -
> -#define _KILL_PROCESS \
> -	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
> -
> -#endif /* _SECCOMP_BPF_H_ */
> diff --git a/src/shared/linux/seccomp.h b/src/shared/linux/seccomp.h
> deleted file mode 100644
> index 9c03683..0000000
> --- a/src/shared/linux/seccomp.h
> +++ /dev/null
> @@ -1,47 +0,0 @@
> -#ifndef _LINUX_SECCOMP_H
> -#define _LINUX_SECCOMP_H
> -
> -
> -#include <linux/types.h>
> -
> -
> -/* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, <mode>) */
> -#define SECCOMP_MODE_DISABLED	0 /* seccomp is not in use. */
> -#define SECCOMP_MODE_STRICT	1 /* uses hard-coded filter. */
> -#define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
> -
> -/*
> - * All BPF programs must return a 32-bit value.
> - * The bottom 16-bits are for optional return data.
> - * The upper 16-bits are ordered from least permissive values to most.
> - *
> - * The ordering ensures that a min_t() over composed return values always
> - * selects the least permissive choice.
> - */
> -#define SECCOMP_RET_KILL	0x00000000U /* kill the task immediately */
> -#define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
> -#define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
> -#define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
> -#define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
> -
> -/* Masks for the return value sections. */
> -#define SECCOMP_RET_ACTION	0x7fff0000U
> -#define SECCOMP_RET_DATA	0x0000ffffU
> -
> -/**
> - * struct seccomp_data - the format the BPF program executes over.
> - * @nr: the system call number
> - * @arch: indicates system call convention as an AUDIT_ARCH_* value
> - *        as defined in <linux/audit.h>.
> - * @instruction_pointer: at the time of the system call.
> - * @args: up to 6 system call arguments always stored as 64-bit values
> - *        regardless of the architecture.
> - */
> -struct seccomp_data {
> -	int nr;
> -	__u32 arch;
> -	__u64 instruction_pointer;
> -	__u64 args[6];
> -};
> -
> -#endif /* _LINUX_SECCOMP_H */
> diff --git a/src/shared/syscall-list.c b/src/shared/syscall-list.c
> deleted file mode 100644
> index dc84dca..0000000
> --- a/src/shared/syscall-list.c
> +++ /dev/null
> @@ -1,56 +0,0 @@
> -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
> -
> -/***
> -  This file is part of systemd.
> -
> -  Copyright 2012 Lennart Poettering
> -
> -  systemd is free software; you can redistribute it and/or modify it
> -  under the terms of the GNU Lesser General Public License as published by
> -  the Free Software Foundation; either version 2.1 of the License, or
> -  (at your option) any later version.
> -
> -  systemd is distributed in the hope that it will be useful, but
> -  WITHOUT ANY WARRANTY; without even the implied warranty of
> -  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> -  Lesser General Public License for more details.
> -
> -  You should have received a copy of the GNU Lesser General Public License
> -  along with systemd; If not, see <http://www.gnu.org/licenses/>.
> -***/
> -
> -#include <sys/syscall.h>
> -#include <string.h>
> -
> -#include "util.h"
> -#include "syscall-list.h"
> -
> -static const struct syscall_name* lookup_syscall(register const char *str,
> -                                                 register unsigned int len);
> -
> -#include "syscall-to-name.h"
> -#include "syscall-from-name.h"
> -
> -const char *syscall_to_name(int id) {
> -        id = SYSCALL_TO_INDEX(id);
> -        if (id < 0 || id >= (int) ELEMENTSOF(syscall_names))
> -                return NULL;
> -
> -        return syscall_names[id];
> -}
> -
> -int syscall_from_name(const char *name) {
> -        const struct syscall_name *sc;
> -
> -        assert(name);
> -
> -        sc = lookup_syscall(name, strlen(name));
> -        if (!sc)
> -                return -1;
> -
> -        return sc->id;
> -}
> -
> -int syscall_max(void) {
> -        return ELEMENTSOF(syscall_names);
> -}
> diff --git a/src/shared/syscall-list.h b/src/shared/syscall-list.h
> deleted file mode 100644
> index 37efc56..0000000
> --- a/src/shared/syscall-list.h
> +++ /dev/null
> @@ -1,41 +0,0 @@
> -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
> -
> -#pragma once
> -
> -/***
> -  This file is part of systemd.
> -
> -  Copyright 2012 Lennart Poettering
> -
> -  systemd is free software; you can redistribute it and/or modify it
> -  under the terms of the GNU Lesser General Public License as published by
> -  the Free Software Foundation; either version 2.1 of the License, or
> -  (at your option) any later version.
> -
> -  systemd is distributed in the hope that it will be useful, but
> -  WITHOUT ANY WARRANTY; without even the implied warranty of
> -  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> -  Lesser General Public License for more details.
> -
> -  You should have received a copy of the GNU Lesser General Public License
> -  along with systemd; If not, see <http://www.gnu.org/licenses/>.
> -***/
> -
> -#if defined __x86_64__ && defined __ILP32__
> -/* The x32 ABI defines all of its syscalls with bit 30 set, which causes
> -   issues when attempting to use syscalls as simple indices into an array.
> -   Instead, use the syscall id & ~SYSCALL_MASK as the index, and | the
> -   internal id with the syscall mask as needed.
> -*/
> -#include <asm/unistd.h>
> -#define SYSCALL_TO_INDEX(x) ((x) & ~__X32_SYSCALL_BIT)
> -#define INDEX_TO_SYSCALL(x) ((x) | __X32_SYSCALL_BIT)
> -#else
> -#define SYSCALL_TO_INDEX(x) (x)
> -#define INDEX_TO_SYSCALL(x) (x)
> -#endif
> -
> -const char *syscall_to_name(int id);
> -int syscall_from_name(const char *name);
> -
> -int syscall_max(void);
> diff --git a/src/test/test-tables.c b/src/test/test-tables.c
> index 3b7800c..dff6431 100644
> --- a/src/test/test-tables.c
> +++ b/src/test/test-tables.c
> @@ -43,7 +43,6 @@
>  #include "unit-name.h"
>  #include "unit.h"
>  #include "util.h"
> -#include "syscall-list.h"
>  
>  #include "test-tables.h"
>  
> @@ -99,7 +98,5 @@ int main(int argc, char **argv) {
>          test_table(unit_load_state, UNIT_LOAD_STATE);
>          test_table(unit_type, UNIT_TYPE);
>  
> -        _test_table("syscall", syscall_to_name, syscall_from_name, syscall_max(), true);
> -
>          return EXIT_SUCCESS;
>  }


Lennart

-- 
Lennart Poettering, Red Hat


More information about the systemd-devel mailing list