[systemd-devel] [PATCH] core: reuse the same /tmp, /var/tmp and inaccessible dir

Lennart Poettering lennart at poettering.net
Fri Mar 1 06:16:06 PST 2013


On Wed, 20.02.13 14:50, Michal Sekletar (msekleta at redhat.com) wrote:

> All Execs within a service get the same /tmp and /var/tmp directories
> mounted if the service is configured with PrivateTmp=yes. Temporary
> directories are cleaned up by the service itself, rather than relying on
> systemd-tmpfiles. The same logic also applies to inaccessible directories.

Hmm, looks good in principle, but I don't grok why we need
ExecContext.bind_mounts? Can you elaborate?

Thanks,

Lennart

> ---
>  man/systemd.exec.xml |   4 +-
>  src/core/execute.c   |  78 ++++++++++++--
>  src/core/execute.h   |  18 +++-
>  src/core/manager.c   |   6 ++
>  src/core/manager.h   |   2 +
>  src/core/mount.c     |  29 ++++-
>  src/core/namespace.c | 291 ++++++++++++++++++++++++++++++---------------------
>  src/core/namespace.h |  26 +++--
>  src/core/service.c   |  41 +++++++-
>  src/core/socket.c    |  31 +++++-
>  src/core/swap.c      |  28 ++++-
>  src/test/test-ns.c   |  18 +++-
>  12 files changed, 423 insertions(+), 149 deletions(-)
> 
> diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
> index 9c31baf..b1cd685 100644
> --- a/man/systemd.exec.xml
> +++ b/man/systemd.exec.xml
> @@ -1107,7 +1107,9 @@
>                                  processes via
>                                  <filename>/tmp</filename> or
>                                  <filename>/var/tmp</filename>
> -                                impossible. Defaults to
> +                                impossible. All temporary data created
> +                                by the service will be removed after the
> +                                service is stopped. Defaults to
>                                  false.</para></listitem>
>                          </varlistentry>
>  
> diff --git a/src/core/execute.c b/src/core/execute.c
> index b28962a..fabc0cd 100644
> --- a/src/core/execute.c
> +++ b/src/core/execute.c
> @@ -40,6 +40,7 @@
>  #include <sys/poll.h>
>  #include <linux/seccomp-bpf.h>
>  #include <glob.h>
> +#include <execinfo.h>
>  
>  #ifdef HAVE_PAM
>  #include <security/pam_appl.h>
> @@ -165,6 +166,21 @@ void exec_context_tty_reset(const ExecContext *context) {
>                  vt_disallocate(context->tty_path);
>  }
>  
> +void exec_context_tmp_dirs_serialize(const ExecContext *context, Unit *u, FILE *f) {
> +        assert(context);
> +        assert(u);
> +        assert(f);
> +
> +        if (context->tmp_dir)
> +                unit_serialize_item(u, f, "tmp-dir", context->tmp_dir);
> +
> +        if (context->var_tmp_dir)
> +                unit_serialize_item(u, f, "var-tmp-dir", context->var_tmp_dir);
> +
> +        if (context->inaccessible_dir)
> +                unit_serialize_item(u, f, "inaccessible-dir", context->inaccessible_dir);
> +}
> +
>  static int open_null_as(int flags, int nfd) {
>          int fd, r;
>  
> @@ -960,7 +976,7 @@ static int apply_seccomp(uint32_t *syscall_filter) {
>  
>  int exec_spawn(ExecCommand *command,
>                 char **argv,
> -               const ExecContext *context,
> +               ExecContext *context,
>                 int fds[], unsigned n_fds,
>                 char **environment,
>                 bool apply_permissions,
> @@ -1028,6 +1044,10 @@ int exec_spawn(ExecCommand *command,
>  
>          cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
>  
> +        r = setup_tmpdirs(context);
> +        if (r < 0)
> +                return r;
> +
>          pid = fork();
>          if (pid < 0)
>                  return -errno;
> @@ -1289,13 +1309,8 @@ int exec_spawn(ExecCommand *command,
>                  if (strv_length(context->read_write_dirs) > 0 ||
>                      strv_length(context->read_only_dirs) > 0 ||
>                      strv_length(context->inaccessible_dirs) > 0 ||
> -                    context->mount_flags != 0 ||
> -                    context->private_tmp) {
> -                        err = setup_namespace(context->read_write_dirs,
> -                                              context->read_only_dirs,
> -                                              context->inaccessible_dirs,
> -                                              context->private_tmp,
> -                                              context->mount_flags);
> +                    context->mount_flags != 0 ) {
> +                        err = setup_namespace(context);
>                          if (err < 0) {
>                                  r = EXIT_NAMESPACE;
>                                  goto fail_child;
> @@ -1522,7 +1537,46 @@ void exec_context_init(ExecContext *c) {
>          c->timer_slack_nsec = (nsec_t) -1;
>  }
>  
> -void exec_context_done(ExecContext *c) {
> +static void exec_context_tmp_dirs_done(ExecContext *c) {
> +        assert(c);
> +
> +        if (c->tmp_dir) {
> +                rm_rf_dangerous(c->tmp_dir, false, true, false);
> +                free(c->tmp_dir);
> +                c->tmp_dir = NULL;
> +        }
> +
> +        if (c->var_tmp_dir) {
> +                rm_rf_dangerous(c->var_tmp_dir, false, true, false);
> +                free(c->var_tmp_dir);
> +                c->var_tmp_dir = NULL;
> +        }
> +
> +        if (c->inaccessible_dir) {
> +                rm_rf_dangerous(c->inaccessible_dir, false, true, false);
> +                free(c->inaccessible_dir);
> +                c->inaccessible_dir = NULL;
> +                c->need_inaccessible = false;
> +        }
> +}
> +
> +static void exec_context_bind_mounts_done(ExecContext *c) {
> +        assert(c);
> +
> +        if (c->bind_mounts) {
> +                set_free_free(c->bind_mounts);
> +                c->bind_mounts = NULL;
> +        }
> +}
> +
> +void exec_context_tmp_dirs_cleanup(ExecContext *context) {
> +        assert(context);
> +
> +        exec_context_bind_mounts_done(context);
> +        exec_context_tmp_dirs_done(context);
> +}
> +
> +void exec_context_done(ExecContext *c, bool reloading_or_reexecuting) {
>          unsigned l;
>  
>          assert(c);
> @@ -1586,6 +1640,10 @@ void exec_context_done(ExecContext *c) {
>  
>          free(c->syscall_filter);
>          c->syscall_filter = NULL;
> +
> +        exec_context_bind_mounts_done(c);
> +        if (!reloading_or_reexecuting)
> +                exec_context_tmp_dirs_done(c);
>  }
>  
>  void exec_command_done(ExecCommand *c) {
> @@ -2134,4 +2192,4 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
>          [EXEC_OUTPUT_SOCKET] = "socket"
>  };
>  
> -DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
> +DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
> \ No newline at end of file
> diff --git a/src/core/execute.h b/src/core/execute.h
> index 2bcd2e1..a4cfe10 100644
> --- a/src/core/execute.h
> +++ b/src/core/execute.h
> @@ -36,8 +36,12 @@ typedef struct ExecContext ExecContext;
>  struct CGroupBonding;
>  struct CGroupAttribute;
>  
> +typedef struct Unit Unit;
> +
>  #include "list.h"
> +#include "set.h"
>  #include "util.h"
> +#include "namespace.h"
>  
>  typedef enum ExecInput {
>          EXEC_INPUT_NULL,
> @@ -141,6 +145,11 @@ struct ExecContext {
>          bool non_blocking;
>          bool private_tmp;
>          bool private_network;
> +        char *tmp_dir;
> +        char *var_tmp_dir;
> +        char *inaccessible_dir;
> +        bool need_inaccessible;
> +        Set *bind_mounts;
>  
>          bool no_new_privileges;
>  
> @@ -164,7 +173,7 @@ struct ExecContext {
>  
>  int exec_spawn(ExecCommand *command,
>                 char **argv,
> -               const ExecContext *context,
> +               ExecContext *context,
>                 int fds[], unsigned n_fds,
>                 char **environment,
>                 bool apply_permissions,
> @@ -192,12 +201,15 @@ void exec_command_append_list(ExecCommand **l, ExecCommand *e);
>  int exec_command_set(ExecCommand *c, const char *path, ...);
>  
>  void exec_context_init(ExecContext *c);
> -void exec_context_done(ExecContext *c);
> +void exec_context_done(ExecContext *c, bool reloading_or_reexecuting);
> +void exec_context_tmp_dirs_cleanup(ExecContext *c);
>  void exec_context_dump(ExecContext *c, FILE* f, const char *prefix);
>  void exec_context_tty_reset(const ExecContext *context);
>  
>  int exec_context_load_environment(const ExecContext *c, char ***l);
>  
> +void exec_context_tmp_dirs_serialize(const ExecContext *c, Unit *u, FILE *f);
> +
>  void exec_status_start(ExecStatus *s, pid_t pid);
>  void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status);
>  void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix);
> @@ -206,4 +218,4 @@ const char* exec_output_to_string(ExecOutput i);
>  ExecOutput exec_output_from_string(const char *s);
>  
>  const char* exec_input_to_string(ExecInput i);
> -ExecInput exec_input_from_string(const char *s);
> +ExecInput exec_input_from_string(const char *s);
> \ No newline at end of file
> diff --git a/src/core/manager.c b/src/core/manager.c
> index 28f169d..9a9cd00 100644
> --- a/src/core/manager.c
> +++ b/src/core/manager.c
> @@ -2169,6 +2169,12 @@ bool manager_is_booting_or_shutting_down(Manager *m) {
>          return false;
>  }
>  
> +bool manager_is_reloading_or_reexecuting(Manager *m) {
> +        assert(m);
> +
> +        return m->n_reloading != 0;
> +}
> +
>  void manager_reset_failed(Manager *m) {
>          Unit *u;
>          Iterator i;
> diff --git a/src/core/manager.h b/src/core/manager.h
> index cc4edf8..524cf4d 100644
> --- a/src/core/manager.h
> +++ b/src/core/manager.h
> @@ -84,6 +84,7 @@ struct Watch {
>  #include "set.h"
>  #include "dbus.h"
>  #include "path-lookup.h"
> +#include "execute.h"
>  
>  struct Manager {
>          /* Note that the set of units we know of is allowed to be
> @@ -277,6 +278,7 @@ int manager_distribute_fds(Manager *m, FDSet *fds);
>  int manager_reload(Manager *m);
>  
>  bool manager_is_booting_or_shutting_down(Manager *m);
> +bool manager_is_reloading_or_reexecuting(Manager *m);
>  
>  void manager_reset_failed(Manager *m);
>  
> diff --git a/src/core/mount.c b/src/core/mount.c
> index e3d298e..b46a578 100644
> --- a/src/core/mount.c
> +++ b/src/core/mount.c
> @@ -25,6 +25,7 @@
>  #include <sys/epoll.h>
>  #include <signal.h>
>  
> +#include "manager.h"
>  #include "unit.h"
>  #include "mount.h"
>  #include "load-fragment.h"
> @@ -126,7 +127,7 @@ static void mount_done(Unit *u) {
>          mount_parameters_done(&m->parameters_proc_self_mountinfo);
>          mount_parameters_done(&m->parameters_fragment);
>  
> -        exec_context_done(&m->exec_context);
> +        exec_context_done(&m->exec_context, manager_is_reloading_or_reexecuting(u->manager));
>          exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
>          m->control_command = NULL;
>  
> @@ -863,6 +864,7 @@ static void mount_enter_dead(Mount *m, MountResult f) {
>          if (f != MOUNT_SUCCESS)
>                  m->result = f;
>  
> +        exec_context_tmp_dirs_cleanup(&m->exec_context);
>          mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
>  }
>  
> @@ -1156,6 +1158,8 @@ static int mount_serialize(Unit *u, FILE *f, FDSet *fds) {
>          if (m->control_command_id >= 0)
>                  unit_serialize_item(u, f, "control-command", mount_exec_command_to_string(m->control_command_id));
>  
> +        exec_context_tmp_dirs_serialize(&m->exec_context, UNIT(m), f);
> +
>          return 0;
>  }
>  
> @@ -1212,7 +1216,30 @@ static int mount_deserialize_item(Unit *u, const char *key, const char *value, F
>                          m->control_command_id = id;
>                          m->control_command = m->exec_command + id;
>                  }
> +        } else if (streq(key, "tmp-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
> +
> +                m->exec_context.tmp_dir = t;
> +        } else if (streq(key, "var-tmp-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
> +
> +                m->exec_context.var_tmp_dir = t;
> +        } else if (streq(key, "inaccessible-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
>  
> +                m->exec_context.inaccessible_dir = t;
>          } else
>                  log_debug_unit(UNIT(m)->id,
>                                 "Unknown serialization key '%s'", key);
> diff --git a/src/core/namespace.c b/src/core/namespace.c
> index ba18ddc..b3dcebf 100644
> --- a/src/core/namespace.c
> +++ b/src/core/namespace.c
> @@ -36,23 +36,10 @@
>  #include "path-util.h"
>  #include "namespace.h"
>  #include "missing.h"
> +#include "execute.h"
>  
> -typedef enum PathMode {
> -        /* This is ordered by priority! */
> -        INACCESSIBLE,
> -        READONLY,
> -        PRIVATE_TMP,
> -        PRIVATE_VAR_TMP,
> -        READWRITE
> -} PathMode;
> -
> -typedef struct Path {
> -        const char *path;
> -        PathMode mode;
> -        bool done;
> -} Path;
> -
> -static int append_paths(Path **p, char **strv, PathMode mode) {
> +
> +static int append_mounts(BindMount **p, char **strv, MountMode mode) {
>          char **i;
>  
>          STRV_FOREACH(i, strv) {
> @@ -68,8 +55,8 @@ static int append_paths(Path **p, char **strv, PathMode mode) {
>          return 0;
>  }
>  
> -static int path_compare(const void *a, const void *b) {
> -        const Path *p = a, *q = b;
> +static int mount_path_compare(const void *a, const void *b) {
> +        const BindMount *p = a, *q = b;
>  
>          if (path_equal(p->path, q->path)) {
>  
> @@ -93,14 +80,14 @@ static int path_compare(const void *a, const void *b) {
>          return 0;
>  }
>  
> -static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible) {
> -        Path *f, *t, *previous;
> +static void drop_duplicates(BindMount *m, unsigned *n, bool *need_inaccessible){
> +        BindMount *f, *t, *previous;
>  
> -        assert(p);
> +        assert(m);
>          assert(n);
>          assert(need_inaccessible);
>  
> -        for (f = p, t = p, previous = NULL; f < p+*n; f++) {
> +        for (f = m, t = m, previous = NULL; f < m+*n; f++) {
>  
>                  /* The first one wins */
>                  if (previous && path_equal(f->path, previous->path))
> @@ -117,11 +104,11 @@ static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible) {
>                  t++;
>          }
>  
> -        *n = t - p;
> +        *n = t - m;
>  }
>  
>  static int apply_mount(
> -                Path *p,
> +                BindMount *m,
>                  const char *tmp_dir,
>                  const char *var_tmp_dir,
>                  const char *inaccessible_dir) {
> @@ -129,9 +116,9 @@ static int apply_mount(
>          const char *what;
>          int r;
>  
> -        assert(p);
> +        assert(m);
>  
> -        switch (p->mode) {
> +        switch (m->mode) {
>  
>          case INACCESSIBLE:
>                  what = inaccessible_dir;
> @@ -139,7 +126,7 @@ static int apply_mount(
>  
>          case READONLY:
>          case READWRITE:
> -                what = p->path;
> +                what = m->path;
>                  break;
>  
>          case PRIVATE_TMP:
> @@ -156,129 +143,201 @@ static int apply_mount(
>  
>          assert(what);
>  
> -        r = mount(what, p->path, NULL, MS_BIND|MS_REC, NULL);
> +        r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
>          if (r >= 0)
> -                log_debug("Successfully mounted %s to %s", what, p->path);
> +                log_debug("Successfully mounted %s to %s", what, m->path);
>  
>          return r;
>  }
>  
> -static int make_read_only(Path *p) {
> +static int make_read_only(BindMount *m) {
>          int r;
>  
> -        assert(p);
> +        assert(m);
>  
> -        if (p->mode != INACCESSIBLE && p->mode != READONLY)
> +        if (m->mode != INACCESSIBLE && m->mode != READONLY)
>                  return 0;
>  
> -        r = mount(NULL, p->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
> +        r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
>          if (r < 0)
>                  return -errno;
>  
>          return 0;
>  }
>  
> -int setup_namespace(
> -                char **writable,
> -                char **readable,
> -                char **inaccessible,
> -                bool private_tmp,
> -                unsigned long flags) {
> +static int create_tmp_dir(char template[], mode_t mask, bool need_sticky, char** dir_name) {
> +        int r = 0;
> +        char *d = NULL;
> +        bool remove = false;
> +        mode_t u;
>  
> -        char
> -                tmp_dir[] = "/tmp/systemd-private-XXXXXX",
> -                var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX",
> -                inaccessible_dir[] = "/tmp/systemd-inaccessible-XXXXXX";
> +        assert(dir_name);
>  
> -        Path *paths, *p;
> -        unsigned n;
> -        bool need_inaccessible = false;
> -        bool remove_tmp = false, remove_var_tmp = false, remove_inaccessible = false;
> -        int r;
> +        u = umask(mask);
> +        d = mkdtemp(template);
> +        umask(u);
> +        if (!d) {
> +                r = -errno;
> +                log_debug("Can't create directory");
> +                goto fail;
> +        }
>  
> -        if (!flags)
> -                flags = MS_SHARED;
> +        remove = true;
>  
> -        n =
> -                strv_length(writable) +
> -                strv_length(readable) +
> -                strv_length(inaccessible) +
> -                (private_tmp ? 2 : 0);
> +        log_debug("Created temporary directory : %s", template);
>  
> -        p = paths = alloca(sizeof(Path) * n);
> -        if ((r = append_paths(&p, writable, READWRITE)) < 0 ||
> -            (r = append_paths(&p, readable, READONLY)) < 0 ||
> -            (r = append_paths(&p, inaccessible, INACCESSIBLE)) < 0)
> +        d = strdup(template);
> +        if (!d) {
> +                r = log_oom();
>                  goto fail;
> +        }
>  
> -        if (private_tmp) {
> -                p->path = "/tmp";
> -                p->mode = PRIVATE_TMP;
> -                p++;
> -
> -                p->path = "/var/tmp";
> -                p->mode = PRIVATE_VAR_TMP;
> -                p++;
> +        if (need_sticky) {
> +                r = chmod(template, 0777 | S_ISVTX);
> +                if (r < 0) {
> +                        r = -errno;
> +                        goto fail;
> +                }
> +                log_debug("Setting sticky bit on : %s", template);
>          }
>  
> -        assert(paths + n == p);
> +        *dir_name = d;
>  
> -        qsort(paths, n, sizeof(Path), path_compare);
> -        drop_duplicates(paths, &n, &need_inaccessible);
> +        return 0;
> +fail:
> +        if (remove)
> +                rmdir(template);
> +        return r;
> +}
>  
> -        if (need_inaccessible) {
> -                mode_t u;
> -                char *d;
> +static int setup_bind_mounts(ExecContext *context) {
> +        int r = 0;
> +        BindMount *m, *p, *mounts;
> +        bool need_inaccessible = false;
> +        unsigned n = strv_length(context->read_write_dirs) +
> +                strv_length(context->read_only_dirs) +
> +                strv_length(context->inaccessible_dirs) +
> +                (context->private_tmp ? 2 : 0);
>  
> -                u = umask(0777);
> -                d = mkdtemp(inaccessible_dir);
> -                umask(u);
> +        assert(context);
>  
> -                if (!d) {
> -                        r = -errno;
> -                        goto fail;
> -                }
> +        /* Bind mounts are already initialized, nothing to do here */
> +        if (context->bind_mounts)
> +                return 0;
>  
> -                remove_inaccessible = true;
> +        context->bind_mounts = set_new(trivial_hash_func, trivial_compare_func);
> +        if (!context->bind_mounts) {
> +                 return log_oom();
>          }
>  
> -        if (private_tmp) {
> -                mode_t u;
> -                char *d;
> +        m = mounts = alloca(n * sizeof(BindMount));
> +        if ((r = append_mounts(&m, context->read_write_dirs, READWRITE)) < 0 ||
> +                (r = append_mounts(&m, context->read_only_dirs, READONLY)) < 0 ||
> +                (r = append_mounts(&m, context->inaccessible_dirs, INACCESSIBLE)) < 0)
> +                goto fail;
>  
> -                u = umask(0000);
> -                d = mkdtemp(tmp_dir);
> -                umask(u);
> +        if (context->private_tmp) {
> +                m->path = "/tmp";
> +                m->mode = PRIVATE_TMP;
> +                m++;
>  
> -                if (!d) {
> -                        r = -errno;
> -                        goto fail;
> -                }
> +                m->path = "/var/tmp";
> +                m->mode = PRIVATE_VAR_TMP;
> +                m++;
> +        }
>  
> -                remove_tmp = true;
> +        assert(mounts + n == m);
>  
> -                u = umask(0000);
> -                d = mkdtemp(var_tmp_dir);
> -                umask(u);
> +        qsort(mounts, n, sizeof(BindMount), mount_path_compare);
> +        drop_duplicates(mounts, &n, &need_inaccessible);
> +        context->need_inaccessible = need_inaccessible;
>  
> -                if (!d) {
> -                        r = -errno;
> +        if (context->need_inaccessible && !context->inaccessible_dir) {
> +                char inaccessible_dir[] = "/tmp/systemd-inaccessible-XXXXXX";
> +
> +                r = create_tmp_dir(inaccessible_dir, 0777, false, &context->inaccessible_dir);
> +                if (r < 0)
>                          goto fail;
> -                }
> +        }
>  
> -                remove_var_tmp = true;
> +        for (p = mounts; p < mounts + n; p++) {
> +                BindMount *q;
>  
> -                if (chmod(tmp_dir, 0777 + S_ISVTX) < 0) {
> -                        r = -errno;
> +                q = new0(BindMount, 1);
> +                if (!q) {
> +                        r = log_oom();
>                          goto fail;
>                  }
>  
> -                if (chmod(var_tmp_dir, 0777 + S_ISVTX) < 0) {
> -                        r = -errno;
> +                memcpy(q, p, sizeof(BindMount));
> +                r = set_put(context->bind_mounts, q);
> +                if (r < 0) {
> +                        r = log_oom();
>                          goto fail;
>                  }
>          }
>  
> +        return 0;
> +
> +fail:
> +        set_free_free(context->bind_mounts);
> +        context->bind_mounts = NULL;
> +        return r;
> +}
> +
> +int setup_tmpdirs(ExecContext *context) {
> +        int r = 0;
> +        bool remove_tmp_dir = false, remove_var_tmp_dir = false;
> +        char tmp_dir[] = "/tmp/systemd-private-XXXXXX",
> +             var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX";
> +
> +        assert(context);
> +
> +        if (context->private_tmp) {
> +                if (!context->tmp_dir) {
> +                        r = create_tmp_dir(tmp_dir, 0000, true, &context->tmp_dir);
> +                        if (r < 0)
> +                                goto fail;
> +                        remove_tmp_dir = true;
> +                }
> +
> +                if (!context->var_tmp_dir) {
> +                        r = create_tmp_dir(var_tmp_dir, 0000, true, &context->var_tmp_dir);
> +                        if (r < 0)
> +                                goto fail;
> +                        remove_var_tmp_dir = true;
> +                }
> +        }
> +
> +        r = setup_bind_mounts(context);
> +        if (r < 0)
> +                goto fail;
> +
> +        return 0;
> +fail:
> +        if (remove_tmp_dir) {
> +                rmdir(context->tmp_dir);
> +                free(context->tmp_dir);
> +                context->tmp_dir = NULL;
> +        }
> +
> +        if (remove_var_tmp_dir) {
> +                rmdir(context->var_tmp_dir);
> +                free(context->var_tmp_dir);
> +                context->var_tmp_dir = NULL;
> +        }
> +
> +        return r;
> +}
> +
> +int setup_namespace(ExecContext *context) {
> +        BindMount *m;
> +        Iterator i;
> +        int r = 0;
> +
> +        if (!context->mount_flags)
> +                context->mount_flags = MS_SHARED;
> +
>          if (unshare(CLONE_NEWNS) < 0) {
>                  r = -errno;
>                  goto fail;
> @@ -291,20 +350,20 @@ int setup_namespace(
>                  goto fail;
>          }
>  
> -        for (p = paths; p < paths + n; p++) {
> -                r = apply_mount(p, tmp_dir, var_tmp_dir, inaccessible_dir);
> +        SET_FOREACH(m, context->bind_mounts, i) {
> +                r = apply_mount(m, context->tmp_dir, context->var_tmp_dir, context->inaccessible_dir);
>                  if (r < 0)
>                          goto undo_mounts;
>          }
>  
> -        for (p = paths; p < paths + n; p++) {
> -                r = make_read_only(p);
> +        SET_FOREACH(m, context->bind_mounts, i) {
> +                r = make_read_only(m);
>                  if (r < 0)
>                          goto undo_mounts;
>          }
>  
>          /* Remount / as the desired mode */
> -        if (mount(NULL, "/", NULL, flags|MS_REC, NULL) < 0) {
> +        if (mount(NULL, "/", NULL, context->mount_flags | MS_REC, NULL) < 0) {
>                  r = -errno;
>                  goto undo_mounts;
>          }
> @@ -312,19 +371,11 @@ int setup_namespace(
>          return 0;
>  
>  undo_mounts:
> -        for (p = paths; p < paths + n; p++)
> -                if (p->done)
> -                        umount2(p->path, MNT_DETACH);
> +        SET_FOREACH(m, context->bind_mounts, i) {
> +                if (m->done)
> +                        umount2(m->path, MNT_DETACH);
> +        }
>  
>  fail:
> -        if (remove_inaccessible)
> -                rmdir(inaccessible_dir);
> -
> -        if (remove_tmp)
> -                rmdir(tmp_dir);
> -
> -        if (remove_var_tmp)
> -                rmdir(var_tmp_dir);
> -
>          return r;
>  }
> diff --git a/src/core/namespace.h b/src/core/namespace.h
> index 5d72ed9..f8eed3d 100644
> --- a/src/core/namespace.h
> +++ b/src/core/namespace.h
> @@ -20,12 +20,24 @@
>    You should have received a copy of the GNU Lesser General Public License
>    along with systemd; If not, see <http://www.gnu.org/licenses/>.
>  ***/
> -
>  #include <stdbool.h>
>  
> -int setup_namespace(
> -                char **writable,
> -                char **readable,
> -                char **inaccessible,
> -                bool private_tmp,
> -                unsigned long flags);
> +typedef struct ExecContext ExecContext;
> +
> +typedef enum MountMode {
> +        /* This is ordered by priority! */
> +        INACCESSIBLE,
> +        READONLY,
> +        PRIVATE_TMP,
> +        PRIVATE_VAR_TMP,
> +        READWRITE
> +} MountMode;
> +
> +typedef struct BindMount {
> +        const char *path;
> +        MountMode mode;
> +        bool done;
> +} BindMount;
> +
> +int setup_tmpdirs(ExecContext *context);
> +int setup_namespace(ExecContext *context);
> diff --git a/src/core/service.c b/src/core/service.c
> index c510736..7f25652 100644
> --- a/src/core/service.c
> +++ b/src/core/service.c
> @@ -283,7 +283,7 @@ static void service_done(Unit *u) {
>          free(s->status_text);
>          s->status_text = NULL;
>  
> -        exec_context_done(&s->exec_context);
> +        exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager));
>          exec_command_free_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
>          s->control_command = NULL;
>          s->main_command = NULL;
> @@ -1905,6 +1905,9 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
>  
>          s->forbid_restart = false;
>  
> +        /* we want fresh tmpdirs in case service is started again immediately */
> +        exec_context_tmp_dirs_cleanup(&s->exec_context);
> +
>          return;
>  
>  fail:
> @@ -2521,6 +2524,7 @@ static int service_stop(Unit *u) {
>                 s->state == SERVICE_EXITED);
>  
>          service_enter_stop(s, SERVICE_SUCCESS);
> +
>          return 0;
>  }
>  
> @@ -2593,6 +2597,15 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
>          if (dual_timestamp_is_set(&s->watchdog_timestamp))
>                  dual_timestamp_serialize(f, "watchdog-timestamp", &s->watchdog_timestamp);
>  
> +        if (s->exec_context.tmp_dir)
> +                unit_serialize_item(u, f, "tmp-dir", s->exec_context.tmp_dir);
> +
> +        if (s->exec_context.var_tmp_dir)
> +                unit_serialize_item(u, f, "var-tmp-dir", s->exec_context.var_tmp_dir);
> +
> +        if (s->exec_context.inaccessible_dir)
> +                unit_serialize_item(u, f, "inaccessible-dir", s->exec_context.inaccessible_dir);
> +
>          return 0;
>  }
>  
> @@ -2705,7 +2718,31 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
>                  dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp);
>          else if (streq(key, "watchdog-timestamp"))
>                  dual_timestamp_deserialize(value, &s->watchdog_timestamp);
> -        else
> +        else if (streq(key, "tmp-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
> +
> +                s->exec_context.tmp_dir = t;
> +        } else if (streq(key, "var-tmp-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
> +
> +                s->exec_context.var_tmp_dir = t;
> +        } else if (streq(key, "inaccessible-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
> +
> +                s->exec_context.inaccessible_dir = t;
> +        } else
>                  log_debug_unit(u->id, "Unknown serialization key '%s'", key);
>  
>          return 0;
> diff --git a/src/core/socket.c b/src/core/socket.c
> index 2105369..32f67da 100644
> --- a/src/core/socket.c
> +++ b/src/core/socket.c
> @@ -127,7 +127,7 @@ static void socket_done(Unit *u) {
>  
>          socket_free_ports(s);
>  
> -        exec_context_done(&s->exec_context);
> +        exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager));
>          exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
>          s->control_command = NULL;
>  
> @@ -1253,6 +1253,7 @@ static void socket_enter_dead(Socket *s, SocketResult f) {
>          if (f != SOCKET_SUCCESS)
>                  s->result = f;
>  
> +        exec_context_tmp_dirs_cleanup(&s->exec_context);
>          socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
>  }
>  
> @@ -1742,6 +1743,8 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
>                  }
>          }
>  
> +        exec_context_tmp_dirs_serialize(&s->exec_context, UNIT(s), f);
> +
>          return 0;
>  }
>  
> @@ -1901,10 +1904,32 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
>                                  p->fd = fdset_remove(fds, fd);
>                          }
>                  }
> +        } else if (streq(key, "tmp-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
> +
> +                s->exec_context.tmp_dir = t;
> +        } else if (streq(key, "var-tmp-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
>  
> +                s->exec_context.var_tmp_dir = t;
> +        } else if (streq(key, "inaccessible-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
> +
> +                s->exec_context.inaccessible_dir = t;
>          } else
> -                log_debug_unit(UNIT(s)->id,
> -                               "Unknown serialization key '%s'", key);
> +                log_debug_unit(UNIT(s)->id, "Unknown serialization key '%s'", key);
>  
>          return 0;
>  }
> diff --git a/src/core/swap.c b/src/core/swap.c
> index 61ce831..778dd07 100644
> --- a/src/core/swap.c
> +++ b/src/core/swap.c
> @@ -125,7 +125,7 @@ static void swap_done(Unit *u) {
>          free(s->parameters_fragment.what);
>          s->parameters_fragment.what = NULL;
>  
> -        exec_context_done(&s->exec_context);
> +        exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager));
>          exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
>          s->control_command = NULL;
>  
> @@ -632,6 +632,7 @@ static void swap_enter_dead(Swap *s, SwapResult f) {
>          if (f != SWAP_SUCCESS)
>                  s->result = f;
>  
> +        exec_context_tmp_dirs_cleanup(&s->exec_context);
>          swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
>  }
>  
> @@ -831,6 +832,8 @@ static int swap_serialize(Unit *u, FILE *f, FDSet *fds) {
>          if (s->control_command_id >= 0)
>                  unit_serialize_item(u, f, "control-command", swap_exec_command_to_string(s->control_command_id));
>  
> +        exec_context_tmp_dirs_serialize(&s->exec_context, UNIT(s), f);
> +
>          return 0;
>  }
>  
> @@ -874,7 +877,30 @@ static int swap_deserialize_item(Unit *u, const char *key, const char *value, FD
>                          s->control_command_id = id;
>                          s->control_command = s->exec_command + id;
>                  }
> +        } else if (streq(key, "tmp-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
> +
> +                s->exec_context.tmp_dir = t;
> +        } else if (streq(key, "var-tmp-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
> +
> +                s->exec_context.var_tmp_dir = t;
> +        } else if (streq(key, "inaccessible-dir")) {
> +                char *t;
> +
> +                t = strdup(value);
> +                if (!t)
> +                        return log_oom();
>  
> +                s->exec_context.inaccessible_dir = t;
>          } else
>                  log_debug_unit(u->id, "Unknown serialization key '%s'", key);
>  
> diff --git a/src/test/test-ns.c b/src/test/test-ns.c
> index b1c759f..7a74bb5 100644
> --- a/src/test/test-ns.c
> +++ b/src/test/test-ns.c
> @@ -26,9 +26,12 @@
>  #include <linux/fs.h>
>  
>  #include "namespace.h"
> +#include "execute.h"
>  #include "log.h"
>  
>  int main(int argc, char *argv[]) {
> +        ExecContext context;
> +
>          const char * const writable[] = {
>                  "/home",
>                  NULL
> @@ -47,8 +50,21 @@ int main(int argc, char *argv[]) {
>          };
>  
>          int r;
> +        char tmp_dir[] = "/tmp/systemd-private-XXXXXX",
> +             var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX";
> +
> +        mkdtemp(tmp_dir);
> +        mkdtemp(var_tmp_dir);
> +
> +        context.read_write_dirs = (char **) writable;
> +        context.read_only_dirs = (char **) readonly;
> +        context.inaccessible_dirs = (char **) inaccessible;
> +        context.tmp_dir = tmp_dir;
> +        context.var_tmp_dir = var_tmp_dir;
> +        context.private_tmp = true;
> +        context.mount_flags = 0;
>  
> -        r = setup_namespace((char**) writable, (char**) readonly, (char**) inaccessible, true, 0);
> +        r = setup_namespace(&context);
>          if (r < 0) {
>                  log_error("Failed to setup namespace: %s", strerror(-r));
>                  return 1;


Lennart

-- 
Lennart Poettering - Red Hat, Inc.


More information about the systemd-devel mailing list