[systemd-devel] [PATCH] core: reuse the same /tmp, /var/tmp and inaccessible dir

Michal Sekletar msekleta at redhat.com
Wed Feb 13 05:45:25 PST 2013


All Execs within the service, will get mounted the same /tmp and /var/tmp
directories, if service is configured with PrivateTmp=yes. Temporary
directories are cleaned up by service itself, rather than relying on
systemd-tmpfiles. Same logic applies also to inaccessible directories.
---
 man/systemd.exec.xml |   4 +-
 src/core/execute.c   |  43 ++++++--
 src/core/execute.h   |  13 ++-
 src/core/namespace.c | 291 +++++++++++++++++++++++++++++----------------------
 src/core/namespace.h |  26 +++--
 src/core/service.c   |  10 ++
 src/test/test-ns.c   |  18 +++-
 7 files changed, 261 insertions(+), 144 deletions(-)

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 53094e5..a33517e 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1107,7 +1107,9 @@
                                 processes via
                                 <filename>/tmp</filename> or
                                 <filename>/var/tmp</filename>
-                                impossible. Defaults to
+                                impossible. All temporary data created
+                                by service will be removed after service
+                                is stopped. Defaults to
                                 false.</para></listitem>
                         </varlistentry>
 
diff --git a/src/core/execute.c b/src/core/execute.c
index aa58bc4..770f1f1 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -40,6 +40,7 @@
 #include <sys/poll.h>
 #include <linux/seccomp-bpf.h>
 #include <glob.h>
+#include <execinfo.h>
 
 #ifdef HAVE_PAM
 #include <security/pam_appl.h>
@@ -984,7 +985,7 @@ static int apply_seccomp(uint32_t *syscall_filter) {
 
 int exec_spawn(ExecCommand *command,
                char **argv,
-               const ExecContext *context,
+               ExecContext *context,
                int fds[], unsigned n_fds,
                char **environment,
                bool apply_permissions,
@@ -1052,6 +1053,10 @@ int exec_spawn(ExecCommand *command,
 
         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
 
+        r = setup_tmpdirs(context);
+        if (r < 0)
+                return r;
+
         pid = fork();
         if (pid < 0)
                 return -errno;
@@ -1313,13 +1318,8 @@ int exec_spawn(ExecCommand *command,
                 if (strv_length(context->read_write_dirs) > 0 ||
                     strv_length(context->read_only_dirs) > 0 ||
                     strv_length(context->inaccessible_dirs) > 0 ||
-                    context->mount_flags != 0 ||
-                    context->private_tmp) {
-                        err = setup_namespace(context->read_write_dirs,
-                                              context->read_only_dirs,
-                                              context->inaccessible_dirs,
-                                              context->private_tmp,
-                                              context->mount_flags);
+                    context->mount_flags != 0 ) {
+                        err = setup_namespace(context);
                         if (err < 0) {
                                 r = EXIT_NAMESPACE;
                                 goto fail_child;
@@ -1546,6 +1546,26 @@ void exec_context_init(ExecContext *c) {
         c->timer_slack_nsec = (nsec_t) -1;
 }
 
+void exec_context_tmpdirs_done(ExecContext *c) {
+        if (c->tmp_dir) {
+                rm_rf_dangerous(c->tmp_dir, false, true, false);
+                free(c->tmp_dir);
+                c->tmp_dir = NULL;
+        }
+
+        if (c->var_tmp_dir) {
+                rm_rf_dangerous(c->var_tmp_dir, false, true, false);
+                free(c->var_tmp_dir);
+                c->var_tmp_dir = NULL;
+        }
+
+        if (c->inaccessible_dir) {
+                rm_rf_dangerous(c->inaccessible_dir, false, true, false);
+                free(c->inaccessible_dir);
+                c->inaccessible_dir = NULL;
+        }
+}
+
 void exec_context_done(ExecContext *c) {
         unsigned l;
 
@@ -1610,6 +1630,11 @@ void exec_context_done(ExecContext *c) {
 
         free(c->syscall_filter);
         c->syscall_filter = NULL;
+
+        exec_context_tmpdirs_done(c);
+
+        free(c->bind_mounts);
+        c->bind_mounts = NULL;
 }
 
 void exec_command_done(ExecCommand *c) {
@@ -2158,4 +2183,4 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
         [EXEC_OUTPUT_SOCKET] = "socket"
 };
 
-DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
+DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
\ No newline at end of file
diff --git a/src/core/execute.h b/src/core/execute.h
index 2bcd2e1..1728d89 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -38,6 +38,8 @@ struct CGroupAttribute;
 
 #include "list.h"
 #include "util.h"
+#include "namespace.h"
+
 
 typedef enum ExecInput {
         EXEC_INPUT_NULL,
@@ -141,6 +143,14 @@ struct ExecContext {
         bool non_blocking;
         bool private_tmp;
         bool private_network;
+        char *tmp_dir;
+        char *var_tmp_dir;
+        char *inaccessible_dir;
+        bool need_inaccessible_initialized;
+        bool need_inaccessible;
+        BindMount *bind_mounts;
+        unsigned bind_mounts_count;
+
 
         bool no_new_privileges;
 
@@ -164,7 +174,7 @@ struct ExecContext {
 
 int exec_spawn(ExecCommand *command,
                char **argv,
-               const ExecContext *context,
+               ExecContext *context,
                int fds[], unsigned n_fds,
                char **environment,
                bool apply_permissions,
@@ -192,6 +202,7 @@ void exec_command_append_list(ExecCommand **l, ExecCommand *e);
 int exec_command_set(ExecCommand *c, const char *path, ...);
 
 void exec_context_init(ExecContext *c);
+void exec_context_tmpdirs_done(ExecContext *c);
 void exec_context_done(ExecContext *c);
 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix);
 void exec_context_tty_reset(const ExecContext *context);
diff --git a/src/core/namespace.c b/src/core/namespace.c
index ba18ddc..914be01 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -36,23 +36,10 @@
 #include "path-util.h"
 #include "namespace.h"
 #include "missing.h"
+#include "execute.h"
 
-typedef enum PathMode {
-        /* This is ordered by priority! */
-        INACCESSIBLE,
-        READONLY,
-        PRIVATE_TMP,
-        PRIVATE_VAR_TMP,
-        READWRITE
-} PathMode;
-
-typedef struct Path {
-        const char *path;
-        PathMode mode;
-        bool done;
-} Path;
-
-static int append_paths(Path **p, char **strv, PathMode mode) {
+
+static int append_mounts(BindMount **p, char **strv, MountMode mode) {
         char **i;
 
         STRV_FOREACH(i, strv) {
@@ -68,8 +55,8 @@ static int append_paths(Path **p, char **strv, PathMode mode) {
         return 0;
 }
 
-static int path_compare(const void *a, const void *b) {
-        const Path *p = a, *q = b;
+static int mount_path_compare(const void *a, const void *b) {
+        const BindMount *p = a, *q = b;
 
         if (path_equal(p->path, q->path)) {
 
@@ -93,14 +80,14 @@ static int path_compare(const void *a, const void *b) {
         return 0;
 }
 
-static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible) {
-        Path *f, *t, *previous;
+static void drop_duplicates(BindMount *m, unsigned *n, bool *need_inaccessible){
+        BindMount *f, *t, *previous;
 
-        assert(p);
+        assert(m);
         assert(n);
         assert(need_inaccessible);
 
-        for (f = p, t = p, previous = NULL; f < p+*n; f++) {
+        for (f = m, t = m, previous = NULL; f < m+*n; f++) {
 
                 /* The first one wins */
                 if (previous && path_equal(f->path, previous->path))
@@ -117,11 +104,11 @@ static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible) {
                 t++;
         }
 
-        *n = t - p;
+        *n = t - m;
 }
 
 static int apply_mount(
-                Path *p,
+                BindMount *m,
                 const char *tmp_dir,
                 const char *var_tmp_dir,
                 const char *inaccessible_dir) {
@@ -129,9 +116,9 @@ static int apply_mount(
         const char *what;
         int r;
 
-        assert(p);
+        assert(m);
 
-        switch (p->mode) {
+        switch (m->mode) {
 
         case INACCESSIBLE:
                 what = inaccessible_dir;
@@ -139,7 +126,7 @@ static int apply_mount(
 
         case READONLY:
         case READWRITE:
-                what = p->path;
+                what = m->path;
                 break;
 
         case PRIVATE_TMP:
@@ -156,129 +143,192 @@ static int apply_mount(
 
         assert(what);
 
-        r = mount(what, p->path, NULL, MS_BIND|MS_REC, NULL);
+        r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
         if (r >= 0)
-                log_debug("Successfully mounted %s to %s", what, p->path);
+                log_debug("Successfully mounted %s to %s", what, m->path);
 
         return r;
 }
 
-static int make_read_only(Path *p) {
+static int make_read_only(BindMount *m) {
         int r;
 
-        assert(p);
+        assert(m);
 
-        if (p->mode != INACCESSIBLE && p->mode != READONLY)
+        if (m->mode != INACCESSIBLE && m->mode != READONLY)
                 return 0;
 
-        r = mount(NULL, p->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
+        r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
         if (r < 0)
                 return -errno;
 
         return 0;
 }
 
-int setup_namespace(
-                char **writable,
-                char **readable,
-                char **inaccessible,
-                bool private_tmp,
-                unsigned long flags) {
-
-        char
-                tmp_dir[] = "/tmp/systemd-private-XXXXXX",
-                var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX",
-                inaccessible_dir[] = "/tmp/systemd-inaccessible-XXXXXX";
-
-        Path *paths, *p;
-        unsigned n;
-        bool need_inaccessible = false;
-        bool remove_tmp = false, remove_var_tmp = false, remove_inaccessible = false;
-        int r;
-
-        if (!flags)
-                flags = MS_SHARED;
-
-        n =
-                strv_length(writable) +
-                strv_length(readable) +
-                strv_length(inaccessible) +
-                (private_tmp ? 2 : 0);
-
-        p = paths = alloca(sizeof(Path) * n);
-        if ((r = append_paths(&p, writable, READWRITE)) < 0 ||
-            (r = append_paths(&p, readable, READONLY)) < 0 ||
-            (r = append_paths(&p, inaccessible, INACCESSIBLE)) < 0)
-                goto fail;
-
-        if (private_tmp) {
-                p->path = "/tmp";
-                p->mode = PRIVATE_TMP;
-                p++;
+int setup_tmpdirs(ExecContext *context) {
+        int r = 0;
+        bool remove_tmp = false, remove_var_tmp = false;
+        mode_t u;
+        char *d = NULL;
+        char tmp_dir[] = "/tmp/systemd-private-XXXXXX",
+             inaccessible_dir[] = "/tmp/systemd-inaccessible-XXXXXX",
+             var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX";
+
+        assert(context);
+
+        if (context->private_tmp) {
+                if (!context->tmp_dir) {
+                        d = mktemp(tmp_dir);
+                        if (!d) {
+                                r = -errno;
+                                goto fail;
+                        }
+
+                        context->tmp_dir = strdup(d);
+                        if (!context->tmp_dir) {
+                                r = log_oom();
+                                goto fail;
+                        }
+
+                        u = umask(0000);
+                        r = mkdir(tmp_dir, 0777);
+                        umask(u);
+                        if (r < 0) {
+                                free(context->tmp_dir);
+                                context->tmp_dir = NULL;
+                                r = -errno;
+                                goto fail;
+                        }
+                        remove_tmp = true;
+
+                        if (chmod(tmp_dir, 0777 | S_ISVTX) < 0) {
+                                r = -errno;
+                                goto fail;
+                        }
+                }
 
-                p->path = "/var/tmp";
-                p->mode = PRIVATE_VAR_TMP;
-                p++;
+                if (!context->var_tmp_dir) {
+                        d = mktemp(var_tmp_dir);
+                        if (!d) {
+                                r = -errno;
+                                goto fail;
+                        }
+
+                        context->var_tmp_dir = strdup(d);
+                        if (!context->var_tmp_dir) {
+                                r = log_oom();
+                                goto fail;
+                        }
+
+                        u = umask(0000);
+                        r = mkdir(var_tmp_dir, 0777);
+                        umask(u);
+                        if (r < 0) {
+                                free(context->var_tmp_dir);
+                                context->var_tmp_dir = NULL;
+                                r = -errno;
+                                goto fail;
+                        }
+                        remove_var_tmp = true;
+
+                        if (chmod(var_tmp_dir, 0777 | S_ISVTX) < 0) {
+                                r = -errno;
+                                goto fail;
+                        }
+                }
         }
 
-        assert(paths + n == p);
-
-        qsort(paths, n, sizeof(Path), path_compare);
-        drop_duplicates(paths, &n, &need_inaccessible);
-
-        if (need_inaccessible) {
-                mode_t u;
-                char *d;
+        if (!context->need_inaccessible_initialized) {
+                BindMount *m, *mounts;
+                bool need_inaccessible = false;
+                unsigned n = strv_length(context->read_write_dirs) +
+                        strv_length(context->read_only_dirs) +
+                        strv_length(context->inaccessible_dirs) +
+                        (context->private_tmp ? 2 : 0);
+
+                m = mounts = alloca(n * sizeof(BindMount));
+                if ((r = append_mounts(&m, context->read_write_dirs, READWRITE)) < 0 ||
+                        (r = append_mounts(&m, context->read_only_dirs, READONLY)) < 0 ||
+                        (r = append_mounts(&m, context->inaccessible_dirs, INACCESSIBLE)) < 0)
+                        goto fail;
 
-                u = umask(0777);
-                d = mkdtemp(inaccessible_dir);
-                umask(u);
+                if (context->private_tmp) {
+                        m->path = "/tmp";
+                        m->mode = PRIVATE_TMP;
+                        m++;
 
-                if (!d) {
-                        r = -errno;
-                        goto fail;
+                        m->path = "/var/tmp";
+                        m->mode = PRIVATE_VAR_TMP;
+                        m++;
                 }
 
-                remove_inaccessible = true;
-        }
-
-        if (private_tmp) {
-                mode_t u;
-                char *d;
+                assert(mounts + n == m);
 
-                u = umask(0000);
-                d = mkdtemp(tmp_dir);
-                umask(u);
+                qsort(mounts, n, sizeof(BindMount), mount_path_compare);
+                drop_duplicates(mounts, &n, &need_inaccessible);
+                context->need_inaccessible = need_inaccessible;
+                context->need_inaccessible_initialized = true;
 
-                if (!d) {
-                        r = -errno;
+                context->bind_mounts = new0(BindMount, n);
+                if (!context->bind_mounts) {
+                        r = log_oom();
                         goto fail;
                 }
 
-                remove_tmp = true;
-
-                u = umask(0000);
-                d = mkdtemp(var_tmp_dir);
-                umask(u);
+                memcpy(context->bind_mounts, mounts, n * sizeof(BindMount));
+                context->bind_mounts_count = n;
+        }
 
+        if (context->need_inaccessible && !context->inaccessible_dir) {
+                d = mktemp(inaccessible_dir);
                 if (!d) {
                         r = -errno;
                         goto fail;
                 }
 
-                remove_var_tmp = true;
-
-                if (chmod(tmp_dir, 0777 + S_ISVTX) < 0) {
-                        r = -errno;
+                context->inaccessible_dir = strdup(inaccessible_dir);
+                if (!context->inaccessible_dir) {
+                        r = log_oom();
                         goto fail;
                 }
 
-                if (chmod(var_tmp_dir, 0777 + S_ISVTX) < 0) {
+                u = umask(0777);
+                r = mkdir(context->inaccessible_dir, 0000);
+                umask(u);
+                if (r < 0) {
+                        free(context->inaccessible_dir);
+                        context->inaccessible_dir = NULL;
                         r = -errno;
                         goto fail;
                 }
         }
 
+        return 0;
+fail:
+        if (remove_tmp) {
+                free(context->tmp_dir);
+                context->tmp_dir = NULL;
+                rmdir(tmp_dir);
+        }
+
+        if (remove_var_tmp) {
+                free(context->var_tmp_dir);
+                context->var_tmp_dir = NULL;
+                rmdir(var_tmp_dir);
+        }
+
+        return r;
+}
+
+int setup_namespace(ExecContext *context) {
+        BindMount *m;
+        int r = 0,
+            n = context->bind_mounts_count;
+
+
+        if (!context->mount_flags)
+                context->mount_flags = MS_SHARED;
+
         if (unshare(CLONE_NEWNS) < 0) {
                 r = -errno;
                 goto fail;
@@ -291,20 +341,20 @@ int setup_namespace(
                 goto fail;
         }
 
-        for (p = paths; p < paths + n; p++) {
-                r = apply_mount(p, tmp_dir, var_tmp_dir, inaccessible_dir);
+        for (m = context->bind_mounts; m < context->bind_mounts + n; m++) {
+                r = apply_mount(m, context->tmp_dir, context->var_tmp_dir, context->inaccessible_dir);
                 if (r < 0)
                         goto undo_mounts;
         }
 
-        for (p = paths; p < paths + n; p++) {
-                r = make_read_only(p);
+        for (m = context->bind_mounts; m < context->bind_mounts + n; m++) {
+                r = make_read_only(m);
                 if (r < 0)
                         goto undo_mounts;
         }
 
         /* Remount / as the desired mode */
-        if (mount(NULL, "/", NULL, flags|MS_REC, NULL) < 0) {
+        if (mount(NULL, "/", NULL, context->mount_flags|MS_REC, NULL) < 0) {
                 r = -errno;
                 goto undo_mounts;
         }
@@ -312,19 +362,10 @@ int setup_namespace(
         return 0;
 
 undo_mounts:
-        for (p = paths; p < paths + n; p++)
-                if (p->done)
-                        umount2(p->path, MNT_DETACH);
+        for (m = context->bind_mounts; m < context->bind_mounts + n; m++)
+                if (m->done)
+                        umount2(m->path, MNT_DETACH);
 
 fail:
-        if (remove_inaccessible)
-                rmdir(inaccessible_dir);
-
-        if (remove_tmp)
-                rmdir(tmp_dir);
-
-        if (remove_var_tmp)
-                rmdir(var_tmp_dir);
-
         return r;
 }
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 5d72ed9..f8eed3d 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -20,12 +20,24 @@
   You should have received a copy of the GNU Lesser General Public License
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
-
 #include <stdbool.h>
 
-int setup_namespace(
-                char **writable,
-                char **readable,
-                char **inaccessible,
-                bool private_tmp,
-                unsigned long flags);
+typedef struct ExecContext ExecContext;
+
+typedef enum MountMode {
+        /* This is ordered by priority! */
+        INACCESSIBLE,
+        READONLY,
+        PRIVATE_TMP,
+        PRIVATE_VAR_TMP,
+        READWRITE
+} MountMode;
+
+typedef struct BindMount {
+        const char *path;
+        MountMode mode;
+        bool done;
+} BindMount;
+
+int setup_tmpdirs(ExecContext *context);
+int setup_namespace(ExecContext *context);
diff --git a/src/core/service.c b/src/core/service.c
index 9c4bc41..e4c1b64 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1873,6 +1873,12 @@ static int cgroup_good(Service *s) {
         return !r;
 }
 
+static void service_cleanup_tmpdirs(Service *s) {
+        assert(s);
+
+        exec_context_tmpdirs_done(&s->exec_context);
+}
+
 static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) {
         int r;
         assert(s);
@@ -2520,6 +2526,10 @@ static int service_stop(Unit *u) {
                s->state == SERVICE_EXITED);
 
         service_enter_stop(s, SERVICE_SUCCESS);
+
+        /* we want empty tmp dirs when service is started again */
+        service_cleanup_tmpdirs(s);
+
         return 0;
 }
 
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index b1c759f..7a74bb5 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -26,9 +26,12 @@
 #include <linux/fs.h>
 
 #include "namespace.h"
+#include "execute.h"
 #include "log.h"
 
 int main(int argc, char *argv[]) {
+        ExecContext context;
+
         const char * const writable[] = {
                 "/home",
                 NULL
@@ -47,8 +50,21 @@ int main(int argc, char *argv[]) {
         };
 
         int r;
+        char tmp_dir[] = "/tmp/systemd-private-XXXXXX",
+             var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX";
+
+        mkdtemp(tmp_dir);
+        mkdtemp(var_tmp_dir);
+
+        context.read_write_dirs = (char **) writable;
+        context.read_only_dirs = (char **) readonly;
+        context.inaccessible_dirs = (char **) inaccessible;
+        context.tmp_dir = tmp_dir;
+        context.var_tmp_dir = var_tmp_dir;
+        context.private_tmp = true;
+        context.mount_flags = 0;
 
-        r = setup_namespace((char**) writable, (char**) readonly, (char**) inaccessible, true, 0);
+        r = setup_namespace(&context);
         if (r < 0) {
                 log_error("Failed to setup namespace: %s", strerror(-r));
                 return 1;
-- 
1.8.1.2



More information about the systemd-devel mailing list