[systemd-commits] 6 commits - src/core src/libsystemd-bus src/machine src/shared

Lennart Poettering lennart at kemper.freedesktop.org
Fri Dec 13 20:10:35 PST 2013


 src/core/load-fragment.c           |   13 +--
 src/libsystemd-bus/bus-container.c |  149 ++++++++++++++++++++++++++-----------
 src/libsystemd-bus/bus-container.h |    3 
 src/libsystemd-bus/sd-bus.c        |   69 +++++++++++++++--
 src/libsystemd-bus/sd-event.c      |   34 ++++++--
 src/machine/machinectl.c           |   41 ++--------
 src/shared/logs-show.c             |   39 +++------
 src/shared/util.c                  |   79 +++++++++++++++++++
 src/shared/util.h                  |    5 +
 9 files changed, 312 insertions(+), 120 deletions(-)

New commits:
commit 8d35dae708ffbb88f1b023964913d3d53d4a7418
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sat Dec 14 05:08:47 2013 +0100

    event: some snake-oil to speed up impossible error checks

diff --git a/src/libsystemd-bus/sd-event.c b/src/libsystemd-bus/sd-event.c
index bf8b0fc..06c84d7 100644
--- a/src/libsystemd-bus/sd-event.c
+++ b/src/libsystemd-bus/sd-event.c
@@ -1605,7 +1605,7 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
                 return -errno;
         }
 
-        if (ss != sizeof(x))
+        if (_unlikely_(ss != sizeof(x)))
                 return -EIO;
 
         if (next)
@@ -1733,7 +1733,7 @@ static int process_signal(sd_event *e, uint32_t events) {
                         return -errno;
                 }
 
-                if (ss != sizeof(si))
+                if (_unlikely_(ss != sizeof(si)))
                         return -EIO;
 
                 read_one = true;

commit 9a800b5622b0feec879c3f5c81f7af5a8640e57c
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sat Dec 14 05:08:15 2013 +0100

    event: instead of resetting the revents field when we get new revents data from epoll, OR it in

diff --git a/src/libsystemd-bus/sd-event.c b/src/libsystemd-bus/sd-event.c
index 9481395..bf8b0fc 100644
--- a/src/libsystemd-bus/sd-event.c
+++ b/src/libsystemd-bus/sd-event.c
@@ -1569,12 +1569,21 @@ static int event_arm_timer(
         return 0;
 }
 
-static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
+static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
         assert(e);
         assert(s);
         assert(s->type == SOURCE_IO);
 
-        s->io.revents = events;
+        /* If the event source was already pending, we just OR in the
+         * new revents, otherwise we reset the value. The ORing is
+         * necessary to handle EPOLLONESHOT events properly where
+         * readability might happen independently of writability, and
+         * we need to keep track of both */
+
+        if (s->pending)
+                s->io.revents |= revents;
+        else
+                s->io.revents = revents;
 
         return source_set_pending(s, true);
 }

commit 15b38f936e257d14854f38bf39357b0b2a5c1e66
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sat Dec 14 05:07:13 2013 +0100

    event: dynamically adjust size of events array instead of pre-allocating it possibly too large or too small

diff --git a/src/libsystemd-bus/sd-event.c b/src/libsystemd-bus/sd-event.c
index 946ec0f..9481395 100644
--- a/src/libsystemd-bus/sd-event.c
+++ b/src/libsystemd-bus/sd-event.c
@@ -34,7 +34,7 @@
 
 #include "sd-event.h"
 
-#define EPOLL_QUEUE_MAX 64
+#define EPOLL_QUEUE_MAX 512U
 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
 
 typedef enum EventSourceType {
@@ -151,6 +151,8 @@ struct sd_event {
         sd_event **default_event_ptr;
 
         usec_t watchdog_last, watchdog_period;
+
+        unsigned n_sources;
 };
 
 static int pending_prioq_compare(const void *a, const void *b) {
@@ -316,6 +318,7 @@ static int exit_prioq_compare(const void *a, const void *b) {
 
 static void event_free(sd_event *e) {
         assert(e);
+        assert(e->n_sources == 0);
 
         if (e->default_event_ptr)
                 *(e->default_event_ptr) = NULL;
@@ -470,6 +473,8 @@ static void source_free(sd_event_source *s) {
         assert(s);
 
         if (s->event) {
+                assert(s->event->n_sources > 0);
+
                 switch (s->type) {
 
                 case SOURCE_IO:
@@ -532,6 +537,7 @@ static void source_free(sd_event_source *s) {
                 if (s->prepare)
                         prioq_remove(s->event->prepare, s, &s->prepare_index);
 
+                s->event->n_sources--;
                 sd_event_unref(s->event);
         }
 
@@ -585,6 +591,8 @@ static sd_event_source *source_new(sd_event *e, EventSourceType type) {
         s->type = type;
         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
 
+        e->n_sources ++;
+
         return s;
 }
 
@@ -1932,7 +1940,8 @@ static int process_watchdog(sd_event *e) {
 }
 
 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
-        struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
+        struct epoll_event *ev_queue;
+        unsigned ev_queue_max;
         sd_event_source *p;
         int r, i, m;
 
@@ -1962,8 +1971,10 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
 
         if (event_next_pending(e) || e->need_process_child)
                 timeout = 0;
+        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
+        ev_queue = newa(struct epoll_event, ev_queue_max);
 
-        m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
+        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
         if (m < 0) {
                 r = errno == EAGAIN || errno == EINTR ? 0 : -errno;

commit e04b0cdb9001edec916216e1fd7b6e814bc46fc3
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sat Dec 14 05:06:40 2013 +0100

    util: port last code over to new namespace utility calls

diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c
index 0e3fd3d..b24bce5 100644
--- a/src/shared/logs-show.c
+++ b/src/shared/logs-show.c
@@ -1116,10 +1116,8 @@ int add_matches_for_user_unit(sd_journal *j, const char *unit, uid_t uid) {
 }
 
 static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) {
-        _cleanup_free_ char *leader = NULL, *class = NULL;
-        _cleanup_close_pipe_ int sock[2] = { -1, -1 };
-        _cleanup_close_ int nsfd = -1;
-        const char *p, *ns;
+        _cleanup_close_pipe_ int pair[2] = { -1, -1 };
+        _cleanup_close_ int nsfd = -1, rootfd = -1;
         pid_t pid, child;
         siginfo_t si;
         char buf[37];
@@ -1132,26 +1130,15 @@ static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) {
         if (!filename_is_safe(machine))
                 return -EINVAL;
 
-        p = strappenda("/run/systemd/machines/", machine);
-
-        r = parse_env_file(p, NEWLINE, "LEADER", &leader, "CLASS", &class, NULL);
+        r = container_get_leader(machine, &pid);
         if (r < 0)
                 return r;
-        if (!leader)
-                return -ENODATA;
-        if (!streq_ptr(class, "container"))
-                return -EIO;
-        r = parse_pid(leader, &pid);
+
+        r = namespace_open(pid, &nsfd, &rootfd);
         if (r < 0)
                 return r;
 
-        ns = procfs_file_alloca(pid, "ns/mnt");
-
-        nsfd = open(ns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
-        if (nsfd < 0)
-                return -errno;
-
-        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0)
+        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
                 return -errno;
 
         child = fork();
@@ -1161,10 +1148,10 @@ static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) {
         if (child == 0) {
                 int fd;
 
-                close_nointr_nofail(sock[0]);
-                sock[0] = -1;
+                close_nointr_nofail(pair[0]);
+                pair[0] = -1;
 
-                r = setns(nsfd, CLONE_NEWNS);
+                r = namespace_enter(nsfd, rootfd);
                 if (r < 0)
                         _exit(EXIT_FAILURE);
 
@@ -1177,17 +1164,17 @@ static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) {
                 if (k != 36)
                         _exit(EXIT_FAILURE);
 
-                k = send(sock[1], buf, 36, MSG_NOSIGNAL);
+                k = send(pair[1], buf, 36, MSG_NOSIGNAL);
                 if (k != 36)
                         _exit(EXIT_FAILURE);
 
                 _exit(EXIT_SUCCESS);
         }
 
-        close_nointr_nofail(sock[1]);
-        sock[1] = -1;
+        close_nointr_nofail(pair[1]);
+        pair[1] = -1;
 
-        k = recv(sock[0], buf, 36, 0);
+        k = recv(pair[0], buf, 36, 0);
         if (k != 36)
                 return -EIO;
 

commit 5e2b3214aa6e9bb3559552d2218ce2eda312c1fc
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sat Dec 14 05:04:49 2013 +0100

    util: when joining a namespace make sure to reset all uids to 0 after
    the transition

diff --git a/src/shared/util.c b/src/shared/util.c
index 66276aa..b5ffaa1 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -6022,5 +6022,11 @@ int namespace_enter(int namespace_fd, int root_fd) {
         if (chroot(".") < 0)
                 return -errno;
 
+        if (setresgid(0, 0, 0) < 0)
+                return -errno;
+
+        if (setresuid(0, 0, 0) < 0)
+                return -errno;
+
         return 0;
 }

commit bc9fd78c7bfc39881e19457e476393635f8b0442
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 13 22:02:47 2013 +0100

    bus: when connecting to a container's kdbus instance, enter namespace first
    
    Previously we'd open the connection in the originating namespace, which
    meant most peers of the bus would not be able to make sense of the
    PID/UID/... identity of us since we didn't exist in the namespace they
    run in. However they require this identity for privilege decisions,
    hence disallowing access to anything from the host.
    
    Instead, when connecting to a container, create a temporary subprocess,
    make it join the container's namespace and then connect from there to
    the kdbus instance. This is similar to how we do it for socket
    connections already.
    
    This also unifies the namespacing code used by machinectl and the bus
    APIs.

diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 99cf20d..183c43d 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2680,31 +2680,28 @@ int unit_load_fragment(Unit *u) {
 
         /* Look for a template */
         if (u->load_state == UNIT_STUB && u->instance) {
-                char *k;
+                _cleanup_free_ char *k;
 
                 k = unit_name_template(u->id);
                 if (!k)
                         return -ENOMEM;
 
                 r = load_from_path(u, k);
-                free(k);
-
                 if (r < 0)
                         return r;
 
                 if (u->load_state == UNIT_STUB)
                         SET_FOREACH(t, u->names, i) {
+                                _cleanup_free_ char *z = NULL;
 
                                 if (t == u->id)
                                         continue;
 
-                                k = unit_name_template(t);
-                                if (!k)
+                                z = unit_name_template(t);
+                                if (!z)
                                         return -ENOMEM;
 
-                                r = load_from_path(u, k);
-                                free(k);
-
+                                r = load_from_path(u, z);
                                 if (r < 0)
                                         return r;
 
diff --git a/src/libsystemd-bus/bus-container.c b/src/libsystemd-bus/bus-container.c
index 33478c0..5d31f5a 100644
--- a/src/libsystemd-bus/bus-container.c
+++ b/src/libsystemd-bus/bus-container.c
@@ -28,51 +28,23 @@
 #include "bus-socket.h"
 #include "bus-container.h"
 
-int bus_container_connect(sd_bus *b) {
-        _cleanup_free_ char *s = NULL, *ns = NULL, *root = NULL, *class = NULL;
+int bus_container_connect_socket(sd_bus *b) {
         _cleanup_close_ int nsfd = -1, rootfd = -1;
-        char *p;
-        siginfo_t si;
         pid_t leader, child;
+        siginfo_t si;
         int r;
 
         assert(b);
         assert(b->input_fd < 0);
         assert(b->output_fd < 0);
 
-        p = strappenda("/run/systemd/machines/", b->machine);
-        r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
-        if (r == -ENOENT)
-                return -EHOSTDOWN;
+        r = container_get_leader(b->machine, &leader);
         if (r < 0)
                 return r;
-        if (!s)
-                return -EIO;
-
-        if (!streq_ptr(class, "container"))
-                return -EIO;
 
-        r = parse_pid(s, &leader);
+        r = namespace_open(leader, &nsfd, &rootfd);
         if (r < 0)
                 return r;
-        if (leader <= 1)
-                return -EIO;
-
-        r = asprintf(&ns, "/proc/%lu/ns/mnt", (unsigned long) leader);
-        if (r < 0)
-                return -ENOMEM;
-
-        nsfd = open(ns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
-        if (nsfd < 0)
-                return -errno;
-
-        r = asprintf(&root, "/proc/%lu/root", (unsigned long) leader);
-        if (r < 0)
-                return -ENOMEM;
-
-        rootfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
-        if (rootfd < 0)
-                return -errno;
 
         b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
         if (b->input_fd < 0)
@@ -89,14 +61,9 @@ int bus_container_connect(sd_bus *b) {
                 return -errno;
 
         if (child == 0) {
-                r = setns(nsfd, CLONE_NEWNS);
-                if (r < 0)
-                        _exit(255);
-
-                if (fchdir(rootfd) < 0)
-                        _exit(255);
 
-                if (chroot(".") < 0)
+                r = namespace_enter(nsfd, rootfd);
+                if (r < 0)
                         _exit(255);
 
                 r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size);
@@ -107,7 +74,7 @@ int bus_container_connect(sd_bus *b) {
                         _exit(255);
                 }
 
-                _exit(0);
+                _exit(EXIT_SUCCESS);
         }
 
         r = wait_for_terminate(child, &si);
@@ -120,8 +87,108 @@ int bus_container_connect(sd_bus *b) {
         if (si.si_status == 1)
                 return 1;
 
-        if (si.si_status != 0)
+        if (si.si_status != EXIT_SUCCESS)
                 return -EIO;
 
         return bus_socket_start_auth(b);
 }
+
+int bus_container_connect_kernel(sd_bus *b) {
+        _cleanup_close_pipe_ int pair[2] = { -1, -1 };
+        _cleanup_close_ int nsfd = -1, rootfd = -1;
+        union {
+                struct cmsghdr cmsghdr;
+                uint8_t buf[CMSG_SPACE(sizeof(int))];
+        } control = {};
+        struct msghdr mh = {
+                .msg_control = &control,
+                .msg_controllen = sizeof(control),
+        };
+        struct cmsghdr *cmsg;
+        pid_t leader, child;
+        siginfo_t si;
+        int r;
+        _cleanup_close_ int fd = -1;
+
+        assert(b);
+        assert(b->input_fd < 0);
+        assert(b->output_fd < 0);
+
+        r = container_get_leader(b->machine, &leader);
+        if (r < 0)
+                return r;
+
+        r = namespace_open(leader, &nsfd, &rootfd);
+        if (r < 0)
+                return r;
+
+        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+                return -errno;
+
+        child = fork();
+        if (child < 0)
+                return -errno;
+
+        if (child == 0) {
+                close_nointr_nofail(pair[0]);
+                pair[0] = -1;
+
+                r = namespace_enter(nsfd, rootfd);
+                if (r < 0)
+                        _exit(EXIT_FAILURE);
+
+                fd = open(b->kernel, O_RDWR|O_NOCTTY|O_CLOEXEC);
+                if (fd < 0)
+                        _exit(EXIT_FAILURE);
+
+                cmsg = CMSG_FIRSTHDR(&mh);
+                cmsg->cmsg_level = SOL_SOCKET;
+                cmsg->cmsg_type = SCM_RIGHTS;
+                cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+                memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+
+                mh.msg_controllen = cmsg->cmsg_len;
+
+                if (sendmsg(pair[1], &mh, MSG_NOSIGNAL) < 0)
+                        _exit(EXIT_FAILURE);
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        close_nointr_nofail(pair[1]);
+        pair[1] = -1;
+
+        if (recvmsg(pair[0], &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) < 0)
+                return -errno;
+
+        for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg))
+                if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+                        int *fds;
+                        unsigned n_fds;
+
+                        fds = (int*) CMSG_DATA(cmsg);
+                        n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+                        if (n_fds != 1) {
+                                close_many(fds, n_fds);
+                                return -EIO;
+                        }
+
+                        fd = fds[0];
+                }
+
+        r = wait_for_terminate(child, &si);
+        if (r < 0)
+                return r;
+
+        if (si.si_code != CLD_EXITED)
+                return -EIO;
+
+        if (si.si_status != EXIT_SUCCESS)
+                return -EIO;
+
+        b->input_fd = b->output_fd = fd;
+        fd = -1;
+
+        return bus_kernel_take_fd(b);
+}
diff --git a/src/libsystemd-bus/bus-container.h b/src/libsystemd-bus/bus-container.h
index 65f43ab..c6f757a 100644
--- a/src/libsystemd-bus/bus-container.h
+++ b/src/libsystemd-bus/bus-container.h
@@ -23,4 +23,5 @@
 
 #include "sd-bus.h"
 
-int bus_container_connect(sd_bus *b);
+int bus_container_connect_socket(sd_bus *b);
+int bus_container_connect_kernel(sd_bus *b);
diff --git a/src/libsystemd-bus/sd-bus.c b/src/libsystemd-bus/sd-bus.c
index 932bf22..4eaceef 100644
--- a/src/libsystemd-bus/sd-bus.c
+++ b/src/libsystemd-bus/sd-bus.c
@@ -740,7 +740,7 @@ static int parse_kernel_address(sd_bus *b, const char **p, char **guid) {
         return 0;
 }
 
-static int parse_container_address(sd_bus *b, const char **p, char **guid) {
+static int parse_container_unix_address(sd_bus *b, const char **p, char **guid) {
         _cleanup_free_ char *machine = NULL;
         int r;
 
@@ -782,6 +782,49 @@ static int parse_container_address(sd_bus *b, const char **p, char **guid) {
         return 0;
 }
 
+static int parse_container_kernel_address(sd_bus *b, const char **p, char **guid) {
+        _cleanup_free_ char *machine = NULL;
+        int r;
+
+        assert(b);
+        assert(p);
+        assert(*p);
+        assert(guid);
+
+        while (**p != 0 && **p != ';') {
+                r = parse_address_key(p, "guid", guid);
+                if (r < 0)
+                        return r;
+                else if (r > 0)
+                        continue;
+
+                r = parse_address_key(p, "machine", &machine);
+                if (r < 0)
+                        return r;
+                else if (r > 0)
+                        continue;
+
+                skip_address_key(p);
+        }
+
+        if (!machine)
+                return -EINVAL;
+
+        if (!filename_is_safe(machine))
+                return -EINVAL;
+
+        free(b->machine);
+        b->machine = machine;
+        machine = NULL;
+
+        free(b->kernel);
+        b->kernel = strdup("/dev/kdbus/0-system/bus");
+        if (!b->kernel)
+                return -ENOMEM;
+
+        return 0;
+}
+
 static void bus_reset_parsed_address(sd_bus *b) {
         assert(b);
 
@@ -855,10 +898,18 @@ static int bus_parse_next_address(sd_bus *b) {
                                 return r;
 
                         break;
-                } else if (startswith(a, "x-container:")) {
+                } else if (startswith(a, "x-container-unix:")) {
+
+                        a += 17;
+                        r = parse_container_unix_address(b, &a, &guid);
+                        if (r < 0)
+                                return r;
+
+                        break;
+                } else if (startswith(a, "x-container-kernel:")) {
 
-                        a += 12;
-                        r = parse_container_address(b, &a, &guid);
+                        a += 19;
+                        r = parse_container_kernel_address(b, &a, &guid);
                         if (r < 0)
                                 return r;
 
@@ -892,10 +943,12 @@ static int bus_start_address(sd_bus *b) {
 
                 if (b->exec_path)
                         r = bus_socket_exec(b);
+                else if (b->machine && b->kernel)
+                        r = bus_container_connect_kernel(b);
+                else if (b->machine && b->sockaddr.sa.sa_family != AF_UNSPEC)
+                        r = bus_container_connect_socket(b);
                 else if (b->kernel)
                         r = bus_kernel_connect(b);
-                else if (b->machine)
-                        r = bus_container_connect(b);
                 else if (b->sockaddr.sa.sa_family != AF_UNSPEC)
                         r = bus_socket_connect(b);
                 else
@@ -1144,9 +1197,9 @@ _public_ int sd_bus_open_system_container(const char *machine, sd_bus **ret) {
                 return -ENOMEM;
 
 #ifdef ENABLE_KDBUS
-        p = strjoin("kernel:path=/dev/kdbus/ns/machine-", e, "/0-system/bus;x-container:machine=", e, NULL);
+        p = strjoin("x-container-kernel:machine=", e, ";x-container-unix:machine=", e, NULL);
 #else
-        p = strjoin("x-container:machine=", e, NULL);
+        p = strjoin("x-container-unix:machine=", e, NULL);
 #endif
         if (!p)
                 return -ENOMEM;
diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c
index 7bb7086..f5485b3 100644
--- a/src/machine/machinectl.c
+++ b/src/machine/machinectl.c
@@ -398,8 +398,8 @@ static int terminate_machine(sd_bus *bus, char **args, unsigned n) {
 }
 
 static int openpt_in_namespace(pid_t pid, int flags) {
+        _cleanup_close_pipe_ int pair[2] = { -1, -1 };
         _cleanup_close_ int nsfd = -1, rootfd = -1;
-        _cleanup_close_pipe_ int sock[2] = { -1, -1 };
         union {
                 struct cmsghdr cmsghdr;
                 uint8_t buf[CMSG_SPACE(sizeof(int))];
@@ -410,23 +410,14 @@ static int openpt_in_namespace(pid_t pid, int flags) {
         };
         struct cmsghdr *cmsg;
         int master = -1, r;
-        char *ns, *root;
         pid_t child;
         siginfo_t si;
 
-        ns = procfs_file_alloca(pid, "ns/mnt");
-
-        nsfd = open(ns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
-        if (nsfd < 0)
-                return -errno;
-
-        root = procfs_file_alloca(pid, "root");
-
-        rootfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
-        if (rootfd < 0)
-                return -errno;
+        r = namespace_open(pid, &nsfd, &rootfd);
+        if (r < 0)
+                return r;
 
-        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0)
+        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
                 return -errno;
 
         child = fork();
@@ -434,19 +425,13 @@ static int openpt_in_namespace(pid_t pid, int flags) {
                 return -errno;
 
         if (child == 0) {
-                close_nointr_nofail(sock[0]);
-                sock[0] = -1;
+                close_nointr_nofail(pair[0]);
+                pair[0] = -1;
 
-                r = setns(nsfd, CLONE_NEWNS);
+                r = namespace_enter(nsfd, rootfd);
                 if (r < 0)
                         _exit(EXIT_FAILURE);
 
-                if (fchdir(rootfd) < 0)
-                        _exit(EXIT_FAILURE);
-
-                if (chroot(".") < 0)
-                        _exit(EXIT_FAILURE);
-
                 master = posix_openpt(flags);
                 if (master < 0)
                         _exit(EXIT_FAILURE);
@@ -459,18 +444,16 @@ static int openpt_in_namespace(pid_t pid, int flags) {
 
                 mh.msg_controllen = cmsg->cmsg_len;
 
-                r = sendmsg(sock[1], &mh, MSG_NOSIGNAL);
-                close_nointr_nofail(master);
-                if (r < 0)
+                if (sendmsg(pair[1], &mh, MSG_NOSIGNAL) < 0)
                         _exit(EXIT_FAILURE);
 
                 _exit(EXIT_SUCCESS);
         }
 
-        close_nointr_nofail(sock[1]);
-        sock[1] = -1;
+        close_nointr_nofail(pair[1]);
+        pair[1] = -1;
 
-        if (recvmsg(sock[0], &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) < 0)
+        if (recvmsg(pair[0], &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) < 0)
                 return -errno;
 
         for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg))
diff --git a/src/shared/util.c b/src/shared/util.c
index 1c35edf..66276aa 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -5951,3 +5951,76 @@ int proc_cmdline(char **ret) {
 
         return 1;
 }
+
+int container_get_leader(const char *machine, pid_t *pid) {
+        _cleanup_free_ char *s = NULL, *class = NULL;
+        const char *p;
+        pid_t leader;
+        int r;
+
+        assert(machine);
+        assert(pid);
+
+        p = strappenda("/run/systemd/machines/", machine);
+        r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
+        if (r == -ENOENT)
+                return -EHOSTDOWN;
+        if (r < 0)
+                return r;
+        if (!s)
+                return -EIO;
+
+        if (!streq_ptr(class, "container"))
+                return -EIO;
+
+        r = parse_pid(s, &leader);
+        if (r < 0)
+                return r;
+        if (leader <= 1)
+                return -EIO;
+
+        *pid = leader;
+        return 0;
+}
+
+int namespace_open(pid_t pid, int *namespace_fd, int *root_fd) {
+        _cleanup_close_ int nsfd = -1;
+        const char *ns, *root;
+        int rfd;
+
+        assert(pid >= 0);
+        assert(namespace_fd);
+        assert(root_fd);
+
+        ns = procfs_file_alloca(pid, "ns/mnt");
+        nsfd = open(ns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+        if (nsfd < 0)
+                return -errno;
+
+        root = procfs_file_alloca(pid, "root");
+        rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+        if (rfd < 0)
+                return -errno;
+
+        *namespace_fd = nsfd;
+        *root_fd = rfd;
+        nsfd = -1;
+
+        return 0;
+}
+
+int namespace_enter(int namespace_fd, int root_fd) {
+        assert(namespace_fd >= 0);
+        assert(root_fd >= 0);
+
+        if (setns(namespace_fd, CLONE_NEWNS) < 0)
+                return -errno;
+
+        if (fchdir(root_fd) < 0)
+                return -errno;
+
+        if (chroot(".") < 0)
+                return -errno;
+
+        return 0;
+}
diff --git a/src/shared/util.h b/src/shared/util.h
index 504f63a..d5fa81c 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -789,3 +789,8 @@ static inline void qsort_safe(void *base, size_t nmemb, size_t size,
 }
 
 int proc_cmdline(char **ret);
+
+int container_get_leader(const char *machine, pid_t *pid);
+
+int namespace_open(pid_t pid, int *namespace_fd, int *root_fd);
+int namespace_enter(int namespace_fd, int root_fd);



More information about the systemd-commits mailing list