[systemd-commits] 6 commits - src/nspawn src/shared

Lennart Poettering lennart at kemper.freedesktop.org
Mon Jun 30 07:24:12 PDT 2014


 src/nspawn/nspawn.c |  166 +++++++++++++++++++++++++++++++---------------------
 src/shared/util.c   |   11 +++
 2 files changed, 112 insertions(+), 65 deletions(-)

New commits:
commit 28650077f36466d9c5ee27ef2006fae3171a2430
Author: Lennart Poettering <lennart at poettering.net>
Date:   Mon Jun 30 16:22:12 2014 +0200

    nspawn: block open_by_handle_at() and others via seccomp
    
    Let's protect ourselves against the recently reported docker security
    issue. Our man page makes clear that we do not make any security
    promises anyway, but well, this one is easy to mitigate, so let's do it.
    While we are at it block a couple of more syscalls that are no good in
    containers, too.

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index fd61d07..656c1bf 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -1864,22 +1864,25 @@ static int setup_macvlan(pid_t pid) {
         return 0;
 }
 
-static int audit_still_doesnt_work_in_containers(void) {
+static int setup_seccomp(void) {
 
 #ifdef HAVE_SECCOMP
+        static const int blacklist[] = {
+                SCMP_SYS(kexec_load),
+                SCMP_SYS(open_by_handle_at),
+                SCMP_SYS(init_module),
+                SCMP_SYS(finit_module),
+                SCMP_SYS(delete_module),
+                SCMP_SYS(iopl),
+                SCMP_SYS(ioperm),
+                SCMP_SYS(swapon),
+                SCMP_SYS(swapoff),
+        };
+
         scmp_filter_ctx seccomp;
+        unsigned i;
         int r;
 
-        /*
-           Audit is broken in containers, much of the userspace audit
-           hookup will fail if running inside a container. We don't
-           care and just turn off creation of audit sockets.
-
-           This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail
-           with EAFNOSUPPORT which audit userspace uses as indication
-           that audit is disabled in the kernel.
-         */
-
         seccomp = seccomp_init(SCMP_ACT_ALLOW);
         if (!seccomp)
                 return log_oom();
@@ -1890,6 +1893,26 @@ static int audit_still_doesnt_work_in_containers(void) {
                 goto finish;
         }
 
+        for (i = 0; i < ELEMENTSOF(blacklist); i++) {
+                r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i], 0);
+                if (r == -EFAULT)
+                        continue; /* unknown syscall */
+                if (r < 0) {
+                        log_error("Failed to block syscall: %s", strerror(-r));
+                        goto finish;
+                }
+        }
+
+        /*
+           Audit is broken in containers, much of the userspace audit
+           hookup will fail if running inside a container. We don't
+           care and just turn off creation of audit sockets.
+
+           This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail
+           with EAFNOSUPPORT which audit userspace uses as indication
+           that audit is disabled in the kernel.
+         */
+
         r = seccomp_rule_add(
                         seccomp,
                         SCMP_ACT_ERRNO(EAFNOSUPPORT),
@@ -3050,7 +3073,7 @@ int main(int argc, char *argv[]) {
 
                         dev_setup(arg_directory);
 
-                        if (audit_still_doesnt_work_in_containers() < 0)
+                        if (setup_seccomp() < 0)
                                 goto child_fail;
 
                         if (setup_dev_console(arg_directory, console) < 0)

commit 840295fc1e30bb8902e8df08127bbc281318b537
Author: Lennart Poettering <lennart at poettering.net>
Date:   Mon Jun 30 15:20:59 2014 +0200

    nspawn: let's avoid using goto to wildly for non-cleanup purposes

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index a00a220..fd61d07 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -3247,63 +3247,61 @@ int main(int argc, char *argv[]) {
                  * join its cgroup which might limit what it can do */
                 r = eventfd_child_succeeded(eventfds[1]);
                 eventfds[1] = safe_close(eventfds[1]);
-                if (r < 0)
-                        goto check_container_status;
 
-                r = register_machine(pid);
-                if (r < 0)
-                        goto finish;
+                if (r >= 0) {
+                        r = register_machine(pid);
+                        if (r < 0)
+                                goto finish;
 
-                r = move_network_interfaces(pid);
-                if (r < 0)
-                        goto finish;
+                        r = move_network_interfaces(pid);
+                        if (r < 0)
+                                goto finish;
 
-                r = setup_veth(pid, veth_name);
-                if (r < 0)
-                        goto finish;
+                        r = setup_veth(pid, veth_name);
+                        if (r < 0)
+                                goto finish;
 
-                r = setup_bridge(veth_name);
-                if (r < 0)
-                        goto finish;
+                        r = setup_bridge(veth_name);
+                        if (r < 0)
+                                goto finish;
 
-                r = setup_macvlan(pid);
-                if (r < 0)
-                        goto finish;
+                        r = setup_macvlan(pid);
+                        if (r < 0)
+                                goto finish;
 
-                /* Block SIGCHLD here, before notifying child.
-                 * process_pty() will handle it with the other signals. */
-                r = sigprocmask(SIG_BLOCK, &mask_chld, NULL);
-                if (r < 0)
-                        goto finish;
+                        /* Block SIGCHLD here, before notifying child.
+                         * process_pty() will handle it with the other signals. */
+                        r = sigprocmask(SIG_BLOCK, &mask_chld, NULL);
+                        if (r < 0)
+                                goto finish;
 
-                /* Reset signal to default */
-                r = default_signals(SIGCHLD, -1);
-                if (r < 0)
-                        goto finish;
+                        /* Reset signal to default */
+                        r = default_signals(SIGCHLD, -1);
+                        if (r < 0)
+                                goto finish;
 
-                /* Notify the child that the parent is ready with all
-                 * its setup, and that the child can now hand over
-                 * control to the code to run inside the container. */
-                r = eventfd_send_state(eventfds[0],
-                                       EVENTFD_PARENT_SUCCEEDED);
-                eventfds[0] = safe_close(eventfds[0]);
-                if (r < 0)
-                        goto finish;
+                        /* Notify the child that the parent is ready with all
+                         * its setup, and that the child can now hand over
+                         * control to the code to run inside the container. */
+                        r = eventfd_send_state(eventfds[0], EVENTFD_PARENT_SUCCEEDED);
+                        eventfds[0] = safe_close(eventfds[0]);
+                        if (r < 0)
+                                goto finish;
 
-                k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
-                if (k < 0) {
-                        r = EXIT_FAILURE;
-                        break;
-                }
+                        k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
+                        if (k < 0) {
+                                r = EXIT_FAILURE;
+                                break;
+                        }
 
-                if (!arg_quiet)
-                        putc('\n', stdout);
+                        if (!arg_quiet)
+                                putc('\n', stdout);
 
-                /* Kill if it is not dead yet anyway */
-                terminate_machine(pid);
+                        /* Kill if it is not dead yet anyway */
+                        terminate_machine(pid);
+                }
 
-check_container_status:
-                /* Redundant, but better safe than sorry */
+                /* Normally redundant, but better safe than sorry */
                 kill(pid, SIGKILL);
 
                 r = wait_for_container(pid, &container_status);

commit ce9f1527b685402974e15c30b2caf3c1fe3ceb81
Author: Lennart Poettering <lennart at poettering.net>
Date:   Mon Jun 30 15:19:00 2014 +0200

    nspawn: simplify exit condition check

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 8fb72d6..a00a220 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -3309,14 +3309,15 @@ check_container_status:
                 r = wait_for_container(pid, &container_status);
                 pid = 0;
 
-                if (r != 0) {
-                        /* If r < 0, explicitly set to EXIT_FAILURE,
-                         * otherwise return the exit code of the
-                         * containered process. */
-                        if (r < 0)
-                                r = EXIT_FAILURE;
+                if (r < 0) {
+                        /* We failed to wait for the container, or the
+                         * container exited abnormally */
+                        r = EXIT_FAILURE;
                         break;
-                } else if (container_status == CONTAINER_TERMINATED)
+                } else if (r > 0 || container_status == CONTAINER_TERMINATED)
+                        /* The container exited with a non-zero
+                         * status, or with zero status and no reboot
+                         * was requested. */
                         break;
 
                 /* CONTAINER_REBOOTED, loop again */

commit 8baaf7a3d8c42970c5215f4dcf84393b84b07e78
Author: Luke Shumaker <LukeShu at sbcglobal.net>
Date:   Sun Jun 29 20:18:03 2014 -0400

    nspawn: log a warning on failure from wait_for_terminate()
    
    This is at the suggestion of Djalal Harouni on the mailing list, and
    reflects the behavior of shared/util.c:wait_for_terminate_and_warn().

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index be0e6b5..8fb72d6 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -2666,8 +2666,10 @@ static int wait_for_container(pid_t pid, ContainerStatus *container) {
         siginfo_t status;
 
         r = wait_for_terminate(pid, &status);
-        if (r < 0)
+        if (r < 0) {
+                log_warning("Failed to wait for container: %s", strerror(-r));
                 return r;
+        }
 
         switch (status.si_code) {
         case CLD_EXITED:

commit 6d416b9cc8ce39e5f97737b749d4bb1fb4f86df0
Author: Luke Shumaker <LukeShu at sbcglobal.net>
Date:   Sun Jun 29 20:18:02 2014 -0400

    nspawn: Fix regression with exit status
    
    Commit 113cea8 introduced a bug that caused the exit code of systemd-nspawn
    to not reflect the exit code of the program executed in the container.

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 0a8dc0c..be0e6b5 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -2645,12 +2645,21 @@ static int change_uid_gid(char **_home) {
 }
 
 /*
- * Return 0 in case the container is being rebooted, has been shut
- * down or exited successfully. On failures a negative value is
- * returned.
+ * Return values:
+ * < 0 : wait_for_terminate() failed to get the state of the
+ *       container, the container was terminated by a signal, or
+ *       failed for an unknown reason.  No change is made to the
+ *       container argument.
+ * > 0 : The program executed in the container terminated with an
+ *       error.  The exit code of the program executed in the
+ *       container is returned.  No change is made to the container
+ *       argument.
+ *   0 : The container is being rebooted, has been shut down or exited
+ *       successfully.  The container argument has been set to either
+ *       CONTAINER_TERMINATED or CONTAINER_REBOOTED.
  *
- * The status of the container "CONTAINER_TERMINATED" or
- * "CONTAINER_REBOOTED" will be saved in the container argument
+ * That is, success is indicated by a return value of zero, and an
+ * error is indicated by a non-zero value.
  */
 static int wait_for_container(pid_t pid, ContainerStatus *container) {
         int r;
@@ -2672,7 +2681,6 @@ static int wait_for_container(pid_t pid, ContainerStatus *container) {
                 } else {
                         log_error("Container %s failed with error code %i.",
                                   arg_machine, status.si_status);
-                        r = -1;
                 }
                 break;
 
@@ -3299,8 +3307,12 @@ check_container_status:
                 r = wait_for_container(pid, &container_status);
                 pid = 0;
 
-                if (r < 0) {
-                        r = EXIT_FAILURE;
+                if (r != 0) {
+                        /* If r < 0, explicitly set to EXIT_FAILURE,
+                         * otherwise return the exit code of the
+                         * containered process. */
+                        if (r < 0)
+                                r = EXIT_FAILURE;
                         break;
                 } else if (container_status == CONTAINER_TERMINATED)
                         break;

commit 0659e8baf25c86cadac8cac79f4e800501694c8b
Author: Luke Shumaker <LukeShu at sbcglobal.net>
Date:   Sun Jun 29 20:18:01 2014 -0400

    shared/util.c:wait_for_terminate_and_warn(): Add a comment on the return values

diff --git a/src/shared/util.c b/src/shared/util.c
index af6bde2..e75f6c9 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -3482,6 +3482,17 @@ int wait_for_terminate(pid_t pid, siginfo_t *status) {
         }
 }
 
+/*
+ * Return values:
+ * < 0 : wait_for_terminate() failed to get the state of the
+ *       process, the process was terminated by a signal, or
+ *       failed for an unknown reason.
+ * >=0 : The process terminated normally, and its exit code is
+ *       returned.
+ *
+ * That is, success is indicated by a return value of zero, and an
+ * error is indicated by a non-zero value.
+ */
 int wait_for_terminate_and_warn(const char *name, pid_t pid) {
         int r;
         siginfo_t status;



More information about the systemd-commits mailing list