[systemd-commits] 4 commits - TODO man/machinectl.xml src/machine src/nspawn src/shared

Lennart Poettering lennart at kemper.freedesktop.org
Wed Dec 17 16:36:37 PST 2014


 TODO                     |   16 +
 man/machinectl.xml       |   87 ++++++++-
 src/machine/machinectl.c |  436 ++++++++++++++++++++++++++++++++++++++++-------
 src/nspawn/nspawn.c      |   31 +++
 src/shared/btrfs-util.c  |    2 
 src/shared/copy.c        |   24 +-
 src/shared/copy.h        |    3 
 src/shared/util.c        |   12 -
 8 files changed, 532 insertions(+), 79 deletions(-)

New commits:
commit f2cbe59e113f08549949a76ac5b9b3972df4cc30
Author: Lennart Poettering <lennart at poettering.net>
Date:   Thu Dec 18 01:35:58 2014 +0100

    machinectl: add new commands for copying files from/to containers

diff --git a/man/machinectl.xml b/man/machinectl.xml
index 9d8a94a..eef1740 100644
--- a/man/machinectl.xml
+++ b/man/machinectl.xml
@@ -185,7 +185,7 @@
                         </varlistentry>
 
                         <varlistentry>
-                                <term><command>status</command> <replaceable>ID</replaceable>...</term>
+                                <term><command>status</command> <replaceable>NAME</replaceable>...</term>
 
                                 <listitem><para>Show terse runtime
                                 status information about one or more
@@ -198,14 +198,14 @@
                         </varlistentry>
 
                         <varlistentry>
-                                <term><command>show</command> <replaceable>ID</replaceable>...</term>
+                                <term><command>show</command> <replaceable>NAME</replaceable>...</term>
 
                                 <listitem><para>Show properties of one
                                 or more registered virtual machines or
                                 containers or the manager itself. If
                                 no argument is specified, properties
                                 of the manager will be shown. If an
-                                ID is specified, properties of this
+                                NAME is specified, properties of this
                                 virtual machine or container are
                                 shown. By default, empty properties
                                 are suppressed. Use
@@ -222,7 +222,7 @@
                         </varlistentry>
 
                         <varlistentry>
-                                <term><command>login</command> <replaceable>ID</replaceable></term>
+                                <term><command>login</command> <replaceable>NAME</replaceable></term>
 
                                 <listitem><para>Open a terminal login
                                 session to a container. This will
@@ -235,7 +235,7 @@
                         </varlistentry>
 
                         <varlistentry>
-                                <term><command>reboot</command> <replaceable>ID</replaceable>...</term>
+                                <term><command>reboot</command> <replaceable>NAME</replaceable>...</term>
 
                                 <listitem><para>Reboot one or more
                                 containers. This will trigger a reboot
@@ -248,7 +248,7 @@
                         </varlistentry>
 
                         <varlistentry>
-                                <term><command>poweroff</command> <replaceable>ID</replaceable>...</term>
+                                <term><command>poweroff</command> <replaceable>NAME</replaceable>...</term>
 
                                 <listitem><para>Power off one or more
                                 containers. This will trigger a reboot
@@ -264,7 +264,7 @@
                         </varlistentry>
 
                         <varlistentry>
-                                <term><command>kill</command> <replaceable>ID</replaceable>...</term>
+                                <term><command>kill</command> <replaceable>NAME</replaceable>...</term>
 
                                 <listitem><para>Send a signal to one
                                 or more processes of the virtual
@@ -279,7 +279,7 @@
                         </varlistentry>
 
                         <varlistentry>
-                                <term><command>terminate</command> <replaceable>ID</replaceable>...</term>
+                                <term><command>terminate</command> <replaceable>NAME</replaceable>...</term>
 
                                 <listitem><para>Terminates a virtual
                                 machine or container. This kills all
@@ -290,7 +290,7 @@
                         </varlistentry>
 
                         <varlistentry>
-                                <term><command>bind</command> <replaceable>ID</replaceable> <replaceable>DIRECTORY</replaceable> [<replaceable>DIRECTORY</replaceable>]</term>
+                                <term><command>bind</command> <replaceable>NAME</replaceable> <replaceable>PATH</replaceable> [<replaceable>PATH</replaceable>]</term>
 
                                 <listitem><para>Bind mounts a
                                 directory from the host into the
@@ -314,6 +314,33 @@
                                 containers.</para></listitem>
                         </varlistentry>
 
+                        <varlistentry>
+                                <term><command>copy-to</command> <replaceable>NAME</replaceable> <replaceable>PATH</replaceable> [<replaceable>PATH</replaceable>]</term>
+
+                                <listitem><para>Copies files or
+                                directories from the host system into
+                                a running container. Takes a container
+                                name, followed by the source path on
+                                the host and the destination path in
+                                the container. If the destination path
+                                is omitted the same as the source path
+                                is used.</para></listitem>
+                        </varlistentry>
+
+
+                        <varlistentry>
+                                <term><command>copy-from</command> <replaceable>NAME</replaceable> <replaceable>PATH</replaceable> [<replaceable>PATH</replaceable>]</term>
+
+                                <listitem><para>Copies files or
+                                directories from a container into the
+                                host system. Takes a container name,
+                                followed by the source path in the
+                                container and the destination path on the
+                                host. If the destination path is
+                                omitted the same as the source path is
+                                used.</para></listitem>
+                        </varlistentry>
+
                 </variablelist>
 
         </refsect1>
diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c
index 2571fc0..a62ffe3 100644
--- a/src/machine/machinectl.c
+++ b/src/machine/machinectl.c
@@ -31,6 +31,7 @@
 #include <arpa/inet.h>
 #include <net/if.h>
 #include <sys/mount.h>
+#include <libgen.h>
 
 #include "sd-bus.h"
 #include "log.h"
@@ -48,6 +49,7 @@
 #include "event-util.h"
 #include "path-util.h"
 #include "mkdir.h"
+#include "copy.h"
 
 static char **arg_property = NULL;
 static bool arg_all = false;
@@ -626,6 +628,97 @@ static int machine_get_leader(sd_bus *bus, const char *name, pid_t *ret) {
         return 0;
 }
 
+static int copy_files(sd_bus *bus, char **args, unsigned n) {
+        char *dest, *host_path, *container_path, *host_dirname, *host_basename, *container_dirname, *container_basename, *t;
+        _cleanup_close_ int hostfd = -1;
+        pid_t child, leader;
+        bool copy_from;
+        siginfo_t si;
+        int r;
+
+        /* Implements both the "copy-to" and the "copy-from" verb; args[0]
+         * selects the direction. args[1] is the machine name, args[2] the
+         * source path and the optional args[3] the destination path, which
+         * defaults to the source path. The directory on the host side is
+         * opened here, the copy itself is done by a forked child that joins
+         * the mount namespace of the machine's leader and opens the
+         * container side directory there. Returns 0 on success and a
+         * negative error (-EINVAL, -EIO, or what a failing helper reported)
+         * otherwise. */
+
+        if (n > 4) {
+                log_error("Too many arguments.");
+                return -EINVAL;
+        }
+
+        copy_from = streq(args[0], "copy-from");
+        dest = args[3] ?: args[2];
+        host_path = strdupa(copy_from ? dest : args[2]);
+        container_path = strdupa(copy_from ? args[2] : dest);
+
+        if (!path_is_absolute(container_path)) {
+                log_error("Container path not absolute.");
+                return -EINVAL;
+        }
+
+        /* basename() and dirname() may both modify the string they are
+         * passed, hence each gets its own copy. The copies are made with
+         * strdupa() so that there is nothing to free and no allocation
+         * result to check. */
+        t = strdupa(host_path);
+        host_basename = basename(t);
+        host_dirname = dirname(host_path);
+
+        t = strdupa(container_path);
+        container_basename = basename(t);
+        container_dirname = dirname(container_path);
+
+        r = machine_get_leader(bus, args[1], &leader);
+        if (r < 0)
+                return r;
+
+        /* Depending on the direction this is either the source or the
+         * destination directory, hence the neutral wording of the message. */
+        hostfd = open(host_dirname, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_DIRECTORY);
+        if (hostfd < 0)
+                return log_error_errno(errno, "Failed to open host directory: %m");
+
+        child = fork();
+        if (child < 0)
+                return log_error_errno(errno, "Failed to fork(): %m");
+
+        if (child == 0) {
+                int containerfd, mntfd;
+                const char *q;
+
+                q = procfs_file_alloca(leader, "ns/mnt");
+                mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+                if (mntfd < 0) {
+                        log_error_errno(errno, "Failed to open mount namespace of leader: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                if (setns(mntfd, CLONE_NEWNS) < 0) {
+                        log_error_errno(errno, "Failed to join namespace of leader: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                /* From here on paths are resolved in the mount namespace
+                 * joined above; the host side is only used via hostfd. */
+                containerfd = open(container_dirname, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_DIRECTORY);
+                if (containerfd < 0) {
+                        log_error_errno(errno, "Failed to open container directory: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                if (copy_from)
+                        r = copy_tree_at(containerfd, container_basename, hostfd, host_basename, true);
+                else
+                        r = copy_tree_at(hostfd, host_basename, containerfd, container_basename, true);
+                if (r < 0) {
+                        /* The error is carried by the return value, errno
+                         * is not guaranteed to still match it. */
+                        log_error_errno(r, "Failed to copy tree: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* The child has logged its own failure already, hence only the
+         * abnormal termination case gets a message of its own here. */
+        r = wait_for_terminate(child, &si);
+        if (r < 0)
+                return log_error_errno(r, "Failed to wait for client: %m");
+        if (si.si_code != CLD_EXITED) {
+                log_error("Client died abnormally.");
+                return -EIO;
+        }
+        if (si.si_status != EXIT_SUCCESS)
+                return -EIO;
+
+        return 0;
+}
+
 static int bind_mount(sd_bus *bus, char **args, unsigned n) {
         char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
         pid_t child, leader;
@@ -998,30 +1091,33 @@ static int login_machine(sd_bus *bus, char **args, unsigned n) {
 
 static void help(void) {
         printf("%s [OPTIONS...] {COMMAND} ...\n\n"
-               "Send control commands to or query the virtual machine and container registration manager.\n\n"
-               "  -h --help              Show this help\n"
-               "     --version           Show package version\n"
-               "     --no-pager          Do not pipe output into a pager\n"
-               "     --no-legend         Do not show the headers and footers\n"
-               "  -H --host=[USER@]HOST  Operate on remote host\n"
-               "  -M --machine=CONTAINER Operate on local container\n"
-               "  -p --property=NAME     Show only properties by this name\n"
-               "  -a --all               Show all properties, including empty ones\n"
-               "  -l --full              Do not ellipsize output\n"
-               "     --kill-who=WHO      Who to send signal to\n"
-               "  -s --signal=SIGNAL     Which signal to send\n"
-               "     --read-only         Create read-only bind mount\n"
-               "     --mkdir             Create directory before bind mounting, if missing\n\n"
+               "Send control commands to or query the virtual machine and container\n"
+               "registration manager.\n\n"
+               "  -h --help                   Show this help\n"
+               "     --version                Show package version\n"
+               "     --no-pager               Do not pipe output into a pager\n"
+               "     --no-legend              Do not show the headers and footers\n"
+               "  -H --host=[USER@]HOST       Operate on remote host\n"
+               "  -M --machine=CONTAINER      Operate on local container\n"
+               "  -p --property=NAME          Show only properties by this name\n"
+               "  -a --all                    Show all properties, including empty ones\n"
+               "  -l --full                   Do not ellipsize output\n"
+               "     --kill-who=WHO           Who to send signal to\n"
+               "  -s --signal=SIGNAL          Which signal to send\n"
+               "     --read-only              Create read-only bind mount\n"
+               "     --mkdir                  Create directory before bind mounting, if missing\n\n"
                "Commands:\n"
-               "  list                   List running VMs and containers\n"
-               "  status NAME...         Show VM/container status\n"
-               "  show NAME...           Show properties of one or more VMs/containers\n"
-               "  login NAME             Get a login prompt on a container\n"
-               "  poweroff NAME...       Power off one or more containers\n"
-               "  reboot NAME...         Reboot one or more containers\n"
-               "  kill NAME...           Send signal to processes of a VM/container\n"
-               "  terminate NAME...      Terminate one or more VMs/containers\n"
-               "  bind NAME PATH [PATH]  Bind mount a path from the host into a container\n",
+               "  list                        List running VMs and containers\n"
+               "  status NAME...              Show VM/container status\n"
+               "  show NAME...                Show properties of one or more VMs/containers\n"
+               "  login NAME                  Get a login prompt on a container\n"
+               "  poweroff NAME...            Power off one or more containers\n"
+               "  reboot NAME...              Reboot one or more containers\n"
+               "  kill NAME...                Send signal to processes of a VM/container\n"
+               "  terminate NAME...           Terminate one or more VMs/containers\n"
+               "  bind NAME PATH [PATH]       Bind mount a path from the host into a container\n"
+               "  copy-to NAME PATH [PATH]    Copy files from the host to a container\n"
+               "  copy-from NAME PATH [PATH]  Copy files from a container to the host\n",
                program_invocation_short_name);
 }
 
@@ -1159,6 +1255,8 @@ static int machinectl_main(sd_bus *bus, int argc, char *argv[]) {
                 { "kill",                  MORE,   2, kill_machine      },
                 { "login",                 MORE,   2, login_machine     },
                 { "bind",                  MORE,   3, bind_mount        },
+                { "copy-to",               MORE,   3, copy_files        },
+                { "copy-from",             MORE,   3, copy_files        },
         };
 
         int left;
diff --git a/src/shared/btrfs-util.c b/src/shared/btrfs-util.c
index fcf543a..492d7fc 100644
--- a/src/shared/btrfs-util.c
+++ b/src/shared/btrfs-util.c
@@ -122,7 +122,7 @@ int btrfs_subvol_snapshot(const char *old_path, const char *new_path, bool read_
                         if (r < 0)
                                 return r;
 
-                        r = copy_tree_fd(old_fd, new_path, true);
+                        r = copy_directory_fd(old_fd, new_path, true);
                         if (r < 0) {
                                 btrfs_subvol_remove(new_path);
                                 return r;
diff --git a/src/shared/copy.c b/src/shared/copy.c
index b4a85c7..0c2cdc8 100644
--- a/src/shared/copy.c
+++ b/src/shared/copy.c
@@ -25,6 +25,8 @@
 #include "btrfs-util.h"
 #include "copy.h"
 
+#define COPY_BUFFER_SIZE (16*1024)
+
 int copy_bytes(int fdf, int fdt, off_t max_bytes, bool try_reflink) {
         bool try_sendfile = true;
         int r;
@@ -40,7 +42,7 @@ int copy_bytes(int fdf, int fdt, off_t max_bytes, bool try_reflink) {
         }
 
         for (;;) {
-                size_t m = PIPE_BUF;
+                size_t m = COPY_BUFFER_SIZE;
                 ssize_t n;
 
                 if (max_bytes != (off_t) -1) {
@@ -279,30 +281,34 @@ static int fd_copy_directory(
         return r;
 }
 
-int copy_tree(const char *from, const char *to, bool merge) {
+int copy_tree_at(int fdf, const char *from, int fdt, const char *to, bool merge) {
         struct stat st;
 
         assert(from);
         assert(to);
 
-        if (lstat(from, &st) < 0)
+        if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
                 return -errno;
 
         if (S_ISREG(st.st_mode))
-                return fd_copy_regular(AT_FDCWD, from, &st, AT_FDCWD, to);
+                return fd_copy_regular(fdf, from, &st, fdt, to);
         else if (S_ISDIR(st.st_mode))
-                return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, merge);
+                return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, merge);
         else if (S_ISLNK(st.st_mode))
-                return fd_copy_symlink(AT_FDCWD, from, &st, AT_FDCWD, to);
+                return fd_copy_symlink(fdf, from, &st, fdt, to);
         else if (S_ISFIFO(st.st_mode))
-                return fd_copy_fifo(AT_FDCWD, from, &st, AT_FDCWD, to);
+                return fd_copy_fifo(fdf, from, &st, fdt, to);
         else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode))
-                return fd_copy_node(AT_FDCWD, from, &st, AT_FDCWD, to);
+                return fd_copy_node(fdf, from, &st, fdt, to);
         else
                 return -ENOTSUP;
 }
 
-int copy_tree_fd(int dirfd, const char *to, bool merge) {
+int copy_tree(const char *from, const char *to, bool merge) {
+        return copy_tree_at(AT_FDCWD, from, AT_FDCWD, to, merge);
+}
+
+int copy_directory_fd(int dirfd, const char *to, bool merge) {
 
         struct stat st;
 
diff --git a/src/shared/copy.h b/src/shared/copy.h
index 201fe69..714addf 100644
--- a/src/shared/copy.h
+++ b/src/shared/copy.h
@@ -27,5 +27,6 @@
 int copy_file_fd(const char *from, int to, bool try_reflink);
 int copy_file(const char *from, const char *to, int flags, mode_t mode);
 int copy_tree(const char *from, const char *to, bool merge);
-int copy_tree_fd(int dirfd, const char *to, bool merge);
+int copy_tree_at(int fdf, const char *from, int fdt, const char *to, bool merge);
+int copy_directory_fd(int dirfd, const char *to, bool merge);
 int copy_bytes(int fdf, int fdt, off_t max_bytes, bool try_reflink);

commit 20b63d12b533daf2e9b2936ffb03074861e1673e
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Dec 17 21:54:00 2014 +0100

    util: in make_stdio() use dup2() rather than dup3()
    
    dup3() allows setting O_CLOEXEC which we are not interested in. However,
    it also fails if called with the same fd as input and output, which is
    something we don't want. Hence use dup2().
    
    Also, we need to explicitly turn off O_CLOEXEC for the fds, in case the
    input fd was O_CLOEXEC and < 3.

diff --git a/src/shared/util.c b/src/shared/util.c
index ee95a4b..364f618 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -2468,9 +2468,9 @@ int make_stdio(int fd) {
 
         assert(fd >= 0);
 
-        r = dup3(fd, STDIN_FILENO, 0);
-        s = dup3(fd, STDOUT_FILENO, 0);
-        t = dup3(fd, STDERR_FILENO, 0);
+        r = dup2(fd, STDIN_FILENO);
+        s = dup2(fd, STDOUT_FILENO);
+        t = dup2(fd, STDERR_FILENO);
 
         if (fd >= 3)
                 safe_close(fd);
@@ -2478,7 +2478,11 @@ int make_stdio(int fd) {
         if (r < 0 || s < 0 || t < 0)
                 return -errno;
 
-        /* We rely here that the new fd has O_CLOEXEC not set */
+        /* Explicitly unset O_CLOEXEC, since if fd was < 3, then
+         * dup2() was a NOP and the bit hence possibly set. */
+        fd_cloexec(STDIN_FILENO, false);
+        fd_cloexec(STDOUT_FILENO, false);
+        fd_cloexec(STDERR_FILENO, false);
 
         return 0;
 }

commit 82e6c50c473f4be8df77c7a510577f1975eedddb
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Dec 17 21:53:54 2014 +0100

    update TODO

diff --git a/TODO b/TODO
index 393ee5e..37f4ea4 100644
--- a/TODO
+++ b/TODO
@@ -31,6 +31,22 @@ External:
 
 Features:
 
+* "machinectl run" that works like systemd-run, but allocates a pty in the container and attaches the service to it
+
+* "machinectl start/enable/disable foo" as aliases for "systemctl start/enable/disable systemd-nspawn@foo.service"
+
+* "machinectl list-images" for showing a list of container trees collected from /var/lib/containers
+
+* "machinectl cp" to copy files in and out of a container
+
+* "machinectl snapshot" to make a snapshot of a tree or container into /var/lib/containers
+
+* "machinectl rm" to remove a container tree from /var/lib/containers
+
+* machined: open up certain commands to unprivileged clients via polkit
+
+* sysfs set api in libudev is not const
+
 * Find a solution for SMACK capabilities stuff:
   http://lists.freedesktop.org/archives/systemd-devel/2014-December/026188.html
 

commit 785890acf6d629ff881a1f065f431df1b7fc8c7a
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Dec 17 21:51:45 2014 +0100

    machinectl: implement "bind" command to create additional bind mounts from host to container during runtime

diff --git a/man/machinectl.xml b/man/machinectl.xml
index 2f2e257..9d8a94a 100644
--- a/man/machinectl.xml
+++ b/man/machinectl.xml
@@ -146,6 +146,25 @@
                                         footer.</para></listitem>
                         </varlistentry>
 
+                        <varlistentry>
+                                <term><option>--mkdir</option></term>
+
+                                <listitem><para>When used with
+                                <command>bind</command> creates the
+                                destination directory before applying
+                                the bind mount.</para></listitem>
+                        </varlistentry>
+
+
+                        <varlistentry>
+                                <term><option>--read-only</option></term>
+
+                                <listitem><para>When used with
+                                <command>bind</command> applies a
+                                read-only bind
+                                mount.</para></listitem>
+                        </varlistentry>
+
                         <xi:include href="user-system-options.xml" xpointer="host" />
                         <xi:include href="user-system-options.xml" xpointer="machine" />
 
@@ -270,6 +289,31 @@
                                 instance.</para></listitem>
                         </varlistentry>
 
+                        <varlistentry>
+                                <term><command>bind</command> <replaceable>ID</replaceable> <replaceable>DIRECTORY</replaceable> [<replaceable>DIRECTORY</replaceable>]</term>
+
+                                <listitem><para>Bind mounts a
+                                directory from the host into the
+                                specified container. The first
+                                directory argument is the source
+                                directory on the host, the second
+                                directory argument the destination
+                                directory in the container. When the latter
+                                is omitted the destination path in the
+                                container is the same as the source
+                                path on the host. When combined with
+                                the <option>--read-only</option>
+                                switch a read-only bind mount is
+                                created. When combined with the
+                                <option>--mkdir</option> switch the
+                                destination path is first created
+                                before the mount is applied. Note that
+                                this option is currently only
+                                supported for
+                                <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+                                containers.</para></listitem>
+                        </varlistentry>
+
                 </variablelist>
 
         </refsect1>
diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c
index f604263..2571fc0 100644
--- a/src/machine/machinectl.c
+++ b/src/machine/machinectl.c
@@ -30,6 +30,7 @@
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <net/if.h>
+#include <sys/mount.h>
 
 #include "sd-bus.h"
 #include "log.h"
@@ -45,6 +46,8 @@
 #include "cgroup-util.h"
 #include "ptyfwd.h"
 #include "event-util.h"
+#include "path-util.h"
+#include "mkdir.h"
 
 static char **arg_property = NULL;
 static bool arg_all = false;
@@ -55,6 +58,8 @@ static const char *arg_kill_who = NULL;
 static int arg_signal = SIGTERM;
 static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
 static char *arg_host = NULL;
+static bool arg_read_only = false;
+static bool arg_mkdir = false;
 
 static void pager_open_if_enabled(void) {
 
@@ -572,6 +577,241 @@ static int terminate_machine(sd_bus *bus, char **args, unsigned n) {
         return 0;
 }
 
+static int machine_get_leader(sd_bus *bus, const char *name, pid_t *ret) {
+        _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
+        _cleanup_bus_message_unref_ sd_bus_message *machine_reply = NULL, *leader_reply = NULL;
+        const char *machine_path;
+        uint32_t pid;
+        int r;
+
+        /* Asks org.freedesktop.machine1 for the machine called 'name' and
+         * stores the value of that machine's "Leader" property in *ret.
+         * Returns 0 on success. On failure a message has been logged and
+         * the result of the failing step is returned; *ret is left
+         * untouched in that case. */
+
+        assert(bus);
+        assert(name);
+        assert(ret);
+
+        /* First, resolve the machine name to its bus object path. */
+        r = sd_bus_call_method(bus,
+                               "org.freedesktop.machine1",
+                               "/org/freedesktop/machine1",
+                               "org.freedesktop.machine1.Manager",
+                               "GetMachine",
+                               &error,
+                               &machine_reply,
+                               "s", name);
+        if (r < 0) {
+                log_error("Could not get path to machine: %s", bus_error_message(&error, -r));
+                return r;
+        }
+
+        r = sd_bus_message_read(machine_reply, "o", &machine_path);
+        if (r < 0)
+                return bus_log_parse_error(r);
+
+        /* Second, read the "Leader" property off that object. */
+        r = sd_bus_get_property(bus,
+                                "org.freedesktop.machine1",
+                                machine_path,
+                                "org.freedesktop.machine1.Machine",
+                                "Leader",
+                                &error,
+                                &leader_reply,
+                                "u");
+        if (r < 0)
+                return log_error_errno(r, "Failed to retrieve PID of leader: %m");
+
+        r = sd_bus_message_read(leader_reply, "u", &pid);
+        if (r < 0)
+                return bus_log_parse_error(r);
+
+        /* The property is transported as "u", i.e. an unsigned 32bit
+         * integer, and converted to pid_t on assignment. */
+        *ret = pid;
+        return 0;
+}
+
+static int bind_mount(sd_bus *bus, char **args, unsigned n) {
+        char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
+        pid_t child, leader;
+        const char *dest;
+        siginfo_t si;
+        bool mount_slave_created = false, mount_slave_mounted = false,
+                mount_tmp_created = false, mount_tmp_mounted = false,
+                mount_outside_created = false, mount_outside_mounted = false;
+        int r;
+
+        /* Implements the "bind" verb: args[1] is the machine name, args[2]
+         * the source path on the host and the optional args[3] the
+         * destination path in the container, which defaults to the source
+         * path. Returns 0 on success and an error (-EINVAL, -ENOTSUP, -EIO
+         * or what a failing step reported) otherwise.
+         *
+         * The *_created flags track directories that still have to be
+         * removed, the *_mounted flags mounts that still have to be
+         * unmounted; the "finish" path undoes exactly what they record.
+         * mount_tmp and mount_outside are only valid while the respective
+         * flags are set. */
+
+        /* One day, when bind mounting /proc/self/fd/n works across
+         * namespace boundaries we should rework this logic to make
+         * use of it... */
+
+        if (n > 4) {
+                log_error("Too many arguments.");
+                return -EINVAL;
+        }
+
+        dest = args[3] ?: args[2];
+        if (!path_is_absolute(dest)) {
+                log_error("Destination path not absolute.");
+                return -EINVAL;
+        }
+
+        p = strappenda("/run/systemd/nspawn/propagate/", args[1], "/");
+        if (access(p, F_OK) < 0) {
+                log_error("Container does not allow propagation of mount points.");
+                return -ENOTSUP;
+        }
+
+        r = machine_get_leader(bus, args[1], &leader);
+        if (r < 0)
+                return r;
+
+        /* Our goal is to install a new bind mount into the container,
+           possibly read-only. This is irritatingly complex
+           unfortunately, currently.
+
+           First, we start by creating a private playground in /tmp,
+           that we can mount MS_SLAVE. (Which is necessary, since
+           MS_MOVE cannot be applied to mounts with MS_SHARED parent
+           mounts.) */
+
+        if (!mkdtemp(mount_slave))
+                return log_error_errno(errno, "Failed to create playground: %m");
+
+        mount_slave_created = true;
+
+        if (mount(mount_slave, mount_slave, NULL, MS_BIND, NULL) < 0) {
+                r = log_error_errno(errno, "Failed to make bind mount: %m");
+                goto finish;
+        }
+
+        mount_slave_mounted = true;
+
+        if (mount(NULL, mount_slave, NULL, MS_SLAVE, NULL) < 0) {
+                r = log_error_errno(errno, "Failed to remount slave: %m");
+                goto finish;
+        }
+
+        /* Second, we mount the source directory to a directory inside
+           of our MS_SLAVE playground. */
+        mount_tmp = strappenda(mount_slave, "/mount");
+        if (mkdir(mount_tmp, 0700) < 0) {
+                r = log_error_errno(errno, "Failed to create temporary mount: %m");
+                goto finish;
+        }
+
+        mount_tmp_created = true;
+
+        if (mount(args[2], mount_tmp, NULL, MS_BIND, NULL) < 0) {
+                r = log_error_errno(errno, "Failed to overmount: %m");
+                goto finish;
+        }
+
+        mount_tmp_mounted = true;
+
+        /* Third, we remount the new bind mount read-only if requested. */
+        if (arg_read_only)
+                if (mount(NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
+                        r = log_error_errno(errno, "Failed to mark read-only: %m");
+                        goto finish;
+                }
+
+        /* Fourth, we move the new bind mount into the propagation
+         * directory. This way it will appear there read-only
+         * right-away. */
+
+        mount_outside = strappenda("/run/systemd/nspawn/propagate/", args[1], "/XXXXXX");
+        if (!mkdtemp(mount_outside)) {
+                r = log_error_errno(errno, "Cannot create propagation directory: %m");
+                goto finish;
+        }
+
+        mount_outside_created = true;
+
+        if (mount(mount_tmp, mount_outside, NULL, MS_MOVE, NULL) < 0) {
+                r = log_error_errno(errno, "Failed to move: %m");
+                goto finish;
+        }
+
+        mount_outside_mounted = true;
+        mount_tmp_mounted = false;
+
+        /* The playground has served its purpose, take it down again
+         * right-away and clear the flags so that "finish" leaves it
+         * alone. */
+        (void) rmdir(mount_tmp);
+        mount_tmp_created = false;
+
+        (void) umount(mount_slave);
+        mount_slave_mounted = false;
+
+        (void) rmdir(mount_slave);
+        mount_slave_created = false;
+
+        child = fork();
+        if (child < 0) {
+                r = log_error_errno(errno, "Failed to fork(): %m");
+                goto finish;
+        }
+
+        if (child == 0) {
+                const char *mount_inside;
+                int mntfd;
+                const char *q;
+
+                q = procfs_file_alloca(leader, "ns/mnt");
+                mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+                if (mntfd < 0) {
+                        log_error_errno(errno, "Failed to open mount namespace of leader: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                if (setns(mntfd, CLONE_NEWNS) < 0) {
+                        log_error_errno(errno, "Failed to join namespace of leader: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                /* Best effort only: if the directory cannot be created
+                 * the mount() call below fails and reports the problem. */
+                if (arg_mkdir)
+                        (void) mkdir_p(dest, 0755);
+
+                /* Fifth, move the mount to the right place inside */
+                mount_inside = strappenda("/run/systemd/nspawn/incoming/", basename(mount_outside));
+                if (mount(mount_inside, dest, NULL, MS_MOVE, NULL) < 0) {
+                        log_error_errno(errno, "Failed to mount: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        r = wait_for_terminate(child, &si);
+        if (r < 0) {
+                log_error_errno(r, "Failed to wait for client: %m");
+                goto finish;
+        }
+        if (si.si_code != CLD_EXITED) {
+                log_error("Client died abnormally.");
+                r = -EIO;
+                goto finish;
+        }
+        if (si.si_status != EXIT_SUCCESS) {
+                r = -EIO;
+                goto finish;
+        }
+
+        r = 0;
+
+finish:
+        /* Undo things in reverse order of their creation. For each object
+         * the mount has to go before the directory it sits on can be
+         * removed; directories are removed with rmdir(), not umount(). */
+        if (mount_outside_mounted)
+                (void) umount(mount_outside);
+        if (mount_outside_created)
+                (void) rmdir(mount_outside);
+
+        if (mount_tmp_mounted)
+                (void) umount(mount_tmp);
+        if (mount_tmp_created)
+                (void) rmdir(mount_tmp);
+
+        if (mount_slave_mounted)
+                (void) umount(mount_slave);
+        if (mount_slave_created)
+                (void) rmdir(mount_slave);
+
+        return r;
+}
+
 static int openpt_in_namespace(pid_t pid, int flags) {
         _cleanup_close_pair_ int pair[2] = { -1, -1 };
         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, rootfd = -1;
@@ -658,15 +898,15 @@ static int openpt_in_namespace(pid_t pid, int flags) {
 }
 
 static int login_machine(sd_bus *bus, char **args, unsigned n) {
-        _cleanup_bus_message_unref_ sd_bus_message *reply = NULL, *reply2 = NULL, *reply3 = NULL;
         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
+        _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
         _cleanup_bus_close_unref_ sd_bus *container_bus = NULL;
         _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
         _cleanup_event_unref_ sd_event *event = NULL;
         _cleanup_close_ int master = -1;
         _cleanup_free_ char *getty = NULL;
-        const char *path, *pty, *p;
-        uint32_t leader;
+        const char *pty, *p;
+        pid_t leader;
         sigset_t mask;
         int r, ret = 0;
 
@@ -686,39 +926,9 @@ static int login_machine(sd_bus *bus, char **args, unsigned n) {
         if (r < 0)
                 return log_error_errno(r, "Failed to attach bus to event loop: %m");
 
-        r = sd_bus_call_method(
-                        bus,
-                        "org.freedesktop.machine1",
-                        "/org/freedesktop/machine1",
-                        "org.freedesktop.machine1.Manager",
-                        "GetMachine",
-                        &error,
-                        &reply,
-                        "s", args[1]);
-        if (r < 0) {
-                log_error("Could not get path to machine: %s", bus_error_message(&error, -r));
-                return r;
-        }
-
-        r = sd_bus_message_read(reply, "o", &path);
-        if (r < 0)
-                return bus_log_parse_error(r);
-
-        r = sd_bus_get_property(
-                        bus,
-                        "org.freedesktop.machine1",
-                        path,
-                        "org.freedesktop.machine1.Machine",
-                        "Leader",
-                        &error,
-                        &reply2,
-                        "u");
-        if (r < 0)
-                return log_error_errno(r, "Failed to retrieve PID of leader: %m");
-
-        r = sd_bus_message_read(reply2, "u", &leader);
+        r = machine_get_leader(bus, args[1], &leader);
         if (r < 0)
-                return bus_log_parse_error(r);
+                return r;
 
         master = openpt_in_namespace(leader, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
         if (master < 0)
@@ -750,7 +960,7 @@ static int login_machine(sd_bus *bus, char **args, unsigned n) {
                                "/org/freedesktop/systemd1",
                                "org.freedesktop.systemd1.Manager",
                                "StartUnit",
-                               &error, &reply3,
+                               &error, &reply,
                                "ss", getty, "replace");
         if (r < 0) {
                 log_error("Failed to start getty service: %s", bus_error_message(&error, r));
@@ -799,7 +1009,9 @@ static void help(void) {
                "  -a --all               Show all properties, including empty ones\n"
                "  -l --full              Do not ellipsize output\n"
                "     --kill-who=WHO      Who to send signal to\n"
-               "  -s --signal=SIGNAL     Which signal to send\n\n"
+               "  -s --signal=SIGNAL     Which signal to send\n"
+               "     --read-only         Create read-only bind mount\n"
+               "     --mkdir             Create directory before bind mounting, if missing\n\n"
                "Commands:\n"
                "  list                   List running VMs and containers\n"
                "  status NAME...         Show VM/container status\n"
@@ -808,7 +1020,8 @@ static void help(void) {
                "  poweroff NAME...       Power off one or more containers\n"
                "  reboot NAME...         Reboot one or more containers\n"
                "  kill NAME...           Send signal to processes of a VM/container\n"
-               "  terminate NAME...      Terminate one or more VMs/containers\n",
+               "  terminate NAME...      Terminate one or more VMs/containers\n"
+               "  bind NAME PATH [PATH]  Bind mount a path from the host into a container\n",
                program_invocation_short_name);
 }
 
@@ -819,6 +1032,8 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_NO_PAGER,
                 ARG_NO_LEGEND,
                 ARG_KILL_WHO,
+                ARG_READ_ONLY,
+                ARG_MKDIR,
         };
 
         static const struct option options[] = {
@@ -833,6 +1048,8 @@ static int parse_argv(int argc, char *argv[]) {
                 { "signal",          required_argument, NULL, 's'                 },
                 { "host",            required_argument, NULL, 'H'                 },
                 { "machine",         required_argument, NULL, 'M'                 },
+                { "read-only",       no_argument,       NULL, ARG_READ_ONLY       },
+                { "mkdir",           no_argument,       NULL, ARG_MKDIR           },
                 {}
         };
 
@@ -903,6 +1120,14 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_host = optarg;
                         break;
 
+                case ARG_READ_ONLY:
+                        arg_read_only = true;
+                        break;
+
+                case ARG_MKDIR:
+                        arg_mkdir = true;
+                        break;
+
                 case '?':
                         return -EINVAL;
 
@@ -933,6 +1158,7 @@ static int machinectl_main(sd_bus *bus, int argc, char *argv[]) {
                 { "poweroff",              MORE,   2, poweroff_machine  },
                 { "kill",                  MORE,   2, kill_machine      },
                 { "login",                 MORE,   2, login_machine     },
+                { "bind",                  MORE,   3, bind_mount        },
         };
 
         int left;
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index a13c1fc..72f7d66 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -2082,6 +2082,27 @@ finish:
 
 }
 
+/* Bind mounts /run/systemd/nspawn/propagate/<machine> read-only onto <root>/run/systemd/nspawn/incoming; returns 0 or the logged error. */
+static int setup_propagate(const char *root) {
+        const char *p, *q;
+        /* Source side: create the per-machine propagation directory, best-effort (results deliberately ignored) */
+        (void) mkdir_p("/run/systemd/nspawn/", 0755);
+        (void) mkdir_p("/run/systemd/nspawn/propagate", 0600);
+        p = strappenda("/run/systemd/nspawn/propagate/", arg_machine);
+        (void) mkdir_p(p, 0600);
+        /* Target side: create the mount point below root, also best-effort; a missing directory surfaces in mount() below */
+        q = strappenda(root, "/run/systemd/nspawn/incoming");
+        (void) mkdir_parents(q, 0755);
+        (void) mkdir_p(q, 0600);
+        if (mount(p, q, NULL, MS_BIND, NULL) < 0)
+                return log_error_errno(errno, "Failed to install propagation bind mount: %m");
+        /* Second step: remount the freshly created bind mount with MS_RDONLY */
+        if (mount(NULL, q, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0)
+                return log_error_errno(errno, "Failed to make propagation mount read-only: %m");
+
+        return 0;
+}
+
 static int setup_image(char **device_path, int *loop_nr) {
         struct loop_info64 info = {
                 .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN
@@ -3260,6 +3281,9 @@ int main(int argc, char *argv[]) {
 
                         dev_setup(arg_directory);
 
+                        if (setup_propagate(arg_directory) < 0)
+                                _exit(EXIT_FAILURE);
+
                         if (setup_seccomp() < 0)
                                 _exit(EXIT_FAILURE);
 
@@ -3571,6 +3595,13 @@ finish:
                         log_warning_errno(k, "Cannot remove subvolume '%s', ignoring: %m", arg_directory);
         }
 
+        if (arg_machine) {
+                const char *p;
+
+                p = strappenda("/run/systemd/nspawn/propagate", arg_machine);
+                (void) rm_rf(p, false, true, false);
+        }
+
         free(arg_directory);
         free(arg_template);
         free(arg_image);



More information about the systemd-commits mailing list