[systemd-commits] 12 commits - .gitignore Makefile.am man/systemd-nspawn.xml src/core src/gpt-auto-generator src/hostname src/journal src/locale src/nspawn src/shared src/systemctl src/test src/udev

Lennart Poettering lennart at kemper.freedesktop.org
Fri Dec 12 04:36:28 PST 2014


 .gitignore                                  |    1 
 Makefile.am                                 |   13 +
 man/systemd-nspawn.xml                      |   98 +++++++--
 src/core/load-fragment.c                    |    2 
 src/gpt-auto-generator/gpt-auto-generator.c |   55 -----
 src/hostname/hostnamed.c                    |    2 
 src/journal/coredump.c                      |   12 -
 src/journal/journald-native.c               |    2 
 src/locale/localed.c                        |    4 
 src/nspawn/nspawn.c                         |  293 +++++++++++++++++++--------
 src/shared/btrfs-util.c                     |  294 ++++++++++++++++++++++++++++
 src/shared/btrfs-util.h                     |   34 +++
 src/shared/copy.c                           |   47 +++-
 src/shared/copy.h                           |    1 
 src/shared/dropin.c                         |    2 
 src/shared/locale-util.c                    |    2 
 src/shared/missing.h                        |    8 
 src/shared/path-util.c                      |    2 
 src/shared/seccomp-util.h                   |    1 
 src/shared/util.c                           |  117 ++++++-----
 src/shared/util.h                           |    7 
 src/systemctl/systemctl.c                   |    8 
 src/test/test-btrfs.c                       |   68 ++++++
 src/test/test-util.c                        |   20 -
 src/udev/udev-builtin-btrfs.c               |    3 
 25 files changed, 843 insertions(+), 253 deletions(-)

New commits:
commit ec16945ebfe64d5cd5403ae1a1b16bc05a779a16
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 03:50:59 2014 +0100

    nspawn: beef up nspawn with some btrfs magic
    
    This adds --template= to duplicate an OS tree as a btrfs snapshot and run
    it.
    
    This also adds --ephemeral or -x to create a snapshot of an OS tree and
    boot that, removing it after exit.

diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml
index 75db65e..b66c34d 100644
--- a/man/systemd-nspawn.xml
+++ b/man/systemd-nspawn.xml
@@ -165,15 +165,79 @@
                                 <term><option>--directory=</option></term>
 
                                 <listitem><para>Directory to use as
-                                file system root for the container. If
-                                neither <option>--directory=</option>
-                                nor <option>--image=</option> are
+                                file system root for the container.</para>
+
+                                <para>If neither
+                                <option>--directory=</option>, nor
+                                <option>--image=</option> is specified
+                                the directory is determined as
+                                <filename>/var/lib/container/</filename>
+                                suffixed by the machine name as
+                                specified with
+                                <option>--machine=</option>. If
+                                neither <option>--directory=</option>,
+                                <option>--image=</option>, nor
+                                <option>--machine=</option> are
                                 specified, the current directory will
-                                be used. May not be specified together with
+                                be used. May not be specified together
+                                with
                                 <option>--image=</option>.</para></listitem>
                         </varlistentry>
 
                         <varlistentry>
+                                <term><option>--template=</option></term>
+
+                                <listitem><para>Directory or
+                                <literal>btrfs</literal> subvolume to
+                                use as template for the container's
+                                root directory. If this is specified
+                                and the container's root directory (as
+                                configured by
+                                <option>--directory=</option>) does
+                                not yet exist it is created as
+                                <literal>btrfs</literal> subvolume and
+                                populated from this template
+                                tree. Ideally, the specified template
+                                path refers to the root of a
+                                <literal>btrfs</literal> subvolume, in
+                                which case a simple copy-on-write
+                                snapshot is taken, and populating the
+                                root directory is instant. If the
+                                specified template path does not refer
+                                to the root of a
+                                <literal>btrfs</literal> subvolume (or
+                                not even to a <literal>btrfs</literal>
+                                file system at all), the tree is
+                                copied, which can be substantially
+                                more time-consuming. Note that if this
+                                option is used the container's root
+                                directory (in contrast to the template
+                                directory!) must be located on a
+                                <literal>btrfs</literal> file system,
+                                so that the <literal>btrfs</literal>
+                                subvolume may be created. May not be
+                                specified together with
+                                <option>--image=</option> or
+                                <option>--ephemeral</option>.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
+                                <term><option>-x</option></term>
+                                <term><option>--ephemeral</option></term>
+
+                                <listitem><para>If specified, the
+                                container is run with a temporary
+                                <literal>btrfs</literal> snapshot of
+                                its root directory (as configured with
+                                <option>--directory=</option>), that
+                                is removed immediately when the
+                                container terminates. May not be
+                                specified together with
+                                <option>--image=</option> or
+                                <option>--template=</option>.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><option>-i</option></term>
                                 <term><option>--image=</option></term>
 
@@ -197,7 +261,9 @@
                                 partitions, swap partitions or EFI
                                 system partitions are not mounted. May
                                 not be specified together with
-                                <option>--directory=</option>.</para></listitem>
+                                <option>--directory=</option>,
+                                <option>--template=</option> or
+                                <option>--ephemeral</option>.</para></listitem>
                         </varlistentry>
 
                         <varlistentry>
@@ -653,13 +719,13 @@
                                 <term><option>--volatile</option><replaceable>=MODE</replaceable></term>
 
                                 <listitem><para>Boots the container in
-                                volatile (ephemeral) mode. When no
-                                mode parameter is passed or when mode
-                                is specified as <literal>yes</literal>
-                                full volatile mode is enabled. This
-                                means the root directory is mounted as
-                                mostly unpopulated
-                                <literal>tmpfs</literal> instance, and
+                                volatile mode. When no mode parameter
+                                is passed or when mode is specified as
+                                <literal>yes</literal> full volatile
+                                mode is enabled. This means the root
+                                directory is mounted as mostly
+                                unpopulated <literal>tmpfs</literal>
+                                instance, and
                                 <filename>/usr</filename> from the OS
                                 tree is mounted into it, read-only
                                 (the system thus starts up with
@@ -677,8 +743,8 @@
                                 changes to the latter are lost on
                                 shutdown). When the mode parameter is
                                 specified as <literal>no</literal>
-                                (the default) the whole OS tree is made
-                                available writable.</para>
+                                (the default) the whole OS tree is
+                                made available writable.</para>
 
                                 <para>Note that setting this to
                                 <literal>yes</literal> or
@@ -748,13 +814,13 @@
                 </example>
 
                 <example>
-                        <title>Boot into a btrfs snapshot of the host system</title>
+                        <title>Boot into a <literal>btrfs</literal> snapshot of the host system</title>
 
                         <programlisting># btrfs subvolume snapshot / /.tmp
 # systemd-nspawn --private-network -D /.tmp -b</programlisting>
 
                         <para>This runs a copy of the host system in a
-                        btrfs snapshot.</para>
+                        <literal>btrfs</literal> snapshot.</para>
                 </example>
 
                 <example>
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index e1e1c36..1bfc99d 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -91,6 +91,7 @@
 #include "barrier.h"
 #include "event-util.h"
 #include "cap-list.h"
+#include "btrfs-util.h"
 
 #ifdef HAVE_SECCOMP
 #include "seccomp-util.h"
@@ -115,6 +116,7 @@ typedef enum Volatile {
 } Volatile;
 
 static char *arg_directory = NULL;
+static char *arg_template = NULL;
 static char *arg_user = NULL;
 static sd_id128_t arg_uuid = {};
 static char *arg_machine = NULL;
@@ -124,6 +126,7 @@ static const char *arg_slice = NULL;
 static bool arg_private_network = false;
 static bool arg_read_only = false;
 static bool arg_boot = false;
+static bool arg_ephemeral = false;
 static LinkJournal arg_link_journal = LINK_AUTO;
 static bool arg_link_journal_try = false;
 static uint64_t arg_retain =
@@ -166,7 +169,7 @@ static char **arg_network_macvlan = NULL;
 static bool arg_network_veth = false;
 static const char *arg_network_bridge = NULL;
 static unsigned long arg_personality = 0xffffffffLU;
-static const char *arg_image = NULL;
+static char *arg_image = NULL;
 static Volatile arg_volatile = VOLATILE_NO;
 
 static void help(void) {
@@ -176,7 +179,11 @@ static void help(void) {
                "     --version              Print version string\n"
                "  -q --quiet                Do not show status information\n"
                "  -D --directory=PATH       Root directory for the container\n"
-               "  -i --image=PATH           File system device or image for the container\n"
+               "     --template=PATH        Initialize root directory from template directory,\n"
+               "                            if missing\n"
+               "  -x --ephemeral            Run container with snapshot of root directory, and\n"
+               "                            remove it after exit\n"
+               "  -i --image=PATH           File system device or disk image for the container\n"
                "  -b --boot                 Boot up full system (i.e. invoke init)\n"
                "  -u --user=USER            Run the command under specified user or uid\n"
                "  -M --machine=NAME         Set the machine name for the container\n"
@@ -221,6 +228,27 @@ static void help(void) {
                program_invocation_short_name);
 }
 
+static int set_sanitized_path(char **b, const char *path) {
+        char *p;
+
+        assert(b);
+        assert(path);
+
+        p = canonicalize_file_name(path);
+        if (!p) {
+                if (errno != ENOENT)
+                        return -errno;
+
+                p = path_make_absolute_cwd(path);
+                if (!p)
+                        return -ENOMEM;
+        }
+
+        free(*b);
+        *b = path_kill_slashes(p);
+        return 0;
+}
+
 static int parse_argv(int argc, char *argv[]) {
 
         enum {
@@ -244,12 +272,15 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_NETWORK_BRIDGE,
                 ARG_PERSONALITY,
                 ARG_VOLATILE,
+                ARG_TEMPLATE,
         };
 
         static const struct option options[] = {
                 { "help",                  no_argument,       NULL, 'h'                   },
                 { "version",               no_argument,       NULL, ARG_VERSION           },
                 { "directory",             required_argument, NULL, 'D'                   },
+                { "template",              required_argument, NULL, ARG_TEMPLATE          },
+                { "ephemeral",             no_argument,       NULL, 'x'                   },
                 { "user",                  required_argument, NULL, 'u'                   },
                 { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK   },
                 { "boot",                  no_argument,       NULL, 'b'                   },
@@ -286,7 +317,7 @@ static int parse_argv(int argc, char *argv[]) {
         assert(argc >= 0);
         assert(argv);
 
-        while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:", options, NULL)) >= 0)
+        while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:x", options, NULL)) >= 0)
 
                 switch (c) {
 
@@ -300,17 +331,28 @@ static int parse_argv(int argc, char *argv[]) {
                         return 0;
 
                 case 'D':
-                        free(arg_directory);
-                        arg_directory = canonicalize_file_name(optarg);
-                        if (!arg_directory) {
-                                log_error_errno(errno, "Invalid root directory: %m");
-                                return -ENOMEM;
-                        }
+                        r = set_sanitized_path(&arg_directory, optarg);
+                        if (r < 0)
+                                return log_error_errno(r, "Invalid root directory: %m");
+
+                        break;
+
+                case ARG_TEMPLATE:
+                        r = set_sanitized_path(&arg_template, optarg);
+                        if (r < 0)
+                                return log_error_errno(r, "Invalid template directory: %m");
 
                         break;
 
                 case 'i':
-                        arg_image = optarg;
+                        r = set_sanitized_path(&arg_image, optarg);
+                        if (r < 0)
+                                return log_error_errno(r, "Invalid image path: %m");
+
+                        break;
+
+                case 'x':
+                        arg_ephemeral = true;
                         break;
 
                 case 'u':
@@ -621,6 +663,26 @@ static int parse_argv(int argc, char *argv[]) {
                 return -EINVAL;
         }
 
+        if (arg_template && arg_image) {
+                log_error("--template= and --image= may not be combined.");
+                return -EINVAL;
+        }
+
+        if (arg_template && !(arg_directory || arg_machine)) {
+                log_error("--template= needs --directory= or --machine=.");
+                return -EINVAL;
+        }
+
+        if (arg_ephemeral && arg_template) {
+                log_error("--ephemeral and --template= may not be combined.");
+                return -EINVAL;
+        }
+
+        if (arg_ephemeral && arg_image) {
+                log_error("--ephemeral and --image= may not be combined.");
+                return -EINVAL;
+        }
+
         if (arg_volatile != VOLATILE_NO && arg_read_only) {
                 log_error("Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy.");
                 return -EINVAL;
@@ -2019,6 +2081,7 @@ static int setup_image(char **device_path, int *loop_nr) {
 
         assert(device_path);
         assert(loop_nr);
+        assert(arg_image);
 
         fd = open(arg_image, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY);
         if (fd < 0)
@@ -2117,6 +2180,7 @@ static int dissect_image(
         assert(home_device);
         assert(srv_device);
         assert(secondary);
+        assert(arg_image);
 
         b = blkid_new_probe();
         if (!b)
@@ -2784,6 +2848,35 @@ static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo
         return 0;
 }
 
+static int determine_names(void) {
+
+        if (!arg_image && !arg_directory) {
+                if (arg_machine)
+                        arg_directory = strappend("/var/lib/container/", arg_machine);
+                else
+                        arg_directory = get_current_dir_name();
+
+                if (!arg_directory) {
+                        log_error("Failed to determine path, please use -D.");
+                        return -EINVAL;
+                }
+        }
+
+        if (!arg_machine) {
+                arg_machine = strdup(basename(arg_image ?: arg_directory));
+                if (!arg_machine)
+                        return log_oom();
+
+                hostname_cleanup(arg_machine, false);
+                if (!machine_name_is_valid(arg_machine)) {
+                        log_error("Failed to determine machine name automatically, please use -M.");
+                        return -EINVAL;
+                }
+        }
+
+        return 0;
+}
+
 int main(int argc, char *argv[]) {
 
         _cleanup_free_ char *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
@@ -2791,71 +2884,43 @@ int main(int argc, char *argv[]) {
         _cleanup_close_ int master = -1, image_fd = -1;
         _cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 };
         _cleanup_fdset_free_ FDSet *fds = NULL;
-        int r = EXIT_FAILURE, k, n_fd_passed, loop_nr = -1;
+        int r, n_fd_passed, loop_nr = -1;
         const char *console = NULL;
         char veth_name[IFNAMSIZ];
-        bool secondary = false;
+        bool secondary = false, remove_subvol = false;
         sigset_t mask, mask_chld;
         pid_t pid = 0;
+        int ret = EXIT_SUCCESS;
 
         log_parse_environment();
         log_open();
 
-        k = parse_argv(argc, argv);
-        if (k < 0)
+        r = parse_argv(argc, argv);
+        if (r <= 0)
                 goto finish;
-        else if (k == 0) {
-                r = EXIT_SUCCESS;
-                goto finish;
-        }
-
-        if (!arg_image) {
-                if (arg_directory) {
-                        char *p;
 
-                        p = path_make_absolute_cwd(arg_directory);
-                        free(arg_directory);
-                        arg_directory = p;
-                } else
-                        arg_directory = get_current_dir_name();
-
-                if (!arg_directory) {
-                        log_error("Failed to determine path, please use -D.");
-                        goto finish;
-                }
-                path_kill_slashes(arg_directory);
-        }
-
-        if (!arg_machine) {
-                arg_machine = strdup(basename(arg_image ? arg_image : arg_directory));
-                if (!arg_machine) {
-                        log_oom();
-                        goto finish;
-                }
-
-                hostname_cleanup(arg_machine, false);
-                if (isempty(arg_machine)) {
-                        log_error("Failed to determine machine name automatically, please use -M.");
-                        goto finish;
-                }
-        }
+        r = determine_names();
+        if (r < 0)
+                goto finish;
 
         if (geteuid() != 0) {
                 log_error("Need to be root.");
+                r = -EPERM;
                 goto finish;
         }
 
         if (sd_booted() <= 0) {
                 log_error("Not running on a systemd system.");
+                r = -EINVAL;
                 goto finish;
         }
 
         log_close();
         n_fd_passed = sd_listen_fds(false);
         if (n_fd_passed > 0) {
-                k = fdset_new_listen_fds(&fds, false);
-                if (k < 0) {
-                        log_error_errno(k, "Failed to collect file descriptors: %m");
+                r = fdset_new_listen_fds(&fds, false);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to collect file descriptors: %m");
                         goto finish;
                 }
         }
@@ -2863,14 +2928,53 @@ int main(int argc, char *argv[]) {
         log_open();
 
         if (arg_directory) {
+                assert(!arg_image);
+
                 if (path_equal(arg_directory, "/")) {
                         log_error("Spawning container on root directory not supported.");
+                        r = -EINVAL;
                         goto finish;
                 }
 
+                if (arg_template) {
+                        r = btrfs_subvol_snapshot(arg_template, arg_directory, arg_read_only, true);
+                        if (r == -EEXIST) {
+                                if (!arg_quiet)
+                                        log_info("Directory %s already exists, not populating from template %s.", arg_directory, arg_template);
+                        } else if (r < 0) {
+                                log_error_errno(r, "Couldn't create snapshort %s from %s: %m", arg_directory, arg_template);
+                                goto finish;
+                        } else {
+                                if (!arg_quiet)
+                                        log_info("Populated %s from template %s.", arg_directory, arg_template);
+                        }
+
+                } else if (arg_ephemeral) {
+                        char *np;
+
+                        r = tempfn_random(arg_directory, &np);
+                        if (r < 0) {
+                                log_error_errno(r, "Failed to generate name for snapshot: %m");
+                                goto finish;
+                        }
+
+                        r = btrfs_subvol_snapshot(arg_directory, np, arg_read_only, true);
+                        if (r < 0) {
+                                free(np);
+                                log_error_errno(r, "Failed to create snapshot %s from %s: %m", np, arg_directory);
+                                goto finish;
+                        }
+
+                        free(arg_directory);
+                        arg_directory = np;
+
+                        remove_subvol = true;
+                }
+
                 if (arg_boot) {
                         if (path_is_os_tree(arg_directory) <= 0) {
                                 log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory);
+                                r = -EINVAL;
                                 goto finish;
                         }
                 } else {
@@ -2880,13 +2984,17 @@ int main(int argc, char *argv[]) {
                                        argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
                         if (access(p, F_OK) < 0) {
                                 log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
+                                r = -EINVAL;
                                 goto finish;
-
                         }
                 }
+
         } else {
                 char template[] = "/tmp/nspawn-root-XXXXXX";
 
+                assert(arg_image);
+                assert(!arg_template);
+
                 if (!mkdtemp(template)) {
                         log_error_errno(errno, "Failed to create temporary directory: %m");
                         r = -errno;
@@ -2916,27 +3024,27 @@ int main(int argc, char *argv[]) {
 
         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
         if (master < 0) {
-                log_error_errno(errno, "Failed to acquire pseudo tty: %m");
+                r = log_error_errno(errno, "Failed to acquire pseudo tty: %m");
                 goto finish;
         }
 
         console = ptsname(master);
         if (!console) {
-                log_error_errno(errno, "Failed to determine tty name: %m");
+                r = log_error_errno(errno, "Failed to determine tty name: %m");
                 goto finish;
         }
 
         if (!arg_quiet)
                 log_info("Spawning container %s on %s.\nPress ^] three times within 1s to kill container.",
-                         arg_machine, arg_image ? arg_image : arg_directory);
+                         arg_machine, arg_image ?: arg_directory);
 
         if (unlockpt(master) < 0) {
-                log_error_errno(errno, "Failed to unlock tty: %m");
+                r = log_error_errno(errno, "Failed to unlock tty: %m");
                 goto finish;
         }
 
         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
-                log_error_errno(errno, "Failed to create kmsg socket pair: %m");
+                r = log_error_errno(errno, "Failed to create kmsg socket pair: %m");
                 goto finish;
         }
 
@@ -2970,13 +3078,13 @@ int main(int argc, char *argv[]) {
                  * give it a chance to call wait() and terminate. */
                 r = sigprocmask(SIG_UNBLOCK, &mask_chld, NULL);
                 if (r < 0) {
-                        log_error_errno(errno, "Failed to change the signal mask: %m");
+                        r = log_error_errno(errno, "Failed to change the signal mask: %m");
                         goto finish;
                 }
 
                 r = sigaction(SIGCHLD, &sa, NULL);
                 if (r < 0) {
-                        log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
+                        r = log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
                         goto finish;
                 }
 
@@ -2985,11 +3093,10 @@ int main(int argc, char *argv[]) {
                                           (arg_private_network ? CLONE_NEWNET : 0), NULL);
                 if (pid < 0) {
                         if (errno == EINVAL)
-                                log_error_errno(errno, "clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
+                                r = log_error_errno(errno, "clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
                         else
-                                log_error_errno(errno, "clone() failed: %m");
+                                r = log_error_errno(errno, "clone() failed: %m");
 
-                        r = pid;
                         goto finish;
                 }
 
@@ -3028,14 +3135,14 @@ int main(int argc, char *argv[]) {
                         reset_all_signal_handlers();
                         reset_signal_mask();
 
-                        k = open_terminal(console, O_RDWR);
-                        if (k != STDIN_FILENO) {
-                                if (k >= 0) {
-                                        safe_close(k);
-                                        k = -EINVAL;
+                        r = open_terminal(console, O_RDWR);
+                        if (r != STDIN_FILENO) {
+                                if (r >= 0) {
+                                        safe_close(r);
+                                        r = -EINVAL;
                                 }
 
-                                log_error_errno(k, "Failed to open console: %m");
+                                log_error_errno(r, "Failed to open console: %m");
                                 _exit(EXIT_FAILURE);
                         }
 
@@ -3090,9 +3197,9 @@ int main(int argc, char *argv[]) {
                                 _exit(EXIT_FAILURE);
 
                         if (arg_read_only) {
-                                k = bind_remount_recursive(arg_directory, true);
-                                if (k < 0) {
-                                        log_error_errno(k, "Failed to make tree read-only: %m");
+                                r = bind_remount_recursive(arg_directory, true);
+                                if (r < 0) {
+                                        log_error_errno(r, "Failed to make tree read-only: %m");
                                         _exit(EXIT_FAILURE);
                                 }
                         }
@@ -3196,9 +3303,9 @@ int main(int argc, char *argv[]) {
                         }
 
                         if (fdset_size(fds) > 0) {
-                                k = fdset_cloexec(fds, false);
-                                if (k < 0) {
-                                        log_error("Failed to unset O_CLOEXEC for file descriptors.");
+                                r = fdset_cloexec(fds, false);
+                                if (r < 0) {
+                                        log_error_errno(r, "Failed to unset O_CLOEXEC for file descriptors.");
                                         _exit(EXIT_FAILURE);
                                 }
 
@@ -3350,8 +3457,10 @@ int main(int argc, char *argv[]) {
                         }
 
                         r = sd_event_loop(event);
-                        if (r < 0)
-                                return log_error_errno(r, "Failed to run event loop: %m");
+                        if (r < 0) {
+                                log_error_errno(r, "Failed to run event loop: %m");
+                                goto finish;
+                        }
 
                         forward = pty_forward_free(forward);
 
@@ -3368,16 +3477,17 @@ int main(int argc, char *argv[]) {
                 r = wait_for_container(pid, &container_status);
                 pid = 0;
 
-                if (r < 0) {
+                if (r < 0)
                         /* We failed to wait for the container, or the
                          * container exited abnormally */
-                        r = EXIT_FAILURE;
-                        break;
-                } else if (r > 0 || container_status == CONTAINER_TERMINATED)
+                        goto finish;
+                else if (r > 0 || container_status == CONTAINER_TERMINATED){
                         /* The container exited with a non-zero
                          * status, or with zero status and no reboot
                          * was requested. */
+                        ret = r;
                         break;
+                }
 
                 /* CONTAINER_REBOOTED, loop again */
 
@@ -3392,7 +3502,8 @@ int main(int argc, char *argv[]) {
                          * restart. This is necessary since we might
                          * have cgroup parameters set we want to have
                          * flushed out. */
-                        r = 133;
+                        ret = 133;
+                        r = 0;
                         break;
                 }
         }
@@ -3407,7 +3518,17 @@ finish:
         if (pid > 0)
                 kill(pid, SIGKILL);
 
+        if (remove_subvol && arg_directory) {
+                int k;
+
+                k = btrfs_subvol_remove(arg_directory);
+                if (k < 0)
+                        log_warning_errno(k, "Cannot remove subvolume '%s', ignoring: %m", arg_directory);
+        }
+
         free(arg_directory);
+        free(arg_template);
+        free(arg_image);
         free(arg_machine);
         free(arg_user);
         strv_free(arg_setenv);
@@ -3417,5 +3538,5 @@ finish:
         strv_free(arg_bind_ro);
         strv_free(arg_tmpfs);
 
-        return r;
+        return r < 0 ? EXIT_FAILURE : ret;
 }
diff --git a/src/shared/copy.c b/src/shared/copy.c
index 4c41f2f..233dbbc 100644
--- a/src/shared/copy.c
+++ b/src/shared/copy.c
@@ -82,7 +82,6 @@ int copy_bytes(int fdf, int fdt, off_t max_bytes) {
                         r = loop_write(fdt, buf, (size_t) n, false);
                         if (r < 0)
                                 return r;
-
                 }
 
         next:

commit 0254b455e9730691e9f90d53afe860a0f3229f6d
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 03:20:53 2014 +0100

    copy: teach copy_bytes() btrfs reflink magic

diff --git a/src/shared/copy.c b/src/shared/copy.c
index f22a940..4c41f2f 100644
--- a/src/shared/copy.c
+++ b/src/shared/copy.c
@@ -27,10 +27,18 @@
 
 int copy_bytes(int fdf, int fdt, off_t max_bytes) {
         bool try_sendfile = true;
+        int r;
 
         assert(fdf >= 0);
         assert(fdt >= 0);
 
+        /* Try btrfs reflinks first. */
+        if (max_bytes == (off_t) -1) {
+                r = btrfs_reflink(fdf, fdt);
+                if (r >= 0)
+                        return 0;
+        }
+
         for (;;) {
                 size_t m = PIPE_BUF;
                 ssize_t n;
@@ -64,7 +72,6 @@ int copy_bytes(int fdf, int fdt, off_t max_bytes) {
                 /* As a fallback just copy bits by hand */
                 {
                         char buf[m];
-                        int r;
 
                         n = read(fdf, buf, m);
                         if (n < 0)
@@ -72,7 +79,7 @@ int copy_bytes(int fdf, int fdt, off_t max_bytes) {
                         if (n == 0) /* EOF */
                                 break;
 
-                        r = loop_write(fdt, buf, n, false);
+                        r = loop_write(fdt, buf, (size_t) n, false);
                         if (r < 0)
                                 return r;
 

commit f9ac15442e4132f00eca5495d53c17062aae13e0
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 03:20:34 2014 +0100

    gpt-auto-generator: make use of new btrfs-util.h APIs

diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c
index 32e9b78..909fdda 100644
--- a/src/gpt-auto-generator/gpt-auto-generator.c
+++ b/src/gpt-auto-generator/gpt-auto-generator.c
@@ -26,10 +26,6 @@
 #include <sys/statfs.h>
 #include <blkid/blkid.h>
 
-#ifdef HAVE_LINUX_BTRFS_H
-#include <linux/btrfs.h>
-#endif
-
 #include "sd-id128.h"
 #include "libudev.h"
 #include "path-util.h"
@@ -45,6 +41,7 @@
 #include "fileio.h"
 #include "efivars.h"
 #include "blkid-util.h"
+#include "btrfs-util.h"
 
 static const char *arg_dest = "/tmp";
 static bool arg_enabled = true;
@@ -572,54 +569,6 @@ static int enumerate_partitions(dev_t devnum) {
         return r;
 }
 
-static int get_btrfs_block_device(const char *path, dev_t *dev) {
-        struct btrfs_ioctl_fs_info_args fsi = {};
-        _cleanup_close_ int fd = -1;
-        uint64_t id;
-
-        assert(path);
-        assert(dev);
-
-        fd = open(path, O_DIRECTORY|O_CLOEXEC);
-        if (fd < 0)
-                return -errno;
-
-        if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
-                return -errno;
-
-        /* We won't do this for btrfs RAID */
-        if (fsi.num_devices != 1)
-                return 0;
-
-        for (id = 1; id <= fsi.max_id; id++) {
-                struct btrfs_ioctl_dev_info_args di = {
-                        .devid = id,
-                };
-                struct stat st;
-
-                if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
-                        if (errno == ENODEV)
-                                continue;
-
-                        return -errno;
-                }
-
-                if (stat((char*) di.path, &st) < 0)
-                        return -errno;
-
-                if (!S_ISBLK(st.st_mode))
-                        return -ENODEV;
-
-                if (major(st.st_rdev) == 0)
-                        return -ENODEV;
-
-                *dev = st.st_rdev;
-                return 1;
-        }
-
-        return -ENODEV;
-}
-
 static int get_block_device(const char *path, dev_t *dev) {
         struct stat st;
         struct statfs sfs;
@@ -639,7 +588,7 @@ static int get_block_device(const char *path, dev_t *dev) {
                 return -errno;
 
         if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
-                return get_btrfs_block_device(path, dev);
+                return btrfs_get_block_device(path, dev);
 
         return 0;
 }

commit d7c7c334f56edab8cfc102b657366277a65738cf
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 03:15:58 2014 +0100

    shared: add new btrfs-util.[ch] helpers for doing common btrfs operations

diff --git a/.gitignore b/.gitignore
index b0fc12f..dbc56bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,6 +134,7 @@
 /test-async
 /test-barrier
 /test-boot-timestamp
+/test-btrfs
 /test-bus-chat
 /test-bus-cleanup
 /test-bus-creds
diff --git a/Makefile.am b/Makefile.am
index 15e4484..4170749 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -891,7 +891,9 @@ libsystemd_shared_la_SOURCES = \
 	src/shared/memfd-util.h \
 	src/shared/uid-range.c \
 	src/shared/uid-range.h \
-	src/shared/nss-util.h
+	src/shared/nss-util.h \
+	src/shared/btrfs-util.c \
+	src/shared/btrfs-util.h
 
 if HAVE_UTMP
 libsystemd_shared_la_SOURCES += \
@@ -1319,7 +1321,8 @@ manual_tests += \
 	test-install \
 	test-watchdog \
 	test-log \
-	test-ipcrm
+	test-ipcrm \
+	test-btrfs
 
 if HAVE_KMOD
 manual_tests += \
@@ -1756,6 +1759,12 @@ test_ipcrm_LDADD = \
 	libsystemd-shared.la \
 	-lrt
 
+test_btrfs_SOURCES = \
+	src/test/test-btrfs.c
+
+test_btrfs_LDADD = \
+	libsystemd-shared.la
+
 test_rtnl_manual_SOURCES = \
 	src/test/test-rtnl-manual.c
 
diff --git a/src/shared/btrfs-util.c b/src/shared/btrfs-util.c
new file mode 100644
index 0000000..fcf543a
--- /dev/null
+++ b/src/shared/btrfs-util.c
@@ -0,0 +1,294 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdlib.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+
+#ifdef HAVE_LINUX_BTRFS_H
+#include <linux/btrfs.h>
+#endif
+
+#include "missing.h"
+#include "util.h"
+#include "path-util.h"
+#include "macro.h"
+#include "strv.h"
+#include "copy.h"
+#include "btrfs-util.h"
+
+static int validate_subvolume_name(const char *name) {
+        /* Returns 0 if name passes both checks below, -EINVAL if filename_is_valid() rejects it, -E2BIG if it is longer than BTRFS_SUBVOL_NAME_MAX */
+        if (!filename_is_valid(name))
+                return -EINVAL;
+
+        if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
+                return -E2BIG;
+
+        return 0;
+}
+
+static int open_parent(const char *path, int flags) {
+        _cleanup_free_ char *parent = NULL;
+        int r, fd;
+
+        assert(path);
+        /* Opens whatever path_get_parent() reports as the parent of path, using the given open() flags. Returns the fd (not closed here, the caller owns it) or a negative error */
+        r = path_get_parent(path, &parent);
+        if (r < 0)
+                return r;
+
+        fd = open(parent, flags);
+        if (fd < 0)
+                return -errno;
+
+        return fd;
+}
+
+static int extract_subvolume_name(const char *path, const char **subvolume) {
+        const char *fn;
+        int r;
+
+        assert(path);
+        assert(subvolume);
+        /* Hands out the basename() of path in *subvolume after validating it as a subvolume name. The pointer returned by basename() is passed on as-is, no copy is made. Returns 0 or the error from validate_subvolume_name() */
+        fn = basename(path);
+
+        r = validate_subvolume_name(fn);
+        if (r < 0)
+                return r;
+
+        *subvolume = fn;
+        return 0;
+}
+
+int btrfs_is_snapshot(int fd) {
+        struct stat st;
+        struct statfs sfs;
+        /* Returns 1 if fd is a directory with inode number 256 on a btrfs file system, 0 if it is not (or the file system is not btrfs), -errno if fstatfs() or fstat() fails */
+        if (fstatfs(fd, &sfs) < 0)
+                return -errno;
+
+        if (!F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
+                return 0;
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        /* On btrfs, the top-level directory of a subvolume always has inode number 256 */
+
+        return S_ISDIR(st.st_mode) && st.st_ino == 256;
+}
+
+int btrfs_subvol_snapshot(const char *old_path, const char *new_path, bool read_only, bool fallback_copy) {
+        struct btrfs_ioctl_vol_args_v2 args = {
+                .flags = read_only ? BTRFS_SUBVOL_RDONLY : 0,
+        };
+        _cleanup_close_ int old_fd = -1, new_fd = -1;
+        const char *subvolume;
+        int r;
+
+        assert(old_path);
+        /* Snapshots the subvolume old_path as new_path, read-only if requested. If old_path is no subvolume it is copied into a new subvolume when fallback_copy is set, otherwise -EISDIR is returned. Returns 0 or a negative error */
+        old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+        if (old_fd < 0)
+                return -errno;
+
+        r = btrfs_is_snapshot(old_fd);
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                /* btrfs_is_snapshot() returned 0 for old_path, so no snapshot ioctl is attempted */
+                if (fallback_copy) {
+                        r = btrfs_subvol_make(new_path);
+                        if (r < 0)
+                                return r;
+
+                        r = copy_tree_fd(old_fd, new_path, true);
+                        if (r < 0) {
+                                btrfs_subvol_remove(new_path);
+                                return r;
+                        }
+
+                        if (read_only) {
+                                r = btrfs_subvol_read_only(new_path, true);
+                                if (r < 0) {
+                                        btrfs_subvol_remove(new_path);
+                                        return r;
+                                }
+                        }
+
+                        return 0;
+                }
+                /* No copy fallback requested */
+                return -EISDIR;
+        }
+
+        r = extract_subvolume_name(new_path, &subvolume);
+        if (r < 0)
+                return r;
+
+        new_fd = open_parent(new_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+        if (new_fd < 0)
+                return new_fd;
+
+        strncpy(args.name, subvolume, sizeof(args.name)-1);
+        args.fd = old_fd;
+
+        if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &args) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int btrfs_subvol_make(const char *path) {
+        struct btrfs_ioctl_vol_args args = {};
+        _cleanup_close_ int fd = -1;
+        const char *subvolume;
+        int r;
+
+        assert(path);
+        /* Creates a subvolume at path: BTRFS_IOC_SUBVOL_CREATE is issued on the parent directory with the last path component as name. Returns 0 or a negative error */
+        r = extract_subvolume_name(path, &subvolume);
+        if (r < 0)
+                return r;
+
+        fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+        if (fd < 0)
+                return fd;
+
+        strncpy(args.name, subvolume, sizeof(args.name)-1);
+
+        if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int btrfs_subvol_remove(const char *path) {
+        struct btrfs_ioctl_vol_args args = {};
+        _cleanup_close_ int fd = -1;
+        const char *subvolume;
+        int r;
+
+        assert(path);
+        /* Removes the subvolume at path: BTRFS_IOC_SNAP_DESTROY is issued on the parent directory with the last path component as name. Returns 0 or a negative error */
+        r = extract_subvolume_name(path, &subvolume);
+        if (r < 0)
+                return r;
+
+        fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+        if (fd < 0)
+                return fd;
+
+        strncpy(args.name, subvolume, sizeof(args.name)-1);
+
+        if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &args) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int btrfs_subvol_read_only(const char *path, bool b) {
+        _cleanup_close_ int fd = -1;
+        uint64_t flags, nflags;
+        /* Turns BTRFS_SUBVOL_RDONLY on (b true) or off (b false) in the flags obtained via BTRFS_IOC_SUBVOL_GETFLAGS on path. Returns 0 or -errno */
+        fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+        if (fd < 0)
+                return -errno;
+
+        if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
+                return -errno;
+
+        if (b)
+                nflags = flags | BTRFS_SUBVOL_RDONLY;
+        else
+                nflags = flags & ~BTRFS_SUBVOL_RDONLY;
+        /* Flag is already in the requested state, skip the SETFLAGS ioctl */
+        if (flags == nflags)
+                return 0;
+
+        if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int btrfs_reflink(int infd, int outfd) {
+        int r;
+
+        assert(infd >= 0);
+        assert(outfd >= 0);
+        /* Issues BTRFS_IOC_CLONE on outfd with infd as argument. Returns 0 on success, -errno if the ioctl fails */
+        r = ioctl(outfd, BTRFS_IOC_CLONE, infd);
+        if (r < 0)
+                return -errno;
+
+        return 0;
+}
+
+int btrfs_get_block_device(const char *path, dev_t *dev) {
+        struct btrfs_ioctl_fs_info_args fsi = {};
+        _cleanup_close_ int fd = -1;
+        uint64_t id;
+
+        assert(path);
+        assert(dev);
+        /* Returns 1 and stores the backing block device in *dev; 0 (leaving *dev untouched) if the file system at path does not have exactly one device; a negative error otherwise */
+        fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+        if (fd < 0)
+                return -errno;
+
+        if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
+                return -errno;
+
+        /* We won't do this for btrfs RAID */
+        if (fsi.num_devices != 1)
+                return 0;
+        /* Try device IDs 1..max_id; IDs for which BTRFS_IOC_DEV_INFO fails with ENODEV are skipped, the first ID that answers decides the result */
+        for (id = 1; id <= fsi.max_id; id++) {
+                struct btrfs_ioctl_dev_info_args di = {
+                        .devid = id,
+                };
+                struct stat st;
+
+                if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
+                        if (errno == ENODEV)
+                                continue;
+
+                        return -errno;
+                }
+
+                if (stat((char*) di.path, &st) < 0)
+                        return -errno;
+
+                if (!S_ISBLK(st.st_mode))
+                        return -ENODEV;
+
+                if (major(st.st_rdev) == 0)
+                        return -ENODEV;
+
+                *dev = st.st_rdev;
+                return 1;
+        }
+
+        return -ENODEV;
+}
diff --git a/src/shared/btrfs-util.h b/src/shared/btrfs-util.h
new file mode 100644
index 0000000..28dbeb4
--- /dev/null
+++ b/src/shared/btrfs-util.h
@@ -0,0 +1,34 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+int btrfs_is_snapshot(int fd);
+
+int btrfs_subvol_make(const char *path);
+int btrfs_subvol_remove(const char *path);
+int btrfs_subvol_snapshot(const char *old_path, const char *new_path, bool read_only, bool fallback_copy);
+int btrfs_subvol_read_only(const char *path, bool b);
+
+int btrfs_reflink(int infd, int outfd);
+
+int btrfs_get_block_device(const char *path, dev_t *dev);
diff --git a/src/shared/copy.c b/src/shared/copy.c
index b8b1ba1..f22a940 100644
--- a/src/shared/copy.c
+++ b/src/shared/copy.c
@@ -22,6 +22,7 @@
 #include <sys/sendfile.h>
 
 #include "util.h"
+#include "btrfs-util.h"
 #include "copy.h"
 
 int copy_bytes(int fdf, int fdt, off_t max_bytes) {
@@ -187,20 +188,28 @@ static int fd_copy_node(int df, const char *from, const struct stat *st, int dt,
         return r;
 }
 
-static int fd_copy_directory(int df, const char *from, const struct stat *st, int dt, const char *to, dev_t original_device, bool merge) {
+static int fd_copy_directory(
+                int df,
+                const char *from,
+                const struct stat *st,
+                int dt,
+                const char *to,
+                dev_t original_device,
+                bool merge) {
+
         _cleanup_close_ int fdf = -1, fdt = -1;
         _cleanup_closedir_ DIR *d = NULL;
         struct dirent *de;
         bool created;
         int r;
 
-        assert(from);
         assert(st);
         assert(to);
 
-        fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
-        if (fdf < 0)
-                return -errno;
+        if (from)
+                fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+        else
+                fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
 
         d = fdopendir(fdf);
         if (!d)
@@ -287,6 +296,22 @@ int copy_tree(const char *from, const char *to, bool merge) {
                 return -ENOTSUP;
 }
 
+int copy_tree_fd(int dirfd, const char *to, bool merge) {
+        /* Copies the directory referred to by dirfd to the path 'to' via fd_copy_directory(), passing from=NULL and the directory's own st_dev as original device. Returns -errno if fstat() fails, -ENOTDIR if dirfd is no directory */
+        struct stat st;
+
+        assert(dirfd >= 0);
+        assert(to);
+
+        if (fstat(dirfd, &st) < 0)
+                return -errno;
+
+        if (!S_ISDIR(st.st_mode))
+                return -ENOTDIR;
+
+        return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, merge);
+}
+
 int copy_file_fd(const char *from, int fdt) {
         _cleanup_close_ int fdf = -1;
 
diff --git a/src/shared/copy.h b/src/shared/copy.h
index 6293211..15faf54 100644
--- a/src/shared/copy.h
+++ b/src/shared/copy.h
@@ -27,4 +27,5 @@
 int copy_file_fd(const char *from, int to);
 int copy_file(const char *from, const char *to, int flags, mode_t mode);
 int copy_tree(const char *from, const char *to, bool merge);
+int copy_tree_fd(int dirfd, const char *to, bool merge);
 int copy_bytes(int fdf, int fdt, off_t max_bytes);
diff --git a/src/test/test-btrfs.c b/src/test/test-btrfs.c
new file mode 100644
index 0000000..7c4cc55
--- /dev/null
+++ b/src/test/test-btrfs.c
@@ -0,0 +1,68 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdlib.h>
+
+#include "log.h"
+#include "btrfs-util.h"
+#include "fileio.h"
+
+int main(int argc, char *argv[]) {
+        int r;
+        /* Manual smoke test: makes, snapshots and removes subvolumes directly under / ("/xxxtest*", "/etc2"). Failures are only logged, every step still runs and the exit code is always 0 */
+        r = btrfs_subvol_make("/xxxtest");
+        if (r < 0)
+                log_error_errno(r, "Failed to make subvolume: %m");
+
+        r = write_string_file("/xxxtest/afile", "ljsadhfljasdkfhlkjdsfha");
+        if (r < 0)
+                log_error_errno(r, "Failed to write file: %m");
+
+        r = btrfs_subvol_snapshot("/xxxtest", "/xxxtest2", false, false);
+        if (r < 0)
+                log_error_errno(r, "Failed to make snapshot: %m");
+
+        r = btrfs_subvol_snapshot("/xxxtest", "/xxxtest3", true, false);
+        if (r < 0)
+                log_error_errno(r, "Failed to make snapshot: %m");
+
+        r = btrfs_subvol_remove("/xxxtest");
+        if (r < 0)
+                log_error_errno(r, "Failed to remove subvolume: %m");
+
+        r = btrfs_subvol_remove("/xxxtest2");
+        if (r < 0)
+                log_error_errno(r, "Failed to remove subvolume: %m");
+
+        r = btrfs_subvol_remove("/xxxtest3");
+        if (r < 0)
+                log_error_errno(r, "Failed to remove subvolume: %m");
+
+        r = btrfs_subvol_snapshot("/etc", "/etc2", true, true);
+        if (r < 0)
+                log_error_errno(r, "Failed to make snapshot: %m");
+
+        r = btrfs_subvol_remove("/etc2");
+        if (r < 0)
+                log_error_errno(r, "Failed to remove subvolume: %m");
+
+        return 0;
+}

commit 700c6087eb58de7af510f747414ea97b07e08b2b
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 03:13:48 2014 +0100

    shared: missing.h should include btrfs.h, before redefining some of its definitions

diff --git a/src/shared/missing.h b/src/shared/missing.h
index f226197..c547479 100644
--- a/src/shared/missing.h
+++ b/src/shared/missing.h
@@ -44,6 +44,10 @@
 #include <asm/sgidefs.h>
 #endif
 
+#ifdef HAVE_LINUX_BTRFS_H
+#include <linux/btrfs.h>
+#endif
+
 #include "macro.h"
 
 #ifndef RLIMIT_RTTIME

commit 6ce830fa612ca3f3159253c4c37aa9c81e7178ea
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 03:12:58 2014 +0100

    util: minor simplification for loop_write() and loop_read()

diff --git a/src/shared/util.c b/src/shared/util.c
index 273552f..254b563 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -2268,21 +2268,25 @@ ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll) {
                 ssize_t k;
 
                 k = read(fd, p, nbytes);
-                if (k < 0 && errno == EINTR)
-                        continue;
+                if (k < 0) {
+                        if (errno == EINTR)
+                                continue;
 
-                if (k < 0 && errno == EAGAIN && do_poll) {
+                        if (errno == EAGAIN && do_poll) {
 
-                        /* We knowingly ignore any return value here,
-                         * and expect that any error/EOF is reported
-                         * via read() */
+                                /* We knowingly ignore any return value here,
+                                 * and expect that any error/EOF is reported
+                                 * via read() */
 
-                        fd_wait_for_event(fd, POLLIN, USEC_INFINITY);
-                        continue;
+                                fd_wait_for_event(fd, POLLIN, USEC_INFINITY);
+                                continue;
+                        }
+
+                        return n > 0 ? n : -errno;
                 }
 
-                if (k <= 0)
-                        return n > 0 ? n : (k < 0 ? -errno : 0);
+                if (k == 0)
+                        return n;
 
                 p += k;
                 nbytes -= k;
@@ -2294,7 +2298,6 @@ ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll) {
 
 int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) {
         const uint8_t *p = buf;
-        ssize_t n = 0;
 
         assert(fd >= 0);
         assert(buf);
@@ -2305,26 +2308,27 @@ int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) {
                 ssize_t k;
 
                 k = write(fd, p, nbytes);
-                if (k < 0 && errno == EINTR)
-                        continue;
+                if (k < 0) {
+                        if (errno == EINTR)
+                                continue;
 
-                if (k < 0 && errno == EAGAIN && do_poll) {
+                        if (errno == EAGAIN && do_poll) {
+                                /* We knowingly ignore any return value here,
+                                 * and expect that any error/EOF is reported
+                                 * via write() */
 
-                        /* We knowingly ignore any return value here,
-                         * and expect that any error/EOF is reported
-                         * via write() */
+                                fd_wait_for_event(fd, POLLOUT, USEC_INFINITY);
+                                continue;
+                        }
 
-                        fd_wait_for_event(fd, POLLOUT, USEC_INFINITY);
-                        continue;
+                        return -errno;
                 }
 
-                if (k <= 0)
-                        /* We were not done yet, and a write error occured. */
-                        return errno ? -errno : -EIO;
+                if (k == 0) /* Can't really happen */
+                        return -EIO;
 
                 p += k;
                 nbytes -= k;
-                n += k;
         }
 
         return 0;

commit 0c3c42847da2f614f1a3f93c7cc96cd241e17e3a
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 02:49:40 2014 +0100

    nspawn: properly validate machine names

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 0466ddb..e1e1c36 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -369,15 +369,13 @@ static int parse_argv(int argc, char *argv[]) {
                                 free(arg_machine);
                                 arg_machine = NULL;
                         } else {
-
-                                if (!hostname_is_valid(optarg)) {
+                                if (!machine_name_is_valid(optarg)) {
                                         log_error("Invalid machine name: %s", optarg);
                                         return -EINVAL;
                                 }
 
-                                free(arg_machine);
-                                arg_machine = strdup(optarg);
-                                if (!arg_machine)
+                                r = free_and_strdup(&arg_machine, optarg);
+                                if (r < 0)
                                         return log_oom();
 
                                 break;

commit a60e9f7fc81558345c59bf203ace357223f208ef
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 02:35:39 2014 +0100

    seccomp-util.h: make sure seccomp-util.h can be included alone

diff --git a/src/shared/missing.h b/src/shared/missing.h
index 478988c..f226197 100644
--- a/src/shared/missing.h
+++ b/src/shared/missing.h
@@ -40,12 +40,12 @@
 #include <libaudit.h>
 #endif
 
-#include "macro.h"
-
 #ifdef ARCH_MIPS
 #include <asm/sgidefs.h>
 #endif
 
+#include "macro.h"
+
 #ifndef RLIMIT_RTTIME
 #define RLIMIT_RTTIME 15
 #endif
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 9a51a85..60d9715 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -21,6 +21,7 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
+#include <seccomp.h>
 
 const char* seccomp_arch_to_string(uint32_t c);
 int seccomp_arch_from_string(const char *n, uint32_t *ret);

commit db594aef549b1376ea80030bd0966ed872eac01e
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 02:35:14 2014 +0100

    path-util: no need to check whether p is absolute twice

diff --git a/src/shared/path-util.c b/src/shared/path-util.c
index be03695..b3fe0b8 100644
--- a/src/shared/path-util.c
+++ b/src/shared/path-util.c
@@ -129,7 +129,7 @@ char *path_make_absolute_cwd(const char *p) {
         if (!cwd)
                 return NULL;
 
-        return path_make_absolute(p, cwd);
+        return strjoin(cwd, "/", p, NULL);
 }
 
 int path_make_relative(const char *from_dir, const char *to_path, char **_r) {

commit a2e22d07c65aa7c1174a381589ad76841a04d100
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 02:34:12 2014 +0100

    udev-builtin-btrfs: properly initialize ioctl struct to zeroes

diff --git a/src/udev/udev-builtin-btrfs.c b/src/udev/udev-builtin-btrfs.c
index 2e2112f..74b2209 100644
--- a/src/udev/udev-builtin-btrfs.c
+++ b/src/udev/udev-builtin-btrfs.c
@@ -24,6 +24,7 @@
 #include <fcntl.h>
 #include <errno.h>
 #include <sys/ioctl.h>
+
 #ifdef HAVE_LINUX_BTRFS_H
 #include <linux/btrfs.h>
 #endif
@@ -32,7 +33,7 @@
 #include "udev.h"
 
 static int builtin_btrfs(struct udev_device *dev, int argc, char *argv[], bool test) {
-        struct  btrfs_ioctl_vol_args args;
+        struct btrfs_ioctl_vol_args args = {};
         _cleanup_close_ int fd = -1;
         int err;
 

commit 257224b0cdbb5ae1ac463a6a6645e033882b967e
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 02:33:48 2014 +0100

    util: document why we have alloca_align()

diff --git a/src/shared/util.h b/src/shared/util.h
index a15ce95..9a878ca 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -883,6 +883,7 @@ int unlink_noerrno(const char *path);
                 (void *) memset(_new_, 0, _len_);       \
         })
 
+/* It's not clear what alignment glibc/gcc alloca() guarantee, hence provide a guaranteed safe version */
 #define alloca_align(size, align)                                       \
         ({                                                              \
                 void *_ptr_;                                            \

commit ae6c3cc009a21df4b51851fb8fe3fde0b7d6d8f0
Author: Lennart Poettering <lennart at poettering.net>
Date:   Fri Dec 12 02:32:33 2014 +0100

    util: when using basename() for creating temporary files, verify the resulting name is actually valid
    
    Also, rename filename_is_safe() to filename_is_valid(), since it
    actually does a full validation for what the kernel will accept as file
    name, it's not just a heuristic.

diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 259323b..3fbe680 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -3066,7 +3066,7 @@ int config_parse_runtime_directory(
                 if (!n)
                         return log_oom();
 
-                if (!filename_is_safe(n)) {
+                if (!filename_is_valid(n)) {
                         log_syntax(unit, LOG_ERR, filename, line, EINVAL,
                                    "Runtime directory is not valid, ignoring assignment: %s", rvalue);
                         continue;
diff --git a/src/hostname/hostnamed.c b/src/hostname/hostnamed.c
index 970e800..ef45e56 100644
--- a/src/hostname/hostnamed.c
+++ b/src/hostname/hostnamed.c
@@ -552,7 +552,7 @@ static int set_machine_info(Context *c, sd_bus *bus, sd_bus_message *m, int prop
                 /* The icon name might ultimately be used as file
                  * name, so better be safe than sorry */
 
-                if (prop == PROP_ICON_NAME && !filename_is_safe(name))
+                if (prop == PROP_ICON_NAME && !filename_is_valid(name))
                         return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid icon name '%s'", name);
                 if (prop == PROP_PRETTY_HOSTNAME && string_has_cc(name, NULL))
                         return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid pretty host name '%s'", name);
diff --git a/src/journal/coredump.c b/src/journal/coredump.c
index be45a68..8678ec6 100644
--- a/src/journal/coredump.c
+++ b/src/journal/coredump.c
@@ -306,9 +306,9 @@ static int save_external_coredump(
         if (r < 0)
                 return log_error_errno(r, "Failed to determine coredump file name: %m");
 
-        tmp = tempfn_random(fn);
-        if (!tmp)
-                return log_oom();
+        r = tempfn_random(fn, &tmp);
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine temporary file name: %m");
 
         mkdir_p_label("/var/lib/systemd/coredump", 0755);
 
@@ -352,9 +352,9 @@ static int save_external_coredump(
                         goto uncompressed;
                 }
 
-                tmp_compressed = tempfn_random(fn_compressed);
-                if (!tmp_compressed) {
-                        log_oom();
+                r = tempfn_random(fn_compressed, &tmp_compressed);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to determine temporary file name for %s: %m", fn_compressed);
                         goto uncompressed;
                 }
 
diff --git a/src/journal/journald-native.c b/src/journal/journald-native.c
index f982696..f701233 100644
--- a/src/journal/journald-native.c
+++ b/src/journal/journald-native.c
@@ -350,7 +350,7 @@ void server_process_native_file(
                         return;
                 }
 
-                if (!filename_is_safe(e)) {
+                if (!filename_is_valid(e)) {
                         log_error("Received file in subdirectory of allowed directories. Refusing.");
                         return;
                 }
diff --git a/src/locale/localed.c b/src/locale/localed.c
index 8d60d0f..0aaa63d 100644
--- a/src/locale/localed.c
+++ b/src/locale/localed.c
@@ -965,8 +965,8 @@ static int method_set_vc_keyboard(sd_bus *bus, sd_bus_message *m, void *userdata
         if (!streq_ptr(keymap, c->vc_keymap) ||
             !streq_ptr(keymap_toggle, c->vc_keymap_toggle)) {
 
-                if ((keymap && (!filename_is_safe(keymap) || !string_is_safe(keymap))) ||
-                    (keymap_toggle && (!filename_is_safe(keymap_toggle) || !string_is_safe(keymap_toggle))))
+                if ((keymap && (!filename_is_valid(keymap) || !string_is_safe(keymap))) ||
+                    (keymap_toggle && (!filename_is_valid(keymap_toggle) || !string_is_safe(keymap_toggle))))
                         return sd_bus_error_set_errnof(error, -EINVAL, "Received invalid keymap data");
 
                 r = bus_verify_polkit_async(m, CAP_SYS_ADMIN, "org.freedesktop.locale1.set-keyboard", interactive, &c->polkit_registry, error);
diff --git a/src/shared/dropin.c b/src/shared/dropin.c
index ac09be9..c5c4f95 100644
--- a/src/shared/dropin.c
+++ b/src/shared/dropin.c
@@ -43,7 +43,7 @@ int drop_in_file(const char *dir, const char *unit, unsigned level,
         if (!b)
                 return -ENOMEM;
 
-        if (!filename_is_safe(b))
+        if (!filename_is_valid(b))
                 return -EINVAL;
 
         p = strjoin(dir, "/", unit, ".d", NULL);
diff --git a/src/shared/locale-util.c b/src/shared/locale-util.c
index 9addb05..61db9a8 100644
--- a/src/shared/locale-util.c
+++ b/src/shared/locale-util.c
@@ -195,7 +195,7 @@ bool locale_is_valid(const char *name) {
         if (!utf8_is_valid(name))
                 return false;
 
-        if (!filename_is_safe(name))
+        if (!filename_is_valid(name))
                 return false;
 
         if (!string_is_safe(name))
diff --git a/src/shared/util.c b/src/shared/util.c
index 6383c0f..273552f 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -4284,15 +4284,15 @@ int fd_wait_for_event(int fd, int event, usec_t t) {
 int fopen_temporary(const char *path, FILE **_f, char **_temp_path) {
         FILE *f;
         char *t;
-        int fd;
+        int r, fd;
 
         assert(path);
         assert(_f);
         assert(_temp_path);
 
-        t = tempfn_xxxxxx(path);
-        if (!t)
-                return -ENOMEM;
+        r = tempfn_xxxxxx(path, &t);
+        if (r < 0)
+                return r;
 
         fd = mkostemp_safe(t, O_WRONLY|O_CLOEXEC);
         if (fd < 0) {
@@ -4403,13 +4403,14 @@ int vt_disallocate(const char *name) {
 
 int symlink_atomic(const char *from, const char *to) {
         _cleanup_free_ char *t = NULL;
+        int r;
 
         assert(from);
         assert(to);
 
-        t = tempfn_random(to);
-        if (!t)
-                return -ENOMEM;
+        r = tempfn_random(to, &t);
+        if (r < 0)
+                return r;
 
         if (symlink(from, t) < 0)
                 return -errno;
@@ -4424,12 +4425,13 @@ int symlink_atomic(const char *from, const char *to) {
 
 int mknod_atomic(const char *path, mode_t mode, dev_t dev) {
         _cleanup_free_ char *t = NULL;
+        int r;
 
         assert(path);
 
-        t = tempfn_random(path);
-        if (!t)
-                return -ENOMEM;
+        r = tempfn_random(path, &t);
+        if (r < 0)
+                return r;
 
         if (mknod(t, mode, dev) < 0)
                 return -errno;
@@ -4444,12 +4446,13 @@ int mknod_atomic(const char *path, mode_t mode, dev_t dev) {
 
 int mkfifo_atomic(const char *path, mode_t mode) {
         _cleanup_free_ char *t = NULL;
+        int r;
 
         assert(path);
 
-        t = tempfn_random(path);
-        if (!t)
-                return -ENOMEM;
+        r = tempfn_random(path, &t);
+        if (r < 0)
+                return r;
 
         if (mkfifo(t, mode) < 0)
                 return -errno;
@@ -5561,7 +5564,7 @@ int get_shell(char **_s) {
         return 0;
 }
 
-bool filename_is_safe(const char *p) {
+bool filename_is_valid(const char *p) {
 
         if (isempty(p))
                 return false;
@@ -6963,42 +6966,45 @@ int fflush_and_check(FILE *f) {
         return 0;
 }
 
-char *tempfn_xxxxxx(const char *p) {
+int tempfn_xxxxxx(const char *p, char **ret) {
         const char *fn;
         char *t;
-        size_t k;
 
         assert(p);
+        assert(ret);
+
+        fn = basename(p);
+        if (!filename_is_valid(fn))
+                return -EINVAL;
 
         t = new(char, strlen(p) + 1 + 6 + 1);
         if (!t)
-                return NULL;
-
-        fn = basename(p);
-        k = fn - p;
+                return -ENOMEM;
 
-        strcpy(stpcpy(stpcpy(mempcpy(t, p, k), "."), fn), "XXXXXX");
+        strcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), "."), fn), "XXXXXX");
 
-        return t;
+        *ret = t;
+        return 0;
 }
 
-char *tempfn_random(const char *p) {
+int tempfn_random(const char *p, char **ret) {
         const char *fn;
         char *t, *x;
         uint64_t u;
-        size_t k;
         unsigned i;
 
         assert(p);
+        assert(ret);
+
+        fn = basename(p);
+        if (!filename_is_valid(fn))
+                return -EINVAL;
 
         t = new(char, strlen(p) + 1 + 16 + 1);
         if (!t)
-                return NULL;
-
-        fn = basename(p);
-        k = fn - p;
+                return -ENOMEM;
 
-        x = stpcpy(stpcpy(mempcpy(t, p, k), "."), fn);
+        x = stpcpy(stpcpy(mempcpy(t, p, fn - p), "."), fn);
 
         u = random_u64();
         for (i = 0; i < 16; i++) {
@@ -7008,7 +7014,8 @@ char *tempfn_random(const char *p) {
 
         *x = 0;
 
-        return t;
+        *ret = t;
+        return 0;
 }
 
 /* make sure the hostname is not "localhost" */
diff --git a/src/shared/util.h b/src/shared/util.h
index 73bd901..a15ce95 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -714,7 +714,7 @@ _alloc_(2, 3) static inline void *memdup_multiply(const void *p, size_t a, size_
         return memdup(p, a * b);
 }
 
-bool filename_is_safe(const char *p) _pure_;
+bool filename_is_valid(const char *p) _pure_;
 bool path_is_safe(const char *p) _pure_;
 bool string_is_safe(const char *p) _pure_;
 bool string_has_cc(const char *p, const char *ok) _pure_;
@@ -1015,8 +1015,8 @@ int bind_remount_recursive(const char *prefix, bool ro);
 
 int fflush_and_check(FILE *f);
 
-char *tempfn_xxxxxx(const char *p);
-char *tempfn_random(const char *p);
+int tempfn_xxxxxx(const char *p, char **ret);
+int tempfn_random(const char *p, char **ret);
 
 bool is_localhost(const char *hostname);
 
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index 2ceb303..5ed430c 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -5730,16 +5730,16 @@ static int unit_file_find_path(LookupPaths *lp, const char *unit_name, char **un
 }
 
 static int create_edit_temp_file(const char *new_path, const char *original_path, char **ret_tmp_fn) {
-        int r;
         char *t;
+        int r;
 
         assert(new_path);
         assert(original_path);
         assert(ret_tmp_fn);
 
-        t = tempfn_random(new_path);
-        if (!t)
-                return log_oom();
+        r = tempfn_random(new_path, &t);
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine temporary filename for %s: %m", new_path);
 
         r = mkdir_parents(new_path, 0755);
         if (r < 0) {
diff --git a/src/test/test-util.c b/src/test/test-util.c
index fe54586..6c7d77b 100644
--- a/src/test/test-util.c
+++ b/src/test/test-util.c
@@ -802,24 +802,24 @@ static void test_foreach_string(void) {
                 assert_se(streq(x, "zzz"));
 }
 
-static void test_filename_is_safe(void) {
+static void test_filename_is_valid(void) {
         char foo[FILENAME_MAX+2];
         int i;
 
-        assert_se(!filename_is_safe(""));
-        assert_se(!filename_is_safe("/bar/foo"));
-        assert_se(!filename_is_safe("/"));
-        assert_se(!filename_is_safe("."));
-        assert_se(!filename_is_safe(".."));
+        assert_se(!filename_is_valid(""));
+        assert_se(!filename_is_valid("/bar/foo"));
+        assert_se(!filename_is_valid("/"));
+        assert_se(!filename_is_valid("."));
+        assert_se(!filename_is_valid(".."));
 
         for (i=0; i<FILENAME_MAX+1; i++)
                 foo[i] = 'a';
         foo[FILENAME_MAX+1] = '\0';
 
-        assert_se(!filename_is_safe(foo));
+        assert_se(!filename_is_valid(foo));
 
-        assert_se(filename_is_safe("foo_bar-333"));
-        assert_se(filename_is_safe("o.o"));
+        assert_se(filename_is_valid("foo_bar-333"));
+        assert_se(filename_is_valid("o.o"));
 }
 
 static void test_string_has_cc(void) {
@@ -1360,7 +1360,7 @@ int main(int argc, char *argv[]) {
         test_hexdump();
         test_log2i();
         test_foreach_string();
-        test_filename_is_safe();
+        test_filename_is_valid();
         test_string_has_cc();
         test_ascii_strlower();
         test_files_same();



More information about the systemd-commits mailing list