[systemd-devel] [PATCH 8/8] shutdown: add kexec loading, avoid calling `kexec` binary unnessecarily

Shawn Landden shawn at churchofgit.com
Fri Feb 20 14:31:05 PST 2015


Still use helper when Xen Dom0, to avoid duplicating some hairy code.
---
 Makefile.am               |   4 +-
 TODO                      |   3 -
 src/power/shutdown.c      |  28 ++++---
 src/shared/missing.h      |  11 +++
 src/shared/strv.c         |   9 ++-
 src/systemctl/bootspec.c  | 195 ++++++++++++++++++++++++++++++++++++++++++++++
 src/systemctl/bootspec.h  |  54 +++++++++++++
 src/systemctl/systemctl.c |  33 +++++++-
 8 files changed, 321 insertions(+), 16 deletions(-)
 create mode 100644 src/systemctl/bootspec.c
 create mode 100644 src/systemctl/bootspec.h

diff --git a/Makefile.am b/Makefile.am
index 1310a20..0c5df47 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -2736,7 +2736,9 @@ systemd_escape_LDADD = \
 
 # -----------------------------------------------------------------------------
 systemctl_SOURCES = \
-	src/systemctl/systemctl.c
+	src/systemctl/systemctl.c \
+	src/systemctl/bootspec.c \
+	src/systemctl/bootspec.h
 
 systemctl_LDADD = \
 	libsystemd-units.la \
diff --git a/TODO b/TODO
index bf66ba1..b2b6cb3 100644
--- a/TODO
+++ b/TODO
@@ -69,9 +69,6 @@ Features:
 * maybe introduce WantsMountsFor=? Usecase:
   http://lists.freedesktop.org/archives/systemd-devel/2015-January/027729.html
 
-* rework kexec logic to use new kexec_file_load() syscall, so that we
-  don't have to call kexec tool anymore.
-
 * The udev blkid built-in should expose a property that reflects
   whether media was sensed in USB CF/SD card readers. This should then
   be used to control SYSTEMD_READY=1/0 so that USB card readers aren't
diff --git a/src/power/shutdown.c b/src/power/shutdown.c
index ffd12b8..3044fbc 100644
--- a/src/power/shutdown.c
+++ b/src/power/shutdown.c
@@ -19,6 +19,7 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
+#include <ctype.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/reboot.h>
@@ -187,9 +188,13 @@ int main(int argc, char *argv[]) {
                 cmd = RB_POWER_OFF;
         else if (streq(arg_verb, "halt"))
                 cmd = RB_HALT_SYSTEM;
-        else if (streq(arg_verb, "kexec"))
-                cmd = LINUX_REBOOT_CMD_KEXEC;
-        else {
+        else if (streq(arg_verb, "kexec")) {
+                if (in_container) {
+                        log_warning("Can't kexec from container. Rebooting…");
+                        cmd = RB_AUTOBOOT;
+                } else
+                        cmd = LINUX_REBOOT_CMD_KEXEC;
+        } else {
                 r = -EINVAL;
                 log_error("Unknown action '%s'.", arg_verb);
                 goto error;
@@ -208,8 +213,6 @@ int main(int argc, char *argv[]) {
         log_info("Sending SIGKILL to remaining processes...");
         broadcast_signal(SIGKILL, true, false);
 
-        in_container = detect_container(NULL) > 0;
-
         need_umount = !in_container;
         need_swapoff = !in_container;
         need_loop_detach = !in_container;
@@ -349,11 +352,14 @@ int main(int argc, char *argv[]) {
 
         case LINUX_REBOOT_CMD_KEXEC:
 
-                if (!in_container) {
-                        /* We cheat and exec kexec to avoid doing all its work */
-                        pid_t pid;
+                log_info("Rebooting with kexec.");
 
-                        log_info("Rebooting with kexec.");
+                /* kexec-tools has a bunch of special code to make Xen Dom0 work,
+                 * otherwise it is only doing stuff we have already done.
+                 * This is true for Dom0 and DomU but we only get Dom0
+                 * because of the !in_container check */
+                if (access("/proc/xen", F_OK) == 0) {
+                        pid_t pid;
 
                         pid = fork();
                         if (pid < 0)
@@ -370,7 +376,9 @@ int main(int argc, char *argv[]) {
                                 _exit(EXIT_FAILURE);
                         } else
                                 wait_for_terminate_and_warn("kexec", pid, true);
-                }
+
+                } else
+                        reboot(cmd);
 
                 cmd = RB_AUTOBOOT;
                 /* Fall through */
diff --git a/src/shared/missing.h b/src/shared/missing.h
index b33a70c..d80b2a7 100644
--- a/src/shared/missing.h
+++ b/src/shared/missing.h
@@ -35,6 +35,7 @@
 #include <linux/loop.h>
 #include <linux/audit.h>
 #include <linux/capability.h>
+#include <linux/kexec.h>
 
 #ifdef HAVE_AUDIT
 #include <libaudit.h>
@@ -763,3 +764,13 @@ static inline int kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, uns
 #ifndef KCMP_FILE
 #define KCMP_FILE 0
 #endif
+
+/* v3.17 */
+#ifndef __NR_kexec_file_load
+#ifdef __x86_64__
+#define __NR_kexec_file_load 320
+#endif
+#endif
+#ifndef KEXEC_FILE_NO_INITRAMFS
+#define KEXEC_FILE_NO_INITRAMFS        0x00000004
+#endif
diff --git a/src/shared/strv.c b/src/shared/strv.c
index e27ac68..d983665 100644
--- a/src/shared/strv.c
+++ b/src/shared/strv.c
@@ -82,7 +82,14 @@ void strv_clear(char **l) {
 }
 
 void strv_free(char **l) {
-        strv_clear(l);
+        char **k;
+
+        if (!l)
+                return;
+
+        for (k = l; *k; k++)
+                free(*k);
+
         free(l);
 }
 
diff --git a/src/systemctl/bootspec.c b/src/systemctl/bootspec.c
new file mode 100644
index 0000000..8673957
--- /dev/null
+++ b/src/systemctl/bootspec.c
@@ -0,0 +1,195 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2015 Shawn Landden
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/*
+ * Implements http://freedesktop.org/wiki/Specifications/BootLoaderSpec/
+ * for use with kexec
+ */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <sys/utsname.h>
+#include <linux/kexec.h>
+
+#include "bootspec.h"
+#include "strv.h"
+#include "fileio.h"
+#include "rpmvercmp.h"
+
+void bootspec_free(struct BootSpec *s) {
+        if (!s)
+                return;
+
+        free(s->conf);
+        free(s);
+}
+
+static int bootspec_cmp(const struct rbtree_node *left, const struct rbtree_node *right) {
+        struct BootSpec *l = rbtree_container_of( left, struct BootSpec, node),
+                        *r = rbtree_container_of(right, struct BootSpec, node);
+
+        return rpmvercmp(l->version, r->version);
+}
+
+int kernel_bootloaderspec_readconf(struct rbtree *tree) {
+        int r = 0;
+        _cleanup_closedir_ DIR *entries;
+        struct dirent *dir;
+
+        rbtree_init(tree, bootspec_cmp, 0);
+
+        entries = opendir(BOOTENTRIESDIR);
+        if (!entries) {
+                if (r == -ENOENT)
+                        return 0;
+                else
+                        return -errno;
+        }
+
+        for (size_t i=0;(dir = readdir(entries));i++) {
+                struct BootSpec *entry;
+                /*   compiler wont allow 256 + strlen(BOOTENTRIESDIR) here */
+                char fn[512] = {BOOTENTRIESDIR}, *d_name = &fn[strlen(BOOTENTRIESDIR)],
+                     *m, *l, *k;
+
+                if (!endswith(dir->d_name, ".conf")) {
+                        i--;
+                        continue;
+                }
+
+                entry = new0(struct BootSpec, 1);
+                if (!entry)
+                        return -ENOMEM;
+
+                (void)strcpy(d_name, dir->d_name);
+
+                r = read_full_file(fn, &entry->conf, NULL);
+                if (r < 0)
+                        return -errno;
+
+                for (m = entry->conf; ; m = k + 1) {
+                        if (m[0] == '#')
+                                continue;
+
+                        k = strchr(m, '\n');
+
+                        if (k)
+                                *k = '\0';
+                        else
+                                break;
+
+                        if      ((l = startswith(m, "title ")))
+                                entry->title      = l + strspn(l, WHITESPACE);
+                        else if ((l = startswith(m, "version ")))
+                                entry->version    = l + strspn(l, WHITESPACE);
+                        else if ((l = startswith(m, "machine-id ")))
+                                (void)sd_id128_from_string(l + strspn(l, WHITESPACE), &entry->machine_id);
+                        else if ((l = startswith(m, "options ")))
+                                entry->options    = l + strspn(l, WHITESPACE);
+                        else if ((l = startswith(m, "linux ")))
+                                entry->linux_loc  = l + strspn(l, WHITESPACE);
+                        else if ((l = startswith(m, "initrd ")))
+                                entry->initrd     = l + strspn(l, WHITESPACE);
+                        else if ((l = startswith(m, "efi ")))
+                                entry->efi        = l + strspn(l, WHITESPACE);
+                        else if ((l = startswith(m, "devicetree ")))
+                                entry->devicetree = l + strspn(l, WHITESPACE);
+                        else
+                                continue;
+                }
+
+                /* not interested in EFI programs */
+                if (!entry->linux_loc) {
+                        i--;
+                        continue;
+                }
+
+                if (&entry->node != rbtree_insert(&entry->node, tree)) {
+                        /* already an entry with the same version string */
+                }
+        }
+
+        return r;
+}
+
+int kernel_load(bool overwrite) {
+        int r = -ENOTSUP;
+
+/* only x86_64 and x32 in 3.18 */
+#ifdef __NR_kexec_file_load
+                if (!overwrite && !kexec_loaded()) {
+                        struct utsname u;
+                        char vmlinuz[PATH_MAX+1], initrd[PATH_MAX+1], *cmdline = NULL;
+                        _cleanup_close_ int vmlinuz_fd = -1, initrd_fd = -1;
+                        struct rbtree tree;
+                        struct BootSpec *c;
+                        struct rbtree_node *n;
+                        int flags = 0;
+
+                        r = uname(&u);
+                        if (r < 0)
+                                return -errno;
+
+                        r = kernel_bootloaderspec_readconf(&tree);
+                        if (r < 0)
+                                return r;
+
+                        n = rbtree_first(&tree);
+                        if (!n)
+                                return -ENOENT;
+                        c = rbtree_container_of(n, struct BootSpec, node);
+
+                        strcpy(vmlinuz, BOOTSPECDIR);
+                        strncat(vmlinuz, c->linux_loc, PATH_MAX - strlen(vmlinuz));
+                        cmdline = c->options;
+
+                        vmlinuz_fd = open(vmlinuz, O_RDONLY);
+                        if (vmlinuz_fd < 0)
+                                return -errno;
+
+                        if (c->initrd) {
+                                strcpy(initrd, BOOTSPECDIR);
+                                strncat(initrd,  c->initrd,    PATH_MAX - strlen(initrd));
+                                initrd_fd  = open(initrd,  O_RDONLY);
+                                if (initrd_fd < 0)
+                                        return -errno;
+                        } else {
+                                flags |= KEXEC_FILE_NO_INITRAMFS;
+                                initrd_fd = -1;
+                        }
+
+                        log_info("kexec: kexec -l %s --initrd=%s --command-line=%s", vmlinuz, initrd, cmdline);
+
+                        r = syscall(__NR_kexec_file_load, vmlinuz_fd, initrd_fd, cmdline, strlen(cmdline), flags);
+                        if (r < 0)
+                                return -errno;
+
+                        /* free tree */
+                        c = NULL;
+                        for (n = rbtree_first(&tree); n; n = rbtree_next(&c->node)) {
+                                bootspec_free(c);
+                                c = rbtree_container_of(n, struct BootSpec, node);
+                        }
+                } else
+                        r = 0;
+#endif
+        return r;
+}
diff --git a/src/systemctl/bootspec.h b/src/systemctl/bootspec.h
new file mode 100644
index 0000000..19e2901
--- /dev/null
+++ b/src/systemctl/bootspec.h
@@ -0,0 +1,54 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2015 Shawn Landden
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdbool.h>
+#include <errno.h>
+
+#include "sd-id128.h"
+#include "rbtree.h"
+
+#define BOOTSPECDIR "/boot/loader/"
+#define BOOTENTRIESDIR BOOTSPECDIR"entries/"
+
+struct BootSpec {
+        /* The others are just pointers into malloc()ed conf */
+        char *conf;
+
+        struct rbtree_node node;
+
+        char *title;
+        char *version;
+        sd_id128_t machine_id;
+        char *options;
+        char *linux_loc; /* linux is a reserved keyword with gcc! (and clang!)
+                            https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65128 */
+        char *initrd;
+        char *efi;
+        char *devicetree;
+};
+
+void bootspec_free(struct BootSpec *l);
+
+/* returns a populated rbtree of BootSpec, which has been sorted via treesort */
+int kernel_bootloaderspec_readconf(struct rbtree *tree);
+int kernel_load(bool overwrite);
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index 9b08313..e1f7af9 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -74,6 +74,7 @@
 #include "bus-common-errors.h"
 #include "mkdir.h"
 #include "dropin.h"
+#include "bootspec.h"
 
 static char **arg_types = NULL;
 static char **arg_states = NULL;
@@ -228,6 +229,29 @@ static int translate_bus_error_to_exit_status(int r, const sd_bus_error *error)
         return EXIT_FAILURE;
 }
 
+static int list_kernels(sd_bus *bus, char **args) {
+        int r;
+        struct rbtree tree;
+        struct BootSpec *b = NULL;
+        struct rbtree_node *n;
+
+        r = kernel_bootloaderspec_readconf(&tree);
+        if (r < 0) {
+                log_error_errno(r, "Failed to enumerate boot entries: %m");
+                return r;
+        }
+
+        for (n = rbtree_first(&tree); n; n = rbtree_next(&b->node)) {
+                /* free prev (or NULL) */
+                bootspec_free(b);
+
+                b = rbtree_container_of(n, struct BootSpec, node);
+                printf("%s - %s\n", b->version, b->title);
+        }
+
+        return 0;
+}
+
 static void warn_wall(enum action a) {
         static const char *table[_ACTION_MAX] = {
                 [ACTION_HALT]            = "The system is going down for system halt NOW!",
@@ -6960,6 +6984,7 @@ static int systemctl_main(sd_bus *bus, int argc, char *argv[], int bus_error) {
                 } bus;
         } verbs[] = {
                 { "list-units",            MORE,  0, list_units        },
+                { "list-kernels",          EQUAL, 1, list_kernels      },
                 { "list-unit-files",       MORE,  1, list_unit_files,  NOBUS },
                 { "list-sockets",          MORE,  1, list_sockets      },
                 { "list-timers",           MORE,  1, list_timers       },
@@ -7371,10 +7396,16 @@ int main(int argc, char*argv[]) {
                 r = systemctl_main(bus, argc, argv, r);
                 break;
 
+        case ACTION_KEXEC:
+                r = kernel_load(false);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to load kernel: %m");
+                        goto finish;
+                }
+                /* fall through */
         case ACTION_HALT:
         case ACTION_POWEROFF:
         case ACTION_REBOOT:
-        case ACTION_KEXEC:
                 r = halt_main(bus);
                 break;
 
-- 
2.2.1.209.g41e5f3a



More information about the systemd-devel mailing list