[systemd-devel] [v1] shutdown: add kexec loading, avoid calling `kexec` binary unnessecarily
Zbigniew Jędrzejewski-Szmek
zbyszek at in.waw.pl
Thu Feb 26 18:14:57 PST 2015
On Fri, Feb 20, 2015 at 07:16:36PM -0800, Shawn Landden wrote:
> Still use helper when Xen Dom0, to avoid duplicating some hairy code.
>
> Future: generate BootLoaderSpec files for other kernel install locations
>
> v2: support specifying the kernel version
> support appending to the kernel cmdline
> some docs
> support double force with kexec
> allow rpmvercmp() to compare against NULL
> ---
> Makefile.am | 4 +-
> TODO | 3 -
> man/systemctl.xml | 14 +++-
> src/power/shutdown.c | 28 ++++---
> src/shared/missing.h | 11 +++
> src/shared/rpmvercmp.c | 12 ++-
> src/shared/strv.c | 9 +-
> src/systemctl/bootspec.c | 204 ++++++++++++++++++++++++++++++++++++++++++++++
> src/systemctl/bootspec.h | 54 ++++++++++++
> src/systemctl/systemctl.c | 59 +++++++++++++-
> 10 files changed, 376 insertions(+), 22 deletions(-)
> create mode 100644 src/systemctl/bootspec.c
> create mode 100644 src/systemctl/bootspec.h
>
> diff --git a/Makefile.am b/Makefile.am
> index 1310a20..0c5df47 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -2736,7 +2736,9 @@ systemd_escape_LDADD = \
>
> # -----------------------------------------------------------------------------
> systemctl_SOURCES = \
> - src/systemctl/systemctl.c
> + src/systemctl/systemctl.c \
> + src/systemctl/bootspec.c \
> + src/systemctl/bootspec.h
>
> systemctl_LDADD = \
> libsystemd-units.la \
> diff --git a/TODO b/TODO
> index bf66ba1..b2b6cb3 100644
> --- a/TODO
> +++ b/TODO
> @@ -69,9 +69,6 @@ Features:
> * maybe introduce WantsMountsFor=? Usecase:
> http://lists.freedesktop.org/archives/systemd-devel/2015-January/027729.html
>
> -* rework kexec logic to use new kexec_file_load() syscall, so that we
> - don't have to call kexec tool anymore.
> -
> * The udev blkid built-in should expose a property that reflects
> whether media was sensed in USB CF/SD card readers. This should then
> be used to control SYSTEMD_READY=1/0 so that USB card readers aren't
> diff --git a/man/systemctl.xml b/man/systemctl.xml
> index 338c1d3..fc33d95 100644
> --- a/man/systemctl.xml
> +++ b/man/systemctl.xml
> @@ -607,6 +607,15 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
> </varlistentry>
>
> <varlistentry>
> + <term><command>list-kernels</command></term>
> +
> + <listitem>
> + <para>List kernels ordered by version.
> + </para>
> + </listitem>
> + </varlistentry>
> +
> + <varlistentry>
> <term><command>start <replaceable>PATTERN</replaceable>...</command></term>
>
> <listitem>
> @@ -1550,7 +1559,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
> </varlistentry>
>
> <varlistentry>
> - <term><command>kexec</command></term>
> + <term><command>kexec <optional><replaceable>VERSION</replaceable></optional><optional><replaceable>CMDLINE</replaceable>...</optional></command></term>
Ellipsis means repetition. So this should be something like KERNEL-ARG instead
of CMDLINE.
> <listitem>
> <para>Shut down and reboot the system via kexec. This is
> @@ -1560,6 +1569,9 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
> services is skipped, however all processes are killed and
> all file systems are unmounted or mounted read-only,
> immediately followed by the reboot.</para>
> +
> + <para>Also allows specifying the version and optionally
> + extra kernel parameters to append.</para>
What happens if the version is not specified? Current kernel, latest kernel?
> </listitem>
> </varlistentry>
>
> diff --git a/src/power/shutdown.c b/src/power/shutdown.c
> index ffd12b8..3044fbc 100644
> --- a/src/power/shutdown.c
> +++ b/src/power/shutdown.c
> @@ -19,6 +19,7 @@
> along with systemd; If not, see <http://www.gnu.org/licenses/>.
> ***/
>
> +#include <ctype.h>
> #include <sys/mman.h>
> #include <sys/types.h>
> #include <sys/reboot.h>
> @@ -187,9 +188,13 @@ int main(int argc, char *argv[]) {
> cmd = RB_POWER_OFF;
> else if (streq(arg_verb, "halt"))
> cmd = RB_HALT_SYSTEM;
> - else if (streq(arg_verb, "kexec"))
> - cmd = LINUX_REBOOT_CMD_KEXEC;
> - else {
> + else if (streq(arg_verb, "kexec")) {
> + if (in_container) {
> + log_warning("Can't kexec from container. Rebooting…");
> + cmd = RB_AUTOBOOT;
> + } else
> + cmd = LINUX_REBOOT_CMD_KEXEC;
> + } else {
> r = -EINVAL;
> log_error("Unknown action '%s'.", arg_verb);
> goto error;
> @@ -208,8 +213,6 @@ int main(int argc, char *argv[]) {
> log_info("Sending SIGKILL to remaining processes...");
> broadcast_signal(SIGKILL, true, false);
>
> - in_container = detect_container(NULL) > 0;
> -
> need_umount = !in_container;
> need_swapoff = !in_container;
> need_loop_detach = !in_container;
> @@ -349,11 +352,14 @@ int main(int argc, char *argv[]) {
>
> case LINUX_REBOOT_CMD_KEXEC:
>
> - if (!in_container) {
> - /* We cheat and exec kexec to avoid doing all its work */
> - pid_t pid;
> + log_info("Rebooting with kexec.");
>
> - log_info("Rebooting with kexec.");
> + /* kexec-tools has a bunch of special code to make Xen Dom0 work,
> + * otherwise it is only doing stuff we have already done.
> + * This is true for Dom0 and DomU but we only get Dom0
> + * because of the !in_container check */
> + if (access("/proc/xen", F_OK) == 0) {
> + pid_t pid;
>
> pid = fork();
> if (pid < 0)
> @@ -370,7 +376,9 @@ int main(int argc, char *argv[]) {
> _exit(EXIT_FAILURE);
> } else
> wait_for_terminate_and_warn("kexec", pid, true);
> - }
> +
> + } else
> + reboot(cmd);
>
> cmd = RB_AUTOBOOT;
> /* Fall through */
> diff --git a/src/shared/missing.h b/src/shared/missing.h
> index b33a70c..d80b2a7 100644
> --- a/src/shared/missing.h
> +++ b/src/shared/missing.h
> @@ -35,6 +35,7 @@
> #include <linux/loop.h>
> #include <linux/audit.h>
> #include <linux/capability.h>
> +#include <linux/kexec.h>
>
> #ifdef HAVE_AUDIT
> #include <libaudit.h>
> @@ -763,3 +764,13 @@ static inline int kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, uns
> #ifndef KCMP_FILE
> #define KCMP_FILE 0
> #endif
> +
> +/* v3.17 */
> +#ifndef __NR_kexec_file_load
> +#ifdef __x86_64__
> +#define __NR_kexec_file_load 320
> +#endif
> +#endif
> +#ifndef KEXEC_FILE_NO_INITRAMFS
> +#define KEXEC_FILE_NO_INITRAMFS 0x00000004
> +#endif
> diff --git a/src/shared/rpmvercmp.c b/src/shared/rpmvercmp.c
> index c69c2e3..09cfd97 100644
> --- a/src/shared/rpmvercmp.c
> +++ b/src/shared/rpmvercmp.c
> @@ -13,8 +13,16 @@
> /* return 1: a is newer than b */
> /* 0: a and b are the same version */
> /* -1: b is newer than a */
> -int rpmvercmp(const char * a, const char * b)
> -{
> +int rpmvercmp(const char * a, const char * b) {
> + if (!a) {
> + if (b)
> + return -1;
> + else
> + return 0;
> + }
> + if (!b)
> + return 1;
> +
> /* easy comparison to see if versions are identical */
> if (streq_ptr(a, b)) return 0;
>
> diff --git a/src/shared/strv.c b/src/shared/strv.c
> index e27ac68..d983665 100644
> --- a/src/shared/strv.c
> +++ b/src/shared/strv.c
> @@ -82,7 +82,14 @@ void strv_clear(char **l) {
> }
>
> void strv_free(char **l) {
> - strv_clear(l);
> + char **k;
> +
> + if (!l)
> + return;
> +
> + for (k = l; *k; k++)
> + free(*k);
> +
> free(l);
> }
>
> diff --git a/src/systemctl/bootspec.c b/src/systemctl/bootspec.c
> new file mode 100644
> index 0000000..0a43c7b
> --- /dev/null
> +++ b/src/systemctl/bootspec.c
> @@ -0,0 +1,204 @@
> +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
> +
> +/***
> + This file is part of systemd.
> +
> + Copyright 2015 Shawn Landden
> +
> + systemd is free software; you can redistribute it and/or modify it
> + under the terms of the GNU Lesser General Public License as published by
> + the Free Software Foundation; either version 2.1 of the License, or
> + (at your option) any later version.
> +
> + systemd is distributed in the hope that it will be useful, but
> + WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public License
> + along with systemd; If not, see <http://www.gnu.org/licenses/>.
> +***/
> +
> +/*
> + * Implements http://freedesktop.org/wiki/Specifications/BootLoaderSpec/
> + * for use with kexec
> + */
> +
> +#include <ctype.h>
> +#include <dirent.h>
> +#include <sys/utsname.h>
> +#include <linux/kexec.h>
> +
> +#include "bootspec.h"
> +#include "strv.h"
> +#include "fileio.h"
> +#include "rpmvercmp.h"
> +
> +void bootspec_free(struct BootSpec *s) {
> + if (!s)
> + return;
> +
> + free(s->conf);
> + free(s);
> +}
> +
> +static int bootspec_cmp(const struct rbtree_node *left, const struct rbtree_node *right) {
> + struct BootSpec *l = rbtree_container_of( left, struct BootSpec, node),
> + *r = rbtree_container_of(right, struct BootSpec, node);
> +
> + return rpmvercmp(l->version, r->version);
> +}
> +
> +int kernel_bootloaderspec_readconf(struct rbtree *tree) {
> + int r = 0;
> + _cleanup_closedir_ DIR *entries;
> + struct dirent *dir;
> +
> + rbtree_init(tree, bootspec_cmp, 0);
> +
> + entries = opendir(BOOTENTRIESDIR);
> + if (!entries) {
> + if (r == -ENOENT)
errno, not r.
> + return 0;
> + else
> + return -errno;
> + }
> +
> + for (size_t i=0;(dir = readdir(entries));i++) {
Please use the same style as other readdir loops.
> + struct BootSpec *entry;
> + /* compiler wont allow 256 + strlen(BOOTENTRIESDIR) here */
> + char fn[512] = {BOOTENTRIESDIR}, *d_name = &fn[strlen(BOOTENTRIESDIR)],
Are you trying to open the files in the directory in turn?
Use openat(dirfd(dir), ...) instead.
> + *m, *l, *k;
> +
> + if (!endswith(dir->d_name, ".conf")) {
> + i--;
> + continue;
> + }
> +
> + entry = new0(struct BootSpec, 1);
> + if (!entry)
> + return -ENOMEM;
> +
> + (void)strcpy(d_name, dir->d_name);
> +
> + r = read_full_file(fn, &entry->conf, NULL);
> + if (r < 0)
> + return -errno;
> +
Reading of a single file should be a separate function. We'll want
to add some tests for it later on.
> + for (m = entry->conf; ; m = k + 1) {
> + if (m[0] == '#')
> + continue;
> +
> + k = strchr(m, '\n');
> +
> + if (k)
> + *k = '\0';
> + else
> + break;
> +
> + if ((l = startswith(m, "title ")))
> + entry->title = l + strspn(l, WHITESPACE);
> + else if ((l = startswith(m, "version ")))
> + entry->version = l + strspn(l, WHITESPACE);
> + else if ((l = startswith(m, "machine-id ")))
> + (void)sd_id128_from_string(l + strspn(l, WHITESPACE), &entry->machine_id);
> + else if ((l = startswith(m, "options ")))
> + entry->options = l + strspn(l, WHITESPACE);
> + else if ((l = startswith(m, "linux ")))
> + entry->linux_loc = l + strspn(l, WHITESPACE);
> + else if ((l = startswith(m, "initrd ")))
> + entry->initrd = l + strspn(l, WHITESPACE);
> + else if ((l = startswith(m, "efi ")))
> + entry->efi = l + strspn(l, WHITESPACE);
> + else if ((l = startswith(m, "devicetree ")))
> + entry->devicetree = l + strspn(l, WHITESPACE);
> + else
> + continue;
> + }
> +
> + /* not interested in EFI programs */
> + if (!entry->linux_loc) {
> + i--;
> + continue;
> + }
> +
> + if (&entry->node != rbtree_insert(&entry->node, tree)) {
> + /* already an entry with the same version string */
> + }
> + }
> +
> + return r;
> +}
> +
> +int kernel_load(char *version, char *append_cmdline, bool overwrite) {
> + int r = -ENOTSUP;
> +
> +/* only x86_64 and x32 in 3.18 */
> +#ifdef __NR_kexec_file_load
> + if (!overwrite && !kexec_loaded()) {
> + struct utsname u;
> + char vmlinuz[PATH_MAX+1], initrd[PATH_MAX+1], *cmdline = NULL;
> + _cleanup_close_ int vmlinuz_fd = -1, initrd_fd = -1;
> + struct rbtree tree;
> + struct BootSpec *c;
> + struct rbtree_node *n;
> + int flags = 0;
> +
> + r = uname(&u);
> + if (r < 0)
> + return -errno;
> +
> + r = kernel_bootloaderspec_readconf(&tree);
> + if (r < 0)
> + return r;
> +
> + if (version) {
> + struct BootSpec q;
> +
> + q.version = version;
> + n = rbtree_lookup(&q.node, &tree);
> + } else
> + n = rbtree_first(&tree);
> + if (!n)
> + return -ENOENT;
> + c = rbtree_container_of(n, struct BootSpec, node);
> +
> + strcpy(vmlinuz, BOOTSPECDIR);
> + strncat(vmlinuz, c->linux_loc, PATH_MAX - strlen(vmlinuz));
> + cmdline = strjoina(c->options, " ", append_cmdline);
Why not just use strjoina, here and below?
> + vmlinuz_fd = open(vmlinuz, O_RDONLY);
> + if (vmlinuz_fd < 0)
> + return -errno;
> +
> + if (c->initrd) {
> + strcpy(initrd, BOOTSPECDIR);
> + strncat(initrd, c->initrd, PATH_MAX - strlen(initrd));
> + initrd_fd = open(initrd, O_RDONLY);
> + if (initrd_fd < 0)
> + return -errno;
> + } else {
> + flags |= KEXEC_FILE_NO_INITRAMFS;
> + initrd_fd = -1;
This was already set to -1 above.
> + }
> +
> + if (initrd_fd < 0)
> + log_info("kexec: kexec -l %s --command-line=%s", vmlinuz, cmdline);
> + else
> + log_info("kexec: kexec -l %s --initrd=%s --command-line=%s", vmlinuz, initrd_fd < 0 ? "" : initrd, cmdline);
> +
> + r = syscall(__NR_kexec_file_load, vmlinuz_fd, initrd_fd, cmdline, strlen(cmdline), flags);
Maybe you can do away with flags and say
syscall(__NR_kexec_file_load, vmlinuz_fd, initrd_fd, cmdline, strlen(cmdline),
initrd_fd >= 0 ? KEXEC_FILE_NO_INITRAMFS : 0);
> + if (r < 0)
> + return -errno;
> +
> + /* free tree */
> + c = NULL;
> + for (n = rbtree_first(&tree); n; n = rbtree_next(&c->node)) {
> + bootspec_free(c);
> + c = rbtree_container_of(n, struct BootSpec, node);
> + }
> + } else
> + r = 0;
> +#endif
> + return r;
> +}
> diff --git a/src/systemctl/bootspec.h b/src/systemctl/bootspec.h
> new file mode 100644
> index 0000000..b6a884a
> --- /dev/null
> +++ b/src/systemctl/bootspec.h
> @@ -0,0 +1,54 @@
> +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
> +
> +#pragma once
> +
> +/***
> + This file is part of systemd.
> +
> + Copyright 2015 Shawn Landden
> +
> + systemd is free software; you can redistribute it and/or modify it
> + under the terms of the GNU Lesser General Public License as published by
> + the Free Software Foundation; either version 2.1 of the License, or
> + (at your option) any later version.
> +
> + systemd is distributed in the hope that it will be useful, but
> + WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public License
> + along with systemd; If not, see <http://www.gnu.org/licenses/>.
> +***/
> +
> +#include <stdbool.h>
> +#include <errno.h>
> +
> +#include "sd-id128.h"
> +#include "rbtree.h"
> +
> +#define BOOTSPECDIR "/boot/loader/"
> +#define BOOTENTRIESDIR BOOTSPECDIR"entries/"
> +
> +struct BootSpec {
> + /* The others are just pointers into malloc()ed conf */
> + char *conf;
> +
> + struct rbtree_node node;
> +
> + char *title;
> + char *version;
> + sd_id128_t machine_id;
> + char *options;
> + char *linux_loc; /* linux is a reserved keyword with gcc! (and clang!)
> + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65128 */
> + char *initrd;
> + char *efi;
> + char *devicetree;
> +};
> +
> +void bootspec_free(struct BootSpec *l);
> +
> +/* returns a populated rbtree of BootSpec, which has been sorted via treesort */
> +int kernel_bootloaderspec_readconf(struct rbtree *tree);
> +int kernel_load(char *version, char *append_cmdline, bool overwrite);
> diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
> index 9b08313..f469c70 100644
> --- a/src/systemctl/systemctl.c
> +++ b/src/systemctl/systemctl.c
> @@ -74,6 +74,7 @@
> #include "bus-common-errors.h"
> #include "mkdir.h"
> #include "dropin.h"
> +#include "bootspec.h"
>
> static char **arg_types = NULL;
> static char **arg_states = NULL;
> @@ -228,6 +229,29 @@ static int translate_bus_error_to_exit_status(int r, const sd_bus_error *error)
> return EXIT_FAILURE;
> }
>
> +static int list_kernels(sd_bus *bus, char **args) {
> + int r;
> + struct rbtree tree;
> + struct BootSpec *b = NULL;
> + struct rbtree_node *n;
> +
> + r = kernel_bootloaderspec_readconf(&tree);
> + if (r < 0) {
> + log_error_errno(r, "Failed to enumerate boot entries: %m");
> + return r;
> + }
> +
> + for (n = rbtree_first(&tree); n; n = rbtree_next(&b->node)) {
> + /* free prev (or NULL) */
> + bootspec_free(b);
> +
> + b = rbtree_container_of(n, struct BootSpec, node);
> + printf("%s - %s\n", b->version, b->title);
> + }
> +
> + return 0;
> +}
> +
> static void warn_wall(enum action a) {
> static const char *table[_ACTION_MAX] = {
> [ACTION_HALT] = "The system is going down for system halt NOW!",
> @@ -2937,10 +2961,24 @@ static int start_special(sd_bus *bus, char **args) {
> return r;
> }
>
> + if (a == ACTION_KEXEC) {
> + char *cmd_append = NULL;
> +
> + if (args[2])
> + cmd_append = strv_join(&args[2], " ");
> +
> + r = kernel_load(args[1], cmd_append, false);
> + if (r < 0) {
> + log_error_errno(r, "Failed to load kernel: %m");
> + return r;
> + }
> + }
> +
> if (arg_force >= 2 &&
> (a == ACTION_HALT ||
> a == ACTION_POWEROFF ||
> - a == ACTION_REBOOT))
> + a == ACTION_REBOOT ||
> + a == ACTION_KEXEC))
> return halt_now(a);
>
> if (arg_force >= 1 &&
> @@ -6032,6 +6070,7 @@ static void systemctl_help(void) {
> " daemon-reload Reload systemd manager configuration\n"
> " daemon-reexec Reexecute systemd manager\n\n"
> "System Commands:\n"
> + " list-kernels List BootLoaderSpec kernels highest version first\n"
> " is-system-running Check whether system is fully running\n"
> " default Enter system default mode\n"
> " rescue Enter system rescue mode\n"
> @@ -6039,7 +6078,7 @@ static void systemctl_help(void) {
> " halt Shut down and halt the system\n"
> " poweroff Shut down and power-off the system\n"
> " reboot [ARG] Shut down and reboot the system\n"
> - " kexec Shut down and reboot the system with kexec\n"
> + " kexec [VERSION] [CMDLINE...] Shut down and reboot the system with kexec\n"
> " exit Request user instance exit\n"
> " switch-root ROOT [INIT] Change to a different root file system\n"
> " suspend Suspend the system\n"
> @@ -6960,6 +6999,7 @@ static int systemctl_main(sd_bus *bus, int argc, char *argv[], int bus_error) {
> } bus;
> } verbs[] = {
> { "list-units", MORE, 0, list_units },
> + { "list-kernels", EQUAL, 1, list_kernels },
> { "list-unit-files", MORE, 1, list_unit_files, NOBUS },
> { "list-sockets", MORE, 1, list_sockets },
> { "list-timers", MORE, 1, list_timers },
> @@ -6998,7 +7038,7 @@ static int systemctl_main(sd_bus *bus, int argc, char *argv[], int bus_error) {
> { "halt", EQUAL, 1, start_special, FORCE },
> { "poweroff", EQUAL, 1, start_special, FORCE },
> { "reboot", MORE, 1, start_special, FORCE },
> - { "kexec", EQUAL, 1, start_special },
> + { "kexec", MORE, 1, start_special },
> { "suspend", EQUAL, 1, start_special },
> { "hibernate", EQUAL, 1, start_special },
> { "hybrid-sleep", EQUAL, 1, start_special },
> @@ -7211,6 +7251,11 @@ static int halt_now(enum action a) {
> reboot(RB_POWER_OFF);
> return -errno;
>
> + case ACTION_KEXEC:
> + log_info("Rebooting via kexec.");
> + reboot(LINUX_REBOOT_CMD_KEXEC);
> + /* fall through */
> +
> case ACTION_REBOOT: {
> _cleanup_free_ char *param = NULL;
>
> @@ -7371,10 +7416,16 @@ int main(int argc, char*argv[]) {
> r = systemctl_main(bus, argc, argv, r);
> break;
>
> + case ACTION_KEXEC:
> + r = kernel_load(NULL, NULL, false);
> + if (r < 0) {
> + log_error_errno(r, "Failed to load kernel: %m");
> + goto finish;
> + }
> + /* fall through */
> case ACTION_HALT:
> case ACTION_POWEROFF:
> case ACTION_REBOOT:
> - case ACTION_KEXEC:
> r = halt_main(bus);
> break;
>
I'll follow up with some more comments in the other part of the thread.
Zbyszek
More information about the systemd-devel
mailing list