[systemd-devel] [PATCH] shutdown: avoid calling `kexec` binary unnessecarily

Zbigniew Jędrzejewski-Szmek zbyszek at in.waw.pl
Tue Feb 17 07:00:43 PST 2015


On Mon, Feb 16, 2015 at 07:53:47PM -0800, Shawn Landden wrote:
> On Mon, Feb 16, 2015 at 5:08 AM, Lennart Poettering <lennart at poettering.net>
> wrote:
> 
> > On Fri, 13.02.15 14:18, Shawn Landden (shawn at churchofgit.com) wrote:
> >
> > > Still use helper when Xen Dom0, to avoid duplicating some hairy
> > > code.
> >
> > Hmm, what precisely does the helper do on xen?
> >
> > > So we don't have any logic to load kexec kernels?
> >
> > Currently we don't.
> >
> > My hope though was that we can make the whole kexec thing work without
> > having kexec-tools installed. With the new kernel syscall for loading
> > the kernel we should be able to implement this all without any other
> > tools.
> >
> > Ideally we'd not use any external tools anymore, not even in the Xen
> > case, hence I am curious what precisely the special hookup for Xen is
> > here...?
> >
> > Lennart
> >
> >
> https://git.kernel.org/cgit/utils/kernel/kexec/kexec-tools.git/tree/kexec/kexec-xen.c
> 
> I've attached my patch.
> I'm having a problem with kexec_file_load() returning ENOMEM that I havn't
> resolved.
> 
> > --
> > Lennart Poettering, Red Hat
> > _______________________________________________
> > systemd-devel mailing list
> > systemd-devel at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/systemd-devel
> >
> 
> 
> 
> -- 
> Shawn Landden

> From d5446e324143f55e67bc2defe1c78a4ea4201142 Mon Sep 17 00:00:00 2001
> From: Shawn Landden <shawn at churchofgit.com>
> Date: Fri, 13 Feb 2015 13:48:07 -0800
> Subject: [PATCH] shutdown: avoid calling `kexec` binary unnessecarily
> 
> Still use helper when Xen Dom0, to avoid duplicating some hairy code.
> So we don't have any logic to load kexec kernels?
> ---
>  TODO                 |   3 --
>  src/core/shutdown.c  | 121 ++++++++++++++++++++++++++++++++++++++++++++++-----
>  src/shared/missing.h |   7 +++
>  3 files changed, 116 insertions(+), 15 deletions(-)
> 
> diff --git a/TODO b/TODO
> index 255a4f2..d914d2c 100644
> --- a/TODO
> +++ b/TODO
> @@ -90,9 +90,6 @@ Features:
>  * maybe introduce WantsMountsFor=? Usecase:
>    http://lists.freedesktop.org/archives/systemd-devel/2015-January/027729.html
>  
> -* rework kexec logic to use new kexec_file_load() syscall, so that we
> -  don't have to call kexec tool anymore.
> -
>  * The udev blkid built-in should expose a property that reflects
>    whether media was sensed in USB CF/SD card readers. This should then
>    be used to control SYSTEMD_READY=1/0 so that USB card readers aren't
> diff --git a/src/core/shutdown.c b/src/core/shutdown.c
> index 71f001a..14febf3 100644
> --- a/src/core/shutdown.c
> +++ b/src/core/shutdown.c
> @@ -19,6 +19,7 @@
>    along with systemd; If not, see <http://www.gnu.org/licenses/>.
>  ***/
>  
> +#include <ctype.h>
>  #include <sys/mman.h>
>  #include <sys/types.h>
>  #include <sys/reboot.h>
> @@ -27,6 +28,7 @@
>  #include <sys/stat.h>
>  #include <sys/mount.h>
>  #include <sys/syscall.h>
> +#include <sys/utsname.h>
>  #include <fcntl.h>
>  #include <dirent.h>
>  #include <errno.h>
> @@ -138,6 +140,35 @@ static int parse_argv(int argc, char *argv[]) {
>          return 0;
>  }
>  
> +/*
> + * Remove parameter from a kernel command line. Helper function for kexec.
> + * (copied from kexec-tools)
> + */
> +static void remove_parameter(char *line, const char *param_name) {
> +        char *start, *end;
> +
> +        start = strstr(line, param_name);
> +
> +        /* parameter not found */
> +        if (!start)
> +                return;
> +
> +        /*
> +         * check if that's really the start of a parameter and not in
> +         * the middle of the word
> +         */
> +        if (start != line && !isspace(*(start-1)))
> +                return;
> +
> +        end = strstr(start, " ");
> +        if (!end)
> +                *start = 0;
> +        else {
> +                memmove(start, end+1, strlen(end));
> +                *(end + strlen(end)) = 0;
> +        }
> +}
> +
>  static int switch_root_initramfs(void) {
>          if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0)
>                  return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
> @@ -152,6 +183,57 @@ static int switch_root_initramfs(void) {
>          return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
>  }
>  
> +static int kernel_load(bool overwrite) {
> +        int r = -ENOTSUP;
> +
> +/* only x86_64 and x32 in 3.18 */
> +#ifdef __NR_kexec_file_load
> +                /* If uses has no already loaded a kexec kernel assume they
> +                 * want the same one they are currently running. */
> +                if (!overwrite && !kexec_loaded()) {
> +                        struct utsname u;
> +                        _cleanup_free_ char *vmlinuz = NULL, *initrd = NULL, *cmdline = NULL;
> +                        _cleanup_close_ int vmlinuz_fd = -1, initrd_fd = -1;
> +
> +                        r = uname(&u);
> +                        if (r < 0)
> +                                return -errno;
> +
> +                        vmlinuz = malloc(strlen("/boot/vmlinuz-") + strlen(u.release) + 1);
> +                        initrd  = malloc(strlen("/boot/initrd.img-") + strlen(u.release) + 1);
> +                        if (!vmlinuz || !initrd)
> +                                return -ENOMEM;
> +
> +                        r = read_one_line_file("/proc/cmdline", &cmdline);
> +                        if (r < 0)
> +                                return r;
> +
> +                        strcpy(stpcpy(vmlinuz, "/boot/vmlinuz-"),    u.release);
> +                        strcpy(stpcpy(initrd,  "/boot/initrd.img-"), u.release);
Wouldn't a simple strjoina work just as well here?

We also have the boot loader specification. We really need to follow
it here, and look for kernels in any place where we would install them.
So this scheme most likely would have to be more comples.

> +                        if (access(vmlinuz, R_OK) != 0 || access(initrd, R_OK) != 0)
> +                                return -errno;
No need to to do this check, the open()s below work just as well.

> +                        vmlinuz_fd = open(vmlinuz, O_RDONLY);
> +                        if (vmlinuz_fd < 0)
> +                                return -errno;
> +
> +                        initrd_fd  = open(initrd,  O_RDONLY);
> +                        if (initrd_fd < 0)
> +                                return -errno;
> +
> +                        /* clean up cmdline like kexec-tools does */
> +                        remove_parameter(cmdline, "BOOT_IMAGE");
> +
> +                        log_info("kexec: kexec -l %s --initrd=%s --command-line=%s", vmlinuz, initrd, cmdline);
> +
> +                        r = syscall(__NR_kexec_file_load, vmlinuz_fd, initrd_fd, cmdline, strlen(cmdline), 0);
> +                        if (r < 0)
> +                                return -errno;
> +                } else
> +                        r = 0;
> +#endif
> +        return r;
> +}
>  
>  int main(int argc, char *argv[]) {
>          bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
> @@ -175,11 +257,13 @@ int main(int argc, char *argv[]) {
>  
>          umask(0022);
>  
> -        if (getpid() != 1) {
> +        /*if (getpid() != 1) {
>                  log_error("Not executed by init (PID 1).");
>                  r = -EPERM;
>                  goto error;
> -        }
> +        }*/
> +
> +        in_container = detect_container(NULL) > 0;
>  
>          if (streq(arg_verb, "reboot"))
>                  cmd = RB_AUTOBOOT;
> @@ -187,9 +271,19 @@ int main(int argc, char *argv[]) {
>                  cmd = RB_POWER_OFF;
>          else if (streq(arg_verb, "halt"))
>                  cmd = RB_HALT_SYSTEM;
> -        else if (streq(arg_verb, "kexec"))
> -                cmd = LINUX_REBOOT_CMD_KEXEC;
> -        else {
> +        else if (streq(arg_verb, "kexec")) {
> +                if (in_container) {
> +                        log_warning("Can't kexec from container. Rebooting…");
> +                        cmd = RB_AUTOBOOT;
> +                } else {
> +                        r = kernel_load(false);
> +                        if (r < 0) {
> +                                log_warning("Failed to load kernel: %s. Rebooting…", strerror(-r));

I think this should be changed too. I'd very much prefer to get an error if kexec fails.
Maybe we could add a parameter like --always-reboot to reboot as a fallback if the
kenrel cannot be loaded.

> +                                cmd = RB_AUTOBOOT;
> +                        } else
> +                                cmd = LINUX_REBOOT_CMD_KEXEC;
> +                }
> +        } else {
>                  r = -EINVAL;
>                  log_error("Unknown action '%s'.", arg_verb);
>                  goto error;
> @@ -208,8 +302,6 @@ int main(int argc, char *argv[]) {
>          log_info("Sending SIGKILL to remaining processes...");
>          broadcast_signal(SIGKILL, true, false);
>  
> -        in_container = detect_container(NULL) > 0;
> -
>          need_umount = !in_container;
>          need_swapoff = !in_container;
>          need_loop_detach = !in_container;
> @@ -349,11 +441,14 @@ int main(int argc, char *argv[]) {
>  
>          case LINUX_REBOOT_CMD_KEXEC:
>  
> -                if (!in_container) {
> -                        /* We cheat and exec kexec to avoid doing all its work */
> -                        pid_t pid;
> +                log_info("Rebooting with kexec.");
>  
> -                        log_info("Rebooting with kexec.");
> +                /* kexec-tools has a bunch of special code to make Xen Dom0 work,
> +                 * otherwise it is only doing stuff we have already done.
> +                 * This is true for Dom0 and DomU but we only get Dom0
> +                 * because of the !in_container check */
> +                if (access("/proc/xen", F_OK) == 0) {
> +                        pid_t pid;
>  
>                          pid = fork();
>                          if (pid < 0)
> @@ -370,7 +465,9 @@ int main(int argc, char *argv[]) {
>                                  _exit(EXIT_FAILURE);
>                          } else
>                                  wait_for_terminate_and_warn("kexec", pid, true);
> -                }
> +
> +                } else
> +                        reboot(cmd);
>  
>                  cmd = RB_AUTOBOOT;
>                  /* Fall through */
> diff --git a/src/shared/missing.h b/src/shared/missing.h
> index b33a70c..efd650a 100644
> --- a/src/shared/missing.h
> +++ b/src/shared/missing.h
> @@ -763,3 +763,10 @@ static inline int kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, uns
>  #ifndef KCMP_FILE
>  #define KCMP_FILE 0
>  #endif
> +
> +/* v3.17 */
> +#ifndef __NR_kexec_file_load
> +#ifdef __x86_64__
> +#define __NR_kexec_file_load 320
> +#endif
> +#endif
> -- 

Zbyszek


More information about the systemd-devel mailing list