[systemd-devel] [PATCH] shutdown: avoid calling `kexec` binary unnessecarily

Zbigniew Jędrzejewski-Szmek zbyszek at in.waw.pl
Tue Feb 17 17:58:55 PST 2015


On Tue, Feb 17, 2015 at 04:39:46PM -0800, Shawn Landden wrote:
> On Tue, Feb 17, 2015 at 4:34 PM, Shawn Landden <shawn at churchofgit.com>
> wrote:
> 
> > On Tue, Feb 17, 2015 at 7:00 AM, Zbigniew Jędrzejewski-Szmek <
> > zbyszek at in.waw.pl> wrote:
> >
> >> On Mon, Feb 16, 2015 at 07:53:47PM -0800, Shawn Landden wrote:
> >> > On Mon, Feb 16, 2015 at 5:08 AM, Lennart Poettering <
> >> lennart at poettering.net>
> >> > wrote:
> >> >
> >> > > On Fri, 13.02.15 14:18, Shawn Landden (shawn at churchofgit.com) wrote:
> >> > >
> >> > > > Still use helper when Xen Dom0, to avoid duplicating some hairy
> >> > > > code.
> >> > >
> >> > > Hmm, what precisely does the helper do on xen?
> >> > >
> >> > > > So we don't have any logic to load kexec kernels?
> >> > >
> >> > > Currently we don't.
> >> > >
> >> > > My hope though was that we can make the whole kexec thing work without
> >> > > having kexec-tools installed. With the new kernel syscall for loading
> >> > > the kernel we should be able to implement this all without any other
> >> > > tools.
> >> > >
> >> > > Ideally we'd not use any external tools anymore, not even in the Xen
> >> > > case, hence I am curious what precisely the special hookup for Xen is
> >> > > here...?
> >> > >
> >> > > Lennart
> >> > >
> >> > >
> >> >
> >> https://git.kernel.org/cgit/utils/kernel/kexec/kexec-tools.git/tree/kexec/kexec-xen.c
> >> >
> >> > I've attached my patch.
> >> > I'm having a problem with kexec_file_load() returning ENOMEM that I
> >> havn't
> >> > resolved.
> >> >
> >> > > --
> >> > > Lennart Poettering, Red Hat
> >> > > _______________________________________________
> >> > > systemd-devel mailing list
> >> > > systemd-devel at lists.freedesktop.org
> >> > > http://lists.freedesktop.org/mailman/listinfo/systemd-devel
> >> > >
> >> >
> >> >
> >> >
> >> > --
> >> > Shawn Landden
> >>
> >> > From d5446e324143f55e67bc2defe1c78a4ea4201142 Mon Sep 17 00:00:00 2001
> >> > From: Shawn Landden <shawn at churchofgit.com>
> >> > Date: Fri, 13 Feb 2015 13:48:07 -0800
> >> > Subject: [PATCH] shutdown: avoid calling `kexec` binary unnessecarily
> >> >
> >> > Still use helper when Xen Dom0, to avoid duplicating some hairy code.
> >> > So we don't have any logic to load kexec kernels?
> >> > ---
> >> >  TODO                 |   3 --
> >> >  src/core/shutdown.c  | 121
> >> ++++++++++++++++++++++++++++++++++++++++++++++-----
> >> >  src/shared/missing.h |   7 +++
> >> >  3 files changed, 116 insertions(+), 15 deletions(-)
> >> >
> >> > diff --git a/TODO b/TODO
> >> > index 255a4f2..d914d2c 100644
> >> > --- a/TODO
> >> > +++ b/TODO
> >> > @@ -90,9 +90,6 @@ Features:
> >> >  * maybe introduce WantsMountsFor=? Usecase:
> >> >
> >> http://lists.freedesktop.org/archives/systemd-devel/2015-January/027729.html
> >> >
> >> > -* rework kexec logic to use new kexec_file_load() syscall, so that we
> >> > -  don't have to call kexec tool anymore.
> >> > -
> >> >  * The udev blkid built-in should expose a property that reflects
> >> >    whether media was sensed in USB CF/SD card readers. This should then
> >> >    be used to control SYSTEMD_READY=1/0 so that USB card readers aren't
> >> > diff --git a/src/core/shutdown.c b/src/core/shutdown.c
> >> > index 71f001a..14febf3 100644
> >> > --- a/src/core/shutdown.c
> >> > +++ b/src/core/shutdown.c
> >> > @@ -19,6 +19,7 @@
> >> >    along with systemd; If not, see <http://www.gnu.org/licenses/>.
> >> >  ***/
> >> >
> >> > +#include <ctype.h>
> >> >  #include <sys/mman.h>
> >> >  #include <sys/types.h>
> >> >  #include <sys/reboot.h>
> >> > @@ -27,6 +28,7 @@
> >> >  #include <sys/stat.h>
> >> >  #include <sys/mount.h>
> >> >  #include <sys/syscall.h>
> >> > +#include <sys/utsname.h>
> >> >  #include <fcntl.h>
> >> >  #include <dirent.h>
> >> >  #include <errno.h>
> >> > @@ -138,6 +140,35 @@ static int parse_argv(int argc, char *argv[]) {
> >> >          return 0;
> >> >  }
> >> >
> >> > +/*
> >> > + * Remove parameter from a kernel command line. Helper function for
> >> kexec.
> >> > + * (copied from kexec-tools)
> >> > + */
> >> > +static void remove_parameter(char *line, const char *param_name) {
> >> > +        char *start, *end;
> >> > +
> >> > +        start = strstr(line, param_name);
> >> > +
> >> > +        /* parameter not found */
> >> > +        if (!start)
> >> > +                return;
> >> > +
> >> > +        /*
> >> > +         * check if that's really the start of a parameter and not in
> >> > +         * the middle of the word
> >> > +         */
> >> > +        if (start != line && !isspace(*(start-1)))
> >> > +                return;
> >> > +
> >> > +        end = strstr(start, " ");
> >> > +        if (!end)
> >> > +                *start = 0;
> >> > +        else {
> >> > +                memmove(start, end+1, strlen(end));
> >> > +                *(end + strlen(end)) = 0;
> >> > +        }
> >> > +}
> >> > +
> >> >  static int switch_root_initramfs(void) {
> >> >          if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND,
> >> NULL) < 0)
> >> >                  return log_error_errno(errno, "Failed to mount bind
> >> /run/initramfs on /run/initramfs: %m");
> >> > @@ -152,6 +183,57 @@ static int switch_root_initramfs(void) {
> >> >          return switch_root("/run/initramfs", "/oldroot", false,
> >> MS_BIND);
> >> >  }
> >> >
> >> > +static int kernel_load(bool overwrite) {
> >> > +        int r = -ENOTSUP;
> >> > +
> >> > +/* only x86_64 and x32 in 3.18 */
> >> > +#ifdef __NR_kexec_file_load
> >> > +                /* If uses has no already loaded a kexec kernel assume
> >> they
> >> > +                 * want the same one they are currently running. */
> >> > +                if (!overwrite && !kexec_loaded()) {
> >> > +                        struct utsname u;
> >> > +                        _cleanup_free_ char *vmlinuz = NULL, *initrd =
> >> NULL, *cmdline = NULL;
> >> > +                        _cleanup_close_ int vmlinuz_fd = -1, initrd_fd
> >> = -1;
> >> > +
> >> > +                        r = uname(&u);
> >> > +                        if (r < 0)
> >> > +                                return -errno;
> >> > +
> >> > +                        vmlinuz = malloc(strlen("/boot/vmlinuz-") +
> >> strlen(u.release) + 1);
> >> > +                        initrd  = malloc(strlen("/boot/initrd.img-") +
> >> strlen(u.release) + 1);
> >> > +                        if (!vmlinuz || !initrd)
> >> > +                                return -ENOMEM;
> >> > +
> >> > +                        r = read_one_line_file("/proc/cmdline",
> >> &cmdline);
> >> > +                        if (r < 0)
> >> > +                                return r;
> >> > +
> >> > +                        strcpy(stpcpy(vmlinuz, "/boot/vmlinuz-"),
> >> u.release);
> >> > +                        strcpy(stpcpy(initrd,  "/boot/initrd.img-"),
> >> u.release);
> >> Wouldn't a simple strjoina work just as well here?
> >>
> >> We also have the boot loader specification. We really need to follow
> >> it here, and look for kernels in any place where we would install them.
> >> So this scheme most likely would have to be more comples.
> >>
> >> > +                        if (access(vmlinuz, R_OK) != 0 ||
> >> access(initrd, R_OK) != 0)
> >> > +                                return -errno;
> >> No need to to do this check, the open()s below work just as well.
> >>
> >> > +                        vmlinuz_fd = open(vmlinuz, O_RDONLY);
> >> > +                        if (vmlinuz_fd < 0)
> >> > +                                return -errno;
> >> > +
> >> > +                        initrd_fd  = open(initrd,  O_RDONLY);
> >> > +                        if (initrd_fd < 0)
> >> > +                                return -errno;
> >> > +
> >> > +                        /* clean up cmdline like kexec-tools does */
> >> > +                        remove_parameter(cmdline, "BOOT_IMAGE");
> >> > +
> >> > +                        log_info("kexec: kexec -l %s --initrd=%s
> >> --command-line=%s", vmlinuz, initrd, cmdline);
> >> > +
> >> > +                        r = syscall(__NR_kexec_file_load, vmlinuz_fd,
> >> initrd_fd, cmdline, strlen(cmdline), 0);
> >> > +                        if (r < 0)
> >> > +                                return -errno;
> >> > +                } else
> >> > +                        r = 0;
> >> > +#endif
> >> > +        return r;
> >> > +}
> >> >
> >> >  int main(int argc, char *argv[]) {
> >> >          bool need_umount, need_swapoff, need_loop_detach,
> >> need_dm_detach;
> >> > @@ -175,11 +257,13 @@ int main(int argc, char *argv[]) {
> >> >
> >> >          umask(0022);
> >> >
> >> > -        if (getpid() != 1) {
> >> > +        /*if (getpid() != 1) {
> >> >                  log_error("Not executed by init (PID 1).");
> >> >                  r = -EPERM;
> >> >                  goto error;
> >> > -        }
> >> > +        }*/
> >> > +
> >> > +        in_container = detect_container(NULL) > 0;
> >> >
> >> >          if (streq(arg_verb, "reboot"))
> >> >                  cmd = RB_AUTOBOOT;
> >> > @@ -187,9 +271,19 @@ int main(int argc, char *argv[]) {
> >> >                  cmd = RB_POWER_OFF;
> >> >          else if (streq(arg_verb, "halt"))
> >> >                  cmd = RB_HALT_SYSTEM;
> >> > -        else if (streq(arg_verb, "kexec"))
> >> > -                cmd = LINUX_REBOOT_CMD_KEXEC;
> >> > -        else {
> >> > +        else if (streq(arg_verb, "kexec")) {
> >> > +                if (in_container) {
> >> > +                        log_warning("Can't kexec from container.
> >> Rebooting…");
> >> > +                        cmd = RB_AUTOBOOT;
> >> > +                } else {
> >> > +                        r = kernel_load(false);
> >> > +                        if (r < 0) {
> >> > +                                log_warning("Failed to load kernel:
> >> %s. Rebooting…", strerror(-r));
> >>
> >> I think this should be changed too. I'd very much prefer to get an error
> >> if kexec fails.
> >> Maybe we could add a parameter like --always-reboot to reboot as a
> >> fallback if the
> >> kenrel cannot be loaded.
> >>
> >  This is not the user API, that is systemctl. This binary is executed only
> > by pid 1.
> >
> This _IS_ pid 1, after the main pid 1 quits by executing this shutdown
> binary.
So maybe more things need to be modified to achieve what I'd like.
I haven't looked at how all the piecies fit together.

What I'd like to have, is that when I say 'systemctl kexec',
1. a kernel is loaded
2. reboot commences
3. I'm rebooted into that kernel, and if that fails, a normal reboot
   is performed.

In other words, if load_kernel fails, I'd like to get an error message
that tell me to try again with an explicit kernel path or something like
that.

Zbyszek

> 
> >
> >> > +                                cmd = RB_AUTOBOOT;
> >> > +                        } else
> >> > +                                cmd = LINUX_REBOOT_CMD_KEXEC;
> >> > +                }
> >> > +        } else {
> >> >                  r = -EINVAL;
> >> >                  log_error("Unknown action '%s'.", arg_verb);
> >> >                  goto error;
> >> > @@ -208,8 +302,6 @@ int main(int argc, char *argv[]) {
> >> >          log_info("Sending SIGKILL to remaining processes...");
> >> >          broadcast_signal(SIGKILL, true, false);
> >> >
> >> > -        in_container = detect_container(NULL) > 0;
> >> > -
> >> >          need_umount = !in_container;
> >> >          need_swapoff = !in_container;
> >> >          need_loop_detach = !in_container;
> >> > @@ -349,11 +441,14 @@ int main(int argc, char *argv[]) {
> >> >
> >> >          case LINUX_REBOOT_CMD_KEXEC:
> >> >
> >> > -                if (!in_container) {
> >> > -                        /* We cheat and exec kexec to avoid doing all
> >> its work */
> >> > -                        pid_t pid;
> >> > +                log_info("Rebooting with kexec.");
> >> >
> >> > -                        log_info("Rebooting with kexec.");
> >> > +                /* kexec-tools has a bunch of special code to make Xen
> >> Dom0 work,
> >> > +                 * otherwise it is only doing stuff we have already
> >> done.
> >> > +                 * This is true for Dom0 and DomU but we only get Dom0
> >> > +                 * because of the !in_container check */
> >> > +                if (access("/proc/xen", F_OK) == 0) {
> >> > +                        pid_t pid;
> >> >
> >> >                          pid = fork();
> >> >                          if (pid < 0)
> >> > @@ -370,7 +465,9 @@ int main(int argc, char *argv[]) {
> >> >                                  _exit(EXIT_FAILURE);
> >> >                          } else
> >> >                                  wait_for_terminate_and_warn("kexec",
> >> pid, true);
> >> > -                }
> >> > +
> >> > +                } else
> >> > +                        reboot(cmd);
> >> >
> >> >                  cmd = RB_AUTOBOOT;
> >> >                  /* Fall through */
> >> > diff --git a/src/shared/missing.h b/src/shared/missing.h
> >> > index b33a70c..efd650a 100644
> >> > --- a/src/shared/missing.h
> >> > +++ b/src/shared/missing.h
> >> > @@ -763,3 +763,10 @@ static inline int kcmp(pid_t pid1, pid_t pid2, int
> >> type, unsigned long idx1, uns
> >> >  #ifndef KCMP_FILE
> >> >  #define KCMP_FILE 0
> >> >  #endif
> >> > +
> >> > +/* v3.17 */
> >> > +#ifndef __NR_kexec_file_load
> >> > +#ifdef __x86_64__
> >> > +#define __NR_kexec_file_load 320
> >> > +#endif
> >> > +#endif
> >> > --
> >>
> >> Zbyszek
> >> _______________________________________________
> >> systemd-devel mailing list
> >> systemd-devel at lists.freedesktop.org
> >> http://lists.freedesktop.org/mailman/listinfo/systemd-devel
> >>
> >
> >
> >
> > --
> > Shawn Landden
> >
> 
> 
> 
> -- 
> Shawn Landden


More information about the systemd-devel mailing list