[systemd-devel] [PATCH] shutdown: avoid calling `kexec` binary unnessecarily
Shawn Landden
shawn at churchofgit.com
Tue Feb 17 16:39:46 PST 2015
On Tue, Feb 17, 2015 at 4:34 PM, Shawn Landden <shawn at churchofgit.com>
wrote:
> On Tue, Feb 17, 2015 at 7:00 AM, Zbigniew Jędrzejewski-Szmek <
> zbyszek at in.waw.pl> wrote:
>
>> On Mon, Feb 16, 2015 at 07:53:47PM -0800, Shawn Landden wrote:
>> > On Mon, Feb 16, 2015 at 5:08 AM, Lennart Poettering <
>> lennart at poettering.net>
>> > wrote:
>> >
>> > > On Fri, 13.02.15 14:18, Shawn Landden (shawn at churchofgit.com) wrote:
>> > >
>> > > > Still use helper when Xen Dom0, to avoid duplicating some hairy
>> > > > code.
>> > >
>> > > Hmm, what precisely does the helper do on xen?
>> > >
>> > > > So we don't have any logic to load kexec kernels?
>> > >
>> > > Currently we don't.
>> > >
>> > > My hope though was that we can make the whole kexec thing work without
>> > > having kexec-tools installed. With the new kernel syscall for loading
>> > > the kernel we should be able to implement this all without any other
>> > > tools.
>> > >
>> > > Ideally we'd not use any external tools anymore, not even in the Xen
>> > > case, hence I am curious what precisely the special hookup for Xen is
>> > > here...?
>> > >
>> > > Lennart
>> > >
>> > >
>> >
>> https://git.kernel.org/cgit/utils/kernel/kexec/kexec-tools.git/tree/kexec/kexec-xen.c
>> >
>> > I've attached my patch.
>> > I'm having a problem with kexec_file_load() returning ENOMEM that I
>> havn't
>> > resolved.
>> >
>> > > --
>> > > Lennart Poettering, Red Hat
>> > > _______________________________________________
>> > > systemd-devel mailing list
>> > > systemd-devel at lists.freedesktop.org
>> > > http://lists.freedesktop.org/mailman/listinfo/systemd-devel
>> > >
>> >
>> >
>> >
>> > --
>> > Shawn Landden
>>
>> > From d5446e324143f55e67bc2defe1c78a4ea4201142 Mon Sep 17 00:00:00 2001
>> > From: Shawn Landden <shawn at churchofgit.com>
>> > Date: Fri, 13 Feb 2015 13:48:07 -0800
>> > Subject: [PATCH] shutdown: avoid calling `kexec` binary unnessecarily
>> >
>> > Still use helper when Xen Dom0, to avoid duplicating some hairy code.
>> > So we don't have any logic to load kexec kernels?
>> > ---
>> > TODO | 3 --
>> > src/core/shutdown.c | 121
>> ++++++++++++++++++++++++++++++++++++++++++++++-----
>> > src/shared/missing.h | 7 +++
>> > 3 files changed, 116 insertions(+), 15 deletions(-)
>> >
>> > diff --git a/TODO b/TODO
>> > index 255a4f2..d914d2c 100644
>> > --- a/TODO
>> > +++ b/TODO
>> > @@ -90,9 +90,6 @@ Features:
>> > * maybe introduce WantsMountsFor=? Usecase:
>> >
>> http://lists.freedesktop.org/archives/systemd-devel/2015-January/027729.html
>> >
>> > -* rework kexec logic to use new kexec_file_load() syscall, so that we
>> > - don't have to call kexec tool anymore.
>> > -
>> > * The udev blkid built-in should expose a property that reflects
>> > whether media was sensed in USB CF/SD card readers. This should then
>> > be used to control SYSTEMD_READY=1/0 so that USB card readers aren't
>> > diff --git a/src/core/shutdown.c b/src/core/shutdown.c
>> > index 71f001a..14febf3 100644
>> > --- a/src/core/shutdown.c
>> > +++ b/src/core/shutdown.c
>> > @@ -19,6 +19,7 @@
>> > along with systemd; If not, see <http://www.gnu.org/licenses/>.
>> > ***/
>> >
>> > +#include <ctype.h>
>> > #include <sys/mman.h>
>> > #include <sys/types.h>
>> > #include <sys/reboot.h>
>> > @@ -27,6 +28,7 @@
>> > #include <sys/stat.h>
>> > #include <sys/mount.h>
>> > #include <sys/syscall.h>
>> > +#include <sys/utsname.h>
>> > #include <fcntl.h>
>> > #include <dirent.h>
>> > #include <errno.h>
>> > @@ -138,6 +140,35 @@ static int parse_argv(int argc, char *argv[]) {
>> > return 0;
>> > }
>> >
>> > +/*
>> > + * Remove parameter from a kernel command line. Helper function for
>> kexec.
>> > + * (copied from kexec-tools)
>> > + */
>> > +static void remove_parameter(char *line, const char *param_name) {
>> > + char *start, *end;
>> > +
>> > + start = strstr(line, param_name);
>> > +
>> > + /* parameter not found */
>> > + if (!start)
>> > + return;
>> > +
>> > + /*
>> > + * check if that's really the start of a parameter and not in
>> > + * the middle of the word
>> > + */
>> > + if (start != line && !isspace(*(start-1)))
>> > + return;
>> > +
>> > + end = strstr(start, " ");
>> > + if (!end)
>> > + *start = 0;
>> > + else {
>> > + memmove(start, end+1, strlen(end));
>> > + *(end + strlen(end)) = 0;
>> > + }
>> > +}
>> > +
>> > static int switch_root_initramfs(void) {
>> > if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND,
>> NULL) < 0)
>> > return log_error_errno(errno, "Failed to mount bind
>> /run/initramfs on /run/initramfs: %m");
>> > @@ -152,6 +183,57 @@ static int switch_root_initramfs(void) {
>> > return switch_root("/run/initramfs", "/oldroot", false,
>> MS_BIND);
>> > }
>> >
>> > +static int kernel_load(bool overwrite) {
>> > + int r = -ENOTSUP;
>> > +
>> > +/* only x86_64 and x32 in 3.18 */
>> > +#ifdef __NR_kexec_file_load
>> > + /* If uses has no already loaded a kexec kernel assume
>> they
>> > + * want the same one they are currently running. */
>> > + if (!overwrite && !kexec_loaded()) {
>> > + struct utsname u;
>> > + _cleanup_free_ char *vmlinuz = NULL, *initrd =
>> NULL, *cmdline = NULL;
>> > + _cleanup_close_ int vmlinuz_fd = -1, initrd_fd
>> = -1;
>> > +
>> > + r = uname(&u);
>> > + if (r < 0)
>> > + return -errno;
>> > +
>> > + vmlinuz = malloc(strlen("/boot/vmlinuz-") +
>> strlen(u.release) + 1);
>> > + initrd = malloc(strlen("/boot/initrd.img-") +
>> strlen(u.release) + 1);
>> > + if (!vmlinuz || !initrd)
>> > + return -ENOMEM;
>> > +
>> > + r = read_one_line_file("/proc/cmdline",
>> &cmdline);
>> > + if (r < 0)
>> > + return r;
>> > +
>> > + strcpy(stpcpy(vmlinuz, "/boot/vmlinuz-"),
>> u.release);
>> > + strcpy(stpcpy(initrd, "/boot/initrd.img-"),
>> u.release);
>> Wouldn't a simple strjoina work just as well here?
>>
>> We also have the boot loader specification. We really need to follow
>> it here, and look for kernels in any place where we would install them.
>> So this scheme most likely would have to be more comples.
>>
>> > + if (access(vmlinuz, R_OK) != 0 ||
>> access(initrd, R_OK) != 0)
>> > + return -errno;
>> No need to to do this check, the open()s below work just as well.
>>
>> > + vmlinuz_fd = open(vmlinuz, O_RDONLY);
>> > + if (vmlinuz_fd < 0)
>> > + return -errno;
>> > +
>> > + initrd_fd = open(initrd, O_RDONLY);
>> > + if (initrd_fd < 0)
>> > + return -errno;
>> > +
>> > + /* clean up cmdline like kexec-tools does */
>> > + remove_parameter(cmdline, "BOOT_IMAGE");
>> > +
>> > + log_info("kexec: kexec -l %s --initrd=%s
>> --command-line=%s", vmlinuz, initrd, cmdline);
>> > +
>> > + r = syscall(__NR_kexec_file_load, vmlinuz_fd,
>> initrd_fd, cmdline, strlen(cmdline), 0);
>> > + if (r < 0)
>> > + return -errno;
>> > + } else
>> > + r = 0;
>> > +#endif
>> > + return r;
>> > +}
>> >
>> > int main(int argc, char *argv[]) {
>> > bool need_umount, need_swapoff, need_loop_detach,
>> need_dm_detach;
>> > @@ -175,11 +257,13 @@ int main(int argc, char *argv[]) {
>> >
>> > umask(0022);
>> >
>> > - if (getpid() != 1) {
>> > + /*if (getpid() != 1) {
>> > log_error("Not executed by init (PID 1).");
>> > r = -EPERM;
>> > goto error;
>> > - }
>> > + }*/
>> > +
>> > + in_container = detect_container(NULL) > 0;
>> >
>> > if (streq(arg_verb, "reboot"))
>> > cmd = RB_AUTOBOOT;
>> > @@ -187,9 +271,19 @@ int main(int argc, char *argv[]) {
>> > cmd = RB_POWER_OFF;
>> > else if (streq(arg_verb, "halt"))
>> > cmd = RB_HALT_SYSTEM;
>> > - else if (streq(arg_verb, "kexec"))
>> > - cmd = LINUX_REBOOT_CMD_KEXEC;
>> > - else {
>> > + else if (streq(arg_verb, "kexec")) {
>> > + if (in_container) {
>> > + log_warning("Can't kexec from container.
>> Rebooting…");
>> > + cmd = RB_AUTOBOOT;
>> > + } else {
>> > + r = kernel_load(false);
>> > + if (r < 0) {
>> > + log_warning("Failed to load kernel:
>> %s. Rebooting…", strerror(-r));
>>
>> I think this should be changed too. I'd very much prefer to get an error
>> if kexec fails.
>> Maybe we could add a parameter like --always-reboot to reboot as a
>> fallback if the
>> kenrel cannot be loaded.
>>
> This is not the user API, that is systemctl. This binary is executed only
> by pid 1.
>
This _IS_ pid 1, after the main pid 1 quits by executing this shutdown
binary.
>
>> > + cmd = RB_AUTOBOOT;
>> > + } else
>> > + cmd = LINUX_REBOOT_CMD_KEXEC;
>> > + }
>> > + } else {
>> > r = -EINVAL;
>> > log_error("Unknown action '%s'.", arg_verb);
>> > goto error;
>> > @@ -208,8 +302,6 @@ int main(int argc, char *argv[]) {
>> > log_info("Sending SIGKILL to remaining processes...");
>> > broadcast_signal(SIGKILL, true, false);
>> >
>> > - in_container = detect_container(NULL) > 0;
>> > -
>> > need_umount = !in_container;
>> > need_swapoff = !in_container;
>> > need_loop_detach = !in_container;
>> > @@ -349,11 +441,14 @@ int main(int argc, char *argv[]) {
>> >
>> > case LINUX_REBOOT_CMD_KEXEC:
>> >
>> > - if (!in_container) {
>> > - /* We cheat and exec kexec to avoid doing all
>> its work */
>> > - pid_t pid;
>> > + log_info("Rebooting with kexec.");
>> >
>> > - log_info("Rebooting with kexec.");
>> > + /* kexec-tools has a bunch of special code to make Xen
>> Dom0 work,
>> > + * otherwise it is only doing stuff we have already
>> done.
>> > + * This is true for Dom0 and DomU but we only get Dom0
>> > + * because of the !in_container check */
>> > + if (access("/proc/xen", F_OK) == 0) {
>> > + pid_t pid;
>> >
>> > pid = fork();
>> > if (pid < 0)
>> > @@ -370,7 +465,9 @@ int main(int argc, char *argv[]) {
>> > _exit(EXIT_FAILURE);
>> > } else
>> > wait_for_terminate_and_warn("kexec",
>> pid, true);
>> > - }
>> > +
>> > + } else
>> > + reboot(cmd);
>> >
>> > cmd = RB_AUTOBOOT;
>> > /* Fall through */
>> > diff --git a/src/shared/missing.h b/src/shared/missing.h
>> > index b33a70c..efd650a 100644
>> > --- a/src/shared/missing.h
>> > +++ b/src/shared/missing.h
>> > @@ -763,3 +763,10 @@ static inline int kcmp(pid_t pid1, pid_t pid2, int
>> type, unsigned long idx1, uns
>> > #ifndef KCMP_FILE
>> > #define KCMP_FILE 0
>> > #endif
>> > +
>> > +/* v3.17 */
>> > +#ifndef __NR_kexec_file_load
>> > +#ifdef __x86_64__
>> > +#define __NR_kexec_file_load 320
>> > +#endif
>> > +#endif
>> > --
>>
>> Zbyszek
>> _______________________________________________
>> systemd-devel mailing list
>> systemd-devel at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/systemd-devel
>>
>
>
>
> --
> Shawn Landden
>
--
Shawn Landden
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/systemd-devel/attachments/20150217/fc5107aa/attachment-0001.html>
More information about the systemd-devel
mailing list