[systemd-devel] [PATCH v3 4/4] add basic watchdog daemon

Michael Olbrich m.olbrich at pengutronix.de
Wed Feb 1 08:17:15 PST 2012


This patch introduces a small watchdog daemon that supervises systemd
and handles /dev/watchdog.
---
changes in v3:
 - new patch

 .gitignore                         |    1 +
 Makefile.am                        |   39 ++++++-
 configure.ac                       |    7 +
 src/99-systemd.rules.in            |    2 +
 src/watchdog.c                     |   76 ++++++++++++
 src/watchdog.h                     |   32 +++++
 src/watchdogd.c                    |  227 ++++++++++++++++++++++++++++++++++++
 units/.gitignore                   |    1 +
 units/systemd-watchdogd.service.in |   18 +++
 9 files changed, 402 insertions(+), 1 deletions(-)
 create mode 100644 src/watchdog.c
 create mode 100644 src/watchdog.h
 create mode 100644 src/watchdogd.c
 create mode 100644 units/systemd-watchdogd.service.in

diff --git a/.gitignore b/.gitignore
index 3da7e66..42c03d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/systemd-watchdogd
 /systemd-multi-seat-x
 /systemd-cgtop
 /systemd-coredump
diff --git a/Makefile.am b/Makefile.am
index 5473623..53045d5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -647,7 +647,8 @@ EXTRA_DIST += \
 	src/dbus-loop.h \
 	src/spawn-agent.h \
 	src/acl-util.h \
-	src/logs-show.h
+	src/logs-show.h \
+	src/watchdog.h
 
 MANPAGES = \
 	man/systemd.1 \
@@ -1551,6 +1552,42 @@ MANPAGES += \
 endif
 
 # ------------------------------------------------------------------------------
+if ENABLE_WATCHDOGD
+rootlibexec_PROGRAMS += \
+	systemd-watchdogd
+
+nodist_systemunit_DATA += \
+	units/systemd-watchdogd.service
+
+EXTRA_DIST += \
+	units/systemd-watchdogd.service.in
+
+systemd_watchdogd_SOURCES = \
+	src/watchdog.c \
+	src/watchdogd.c \
+	src/dbus-common.c
+
+systemd_watchdogd_CFLAGS = \
+	$(AM_CFLAGS) \
+	$(DBUS_CFLAGS)
+
+systemd_watchdogd_LDADD = \
+	libsystemd-basic.la \
+	libsystemd-daemon.la \
+	$(DBUS_LIBS)
+
+watchdogd-install-data-hook:
+	$(MKDIR_P) -m 0755 \
+		$(DESTDIR)$(systemunitdir)/sysinit.target.wants
+	( cd $(DESTDIR)$(systemunitdir)/sysinit.target.wants && \
+		rm -f systemd-watchdogd.service && \
+		$(LN_S) ../systemd-watchdogd.service systemd-watchdogd.service )
+
+INSTALL_DATA_HOOKS += \
+	watchdogd-install-data-hook
+endif
+
+# ------------------------------------------------------------------------------
 if ENABLE_QUOTACHECK
 rootlibexec_PROGRAMS += \
 	systemd-quotacheck
diff --git a/configure.ac b/configure.ac
index 37814de..e73c508 100644
--- a/configure.ac
+++ b/configure.ac
@@ -380,6 +380,13 @@ if test "x$enable_coredump" != "xno"; then
 fi
 AM_CONDITIONAL(ENABLE_COREDUMP, [test "$have_coredump" = "yes"])
 
+have_watchdogd=no # opt-in feature: only --enable-watchdogd turns it on
+AC_ARG_ENABLE(watchdogd, AS_HELP_STRING([--enable-watchdogd], [enable watchdogd hook]))
+if test "x$enable_watchdogd" = "xyes"; then # a single = is the portable string comparison for test
+	have_watchdogd=yes
+fi
+AM_CONDITIONAL(ENABLE_WATCHDOGD, [test "$have_watchdogd" = "yes"])
+
 have_gtk=no
 AC_ARG_ENABLE(gtk, AS_HELP_STRING([--disable-gtk], [disable GTK tools]))
 if test "x$enable_gtk" != "xno"; then
diff --git a/src/99-systemd.rules.in b/src/99-systemd.rules.in
index d306f71..8cb9e41 100644
--- a/src/99-systemd.rules.in
+++ b/src/99-systemd.rules.in
@@ -12,6 +12,8 @@ SUBSYSTEM=="tty", KERNEL=="tty[a-zA-Z]*|hvc*|xvc*|hvsi*", TAG+="systemd"
 
 KERNEL=="vport*", TAG+="systemd"
 
+KERNEL=="watchdog", TAG+="systemd"
+
 SUBSYSTEM=="block", KERNEL!="ram*|loop*", TAG+="systemd"
 SUBSYSTEM=="block", KERNEL!="ram*|loop*", ENV{DM_UDEV_DISABLE_OTHER_RULES_FLAG}=="1", ENV{SYSTEMD_READY}="0"
 
diff --git a/src/watchdog.c b/src/watchdog.c
new file mode 100644
index 0000000..c3a1828
--- /dev/null
+++ b/src/watchdog.c
@@ -0,0 +1,76 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Michael Olbrich
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/reboot.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/watchdog.h>
+
+#include "watchdog.h"
+
+int watchdog_init(int *timeout, int *fd) { /* Opens /dev/watchdog into *fd and requests *timeout seconds; returns 0 or -errno. */
+        assert(fd);
+        assert(timeout);
+        *fd = open("/dev/watchdog", O_RDWR);
+        if (*fd < 0) {
+                int r = -errno; /* snapshot first: log_error() may overwrite errno */
+                log_error("Could not open /dev/watchdog: %s", strerror(-r));
+                return r;
+        }
+        if (ioctl(*fd, WDIOC_SETTIMEOUT, timeout) < 0) { /* on failure *fd is left open for the caller */
+                int r = -errno; /* snapshot first: log_error() may overwrite errno */
+                log_error("Failed to set watchdog timeout: %s", strerror(-r));
+                return r;
+        }
+        return 0;
+}
+
+int watchdog_done(int fd) { /* Also called from the SIGUSR1 handler in watchdogd.c. */
+        /* Write the 'magic close' character before closing. This should stop
+           any watchdog that can be stopped. */
+        write(fd, "V", 1); /* return value is not checked */
+        close(fd);
+        return 0; /* unconditionally 0 */
+}
+
+int watchdog_handle(int fd, usec_t reboot_monotonic, usec_t shutdown_delay) {
+        usec_t t;
+        /* Returns 0 after a keep-alive, -errno if the keep-alive ioctl failed, 1 if reboot() came back. */
+        t = now(CLOCK_MONOTONIC);
+        /* A zero reboot_monotonic skips the deadline check entirely. */
+        if (reboot_monotonic && ((reboot_monotonic + shutdown_delay) < t)) {
+                log_error("Reboot timeout exceeded! Skiping watchdog keep-alive and trying to reset.");
+                reboot(RB_AUTOBOOT);
+                return 1; /* only reached when reboot() returned */
+        }
+        if (ioctl(fd, WDIOC_KEEPALIVE, 0) < 0) {
+                int r = -errno; /* snapshot first: log_error() may overwrite errno */
+                log_error("watchdog keep-alive ioctl failed: %s", strerror(-r));
+                return r;
+        }
+        return 0;
+}
diff --git a/src/watchdog.h b/src/watchdog.h
new file mode 100644
index 0000000..49ce3e2
--- /dev/null
+++ b/src/watchdog.h
@@ -0,0 +1,32 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#ifndef foowatchdoghfoo
+#define foowatchdoghfoo
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Michael Olbrich
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "util.h"
+
+int watchdog_init(int *timeout, int *fd); /* open /dev/watchdog into *fd and set the timeout */
+int watchdog_done(int fd); /* write the magic close character, then close fd */
+/* Returns <0 if the keep-alive ioctl failed. */
+int watchdog_handle(int fd, usec_t reboot_monotonic, usec_t shutdown_delay);
+
+#endif
diff --git a/src/watchdogd.c b/src/watchdogd.c
new file mode 100644
index 0000000..8949ca9
--- /dev/null
+++ b/src/watchdogd.c
@@ -0,0 +1,227 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Michael Olbrich
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <string.h>
+#include <time.h>
+#include <dbus/dbus.h>
+
+#include "log.h"
+#include "util.h"
+#include "dbus-common.h"
+#include "sd-daemon.h"
+#include "watchdog.h"
+
+static int watchdog_fd = -1; /* descriptor for /dev/watchdog, filled in by watchdog_init() */
+static bool nowayout = false; /* -N: do not install the SIGUSR1 handler that stops the watchdog */
+static int timeout = 10; /* -T, seconds */
+static volatile sig_atomic_t running; /* written by sig_handler(), polled by the main loop */
+static usec_t reset_delay_usec = 60 * USEC_PER_SEC; /* -R, stored in microseconds */
+
+static int help(void) {
+        /* Prints usage to stdout; the bracketed defaults reflect the current option values. */
+        printf("%s [OPTIONS...]\n\n"
+               "Handles /dev/watchdog and monitors systemd to check when a hardware\n"
+               "reset is necessary\n\n"
+               "  -h --help               Show this help\n"
+               "  -N --nowayout           If possible never disable the watchdog. By default,\n"
+               "                          a graceful shutdown (triggered by SIGUSR1) stops\n"
+               "                          the watchdog\n"
+               "  -R --reset-delay=REBOOT The amount of time systemd gets to reboot the\n"
+               "                          system gracefully in seconds [default %llu]\n"
+               "  -T --timeout=TIMEOUT    Watchdog timeout in seconds [default %d]\n",
+               program_invocation_short_name, (unsigned long long) (reset_delay_usec/USEC_PER_SEC), timeout); /* usec_t is 64-bit: cast to match %llu */
+
+        return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+        /* Returns 1 to continue, 0 after --help, -EINVAL on an unknown option. */
+        static const struct option options[] = {
+                { "help",        no_argument,       NULL, 'h' },
+                { "nowayout",    no_argument,       NULL, 'N' },
+                { "reset-delay", required_argument, NULL, 'R' },
+                { "timeout",     required_argument, NULL, 'T' },
+                { NULL,          0,                 NULL, 0   }
+        };
+
+        int c;
+
+        assert(argc >= 0);
+        assert(argv);
+        /* Every option letter handled in the switch must also appear in the optstring, or getopt rejects it. */
+        while ((c = getopt_long(argc, argv, "+hNT:R:", options, NULL)) >= 0) {
+
+                switch (c) {
+
+                case 'h':
+                        help();
+                        return 0;
+
+                case 'N':
+                        nowayout = true;
+                        break;
+
+                case 'R':
+                        reset_delay_usec = atoi(optarg) * USEC_PER_SEC; /* NOTE(review): atoi() does no validation; negative input wraps */
+                        break;
+
+                case 'T':
+                        timeout = atoi(optarg); /* NOTE(review): non-numeric input silently becomes 0 */
+                        break;
+
+                default:
+                        log_error("Unknown option code %c", c);
+                        help();
+                        return -EINVAL;
+                }
+        }
+
+        return 1;
+}
+
+static int bus_init(DBusConnection **bus, DBusMessage **m) {
+        int r;
+        DBusError error;
+        const char *iface = "org.freedesktop.systemd1.Manager";
+        const char *property = "WatchdogRebootTimestampMonotonic";
+        /* Connects to the system bus and pre-builds the Properties.Get call that main() re-sends; 0 or a negative errno. */
+        assert(bus);
+        assert(m);
+
+        dbus_error_init(&error);
+        if ((r = bus_connect(DBUS_BUS_SYSTEM, bus, NULL, &error)) < 0) {
+                log_error("Failed to get D-Bus connection: %s", bus_error_message(&error));
+                dbus_error_free(&error); /* release the message text owned by the DBusError */
+                return -EINVAL;
+        }
+
+        if (!(*m = dbus_message_new_method_call("org.freedesktop.systemd1",
+                                               "/org/freedesktop/systemd1",
+                                               "org.freedesktop.DBus.Properties",
+                                               "Get"))) {
+                log_error("Could not allocate message.");
+                return -ENOMEM;
+        }
+
+        if (!dbus_message_append_args(*m,
+                                      DBUS_TYPE_STRING, &iface,
+                                      DBUS_TYPE_STRING, &property,
+                                      DBUS_TYPE_INVALID)) {
+                log_error("Could not append interface and property name to message.");
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+static int bus_get_timestamp(DBusConnection *bus, DBusMessage *m, usec_t *timestamp) {
+        DBusMessage *reply;
+        DBusMessageIter iter, sub;
+        DBusError error;
+        int r = 0;
+        /* Sends the prepared Get call, blocks for the reply and stores the variant's uint64 in *timestamp; negative on failure. */
+        assert(bus);
+        assert(m);
+        assert(timestamp);
+        dbus_error_init(&error); /* a DBusError must be initialised before libdbus may fill it in */
+        reply = dbus_connection_send_with_reply_and_block(bus, m, -1, &error);
+        if (!reply) {
+                log_error("Error fetching reboot timestamp: %s",
+                          bus_error_message(&error));
+                dbus_error_free(&error);
+                return -EINVAL;
+        }
+        if (!dbus_message_iter_init(reply, &iter) || dbus_message_iter_get_arg_type(&iter) != DBUS_TYPE_VARIANT) {
+                log_error("Failed to parse reply (init).");
+                r = -EINVAL;
+                goto finish;
+        }
+        /* Properties.Get answers with a variant; only a container may be recursed into. */
+        dbus_message_iter_recurse(&iter, &sub);
+
+        r = bus_iter_get_basic_and_next(&sub, DBUS_TYPE_UINT64, timestamp, false);
+        if (r < 0)
+                log_error("Failed to parse reply (value).");
+
+finish:
+        dbus_message_unref(reply);
+        return r;
+}
+
+static void sig_handler(int sig) { /* installed by main() for SIGUSR1, and only when nowayout is false */
+        running = false; /* makes the while (running) loop in main() exit */
+        if (sig == SIGUSR1)
+                watchdog_done(watchdog_fd); /* magic close, then close the descriptor */
+}
+
+int main(int argc, char *argv[]) {
+        int r;
+        struct timespec ts;
+        usec_t watchdog_usec;
+        DBusConnection *bus = NULL;
+        DBusMessage *m = NULL;
+        /* Entry point: parse options, set up D-Bus and /dev/watchdog, then send keep-alives until stopped. */
+        log_set_target(LOG_TARGET_AUTO);
+        log_parse_environment();
+        log_open();
+
+        umask(0022);
+
+        r = parse_argv(argc, argv); /* 0 = help was shown, <0 = bad option */
+        if (r <= 0)
+                return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+
+        if (bus_init(&bus, &m) != 0)
+                return EXIT_FAILURE;
+
+        if (watchdog_init(&timeout, &watchdog_fd) != 0)
+                return EXIT_FAILURE;
+
+        if (!nowayout) /* SIGUSR1 is what the unit file sends as KillSignal= */
+                signal(SIGUSR1, sig_handler);
+
+        log_info("Watchdog timeout set to %ds", timeout);
+        sd_notify(false,
+                  "READY=1\n"
+                  "STATUS=Watchdog started.\n");
+        /* NOTE(review): integer division, so a timeout of 1 yields an interval of 0 - confirm that is acceptable. */
+        timeout /= 2; /* from here on this is the keep-alive interval, half the watchdog timeout */
+        running = true;
+        clock_gettime(CLOCK_MONOTONIC, &ts);
+        while (running) {
+                r = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, 0); /* sleeps until the absolute deadline in ts */
+                if (r == EINTR) /* interrupted by a signal: re-evaluate running */
+                        continue;
+
+                ts.tv_sec += timeout;
+
+                if (bus_get_timestamp(bus, m, &watchdog_usec) != 0) /* no timestamp means no keep-alive this round */
+                        continue;
+
+                /* keep-alive failed. There is nothing we can do but quit */
+                if (watchdog_handle(watchdog_fd, watchdog_usec, reset_delay_usec) < 0)
+                        return EXIT_FAILURE;
+        }
+        return EXIT_SUCCESS;
+}
diff --git a/units/.gitignore b/units/.gitignore
index 94412d5..e53de0b 100644
--- a/units/.gitignore
+++ b/units/.gitignore
@@ -1,3 +1,4 @@
+/systemd-watchdogd.service
 /systemd-journald.service
 user@.service
 systemd-logind.service
diff --git a/units/systemd-watchdogd.service.in b/units/systemd-watchdogd.service.in
new file mode 100644
index 0000000..3ef38b1
--- /dev/null
+++ b/units/systemd-watchdogd.service.in
@@ -0,0 +1,18 @@
+#  This file is part of systemd.
+#
+#  systemd is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+
+[Unit]
+Description=Watchdog Daemon
+DefaultDependencies=no
+BindTo=dev-watchdog.device
+After=dev-watchdog.device
+Before=sysinit.target
+
+[Service]
+Type=notify
+ExecStart=@rootlibexecdir@/systemd-watchdogd --timeout=100 --reset-delay=60
+KillSignal=SIGUSR1
-- 
1.7.7.3



More information about the systemd-devel mailing list