[systemd-devel] [PATCH v3 4/4] add basic watchdog daemon
Michael Olbrich
m.olbrich at pengutronix.de
Wed Feb 1 08:17:15 PST 2012
This patch introduces a small watchdog daemon that supervises systemd
and handles /dev/watchdog.
---
changes in v3:
- new patch
.gitignore | 1 +
Makefile.am | 39 ++++++-
configure.ac | 7 +
src/99-systemd.rules.in | 2 +
src/watchdog.c | 76 ++++++++++++
src/watchdog.h | 32 +++++
src/watchdogd.c | 227 ++++++++++++++++++++++++++++++++++++
units/.gitignore | 1 +
units/systemd-watchdogd.service.in | 18 +++
9 files changed, 402 insertions(+), 1 deletions(-)
create mode 100644 src/watchdog.c
create mode 100644 src/watchdog.h
create mode 100644 src/watchdogd.c
create mode 100644 units/systemd-watchdogd.service.in
diff --git a/.gitignore b/.gitignore
index 3da7e66..42c03d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/systemd-watchdogd
/systemd-multi-seat-x
/systemd-cgtop
/systemd-coredump
diff --git a/Makefile.am b/Makefile.am
index 5473623..53045d5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -647,7 +647,8 @@ EXTRA_DIST += \
src/dbus-loop.h \
src/spawn-agent.h \
src/acl-util.h \
- src/logs-show.h
+ src/logs-show.h \
+ src/watchdog.h
MANPAGES = \
man/systemd.1 \
@@ -1551,6 +1552,42 @@ MANPAGES += \
endif
# ------------------------------------------------------------------------------
+if ENABLE_WATCHDOGD
+rootlibexec_PROGRAMS += \
+ systemd-watchdogd
+
+nodist_systemunit_DATA += \
+ units/systemd-watchdogd.service
+
+EXTRA_DIST += \
+ units/systemd-watchdogd.service.in
+
+systemd_watchdogd_SOURCES = \
+ src/watchdog.c \
+ src/watchdogd.c \
+ src/dbus-common.c
+
+systemd_watchdogd_CFLAGS = \
+ $(AM_CFLAGS) \
+ $(DBUS_CFLAGS)
+
+systemd_watchdogd_LDADD = \
+ libsystemd-basic.la \
+ libsystemd-daemon.la \
+ $(DBUS_LIBS)
+
+watchdogd-install-data-hook:
+ $(MKDIR_P) -m 0755 \
+ $(DESTDIR)$(systemunitdir)/sysinit.target.wants
+ ( cd $(DESTDIR)$(systemunitdir)/sysinit.target.wants && \
+ rm -f systemd-watchdogd.service && \
+ $(LN_S) ../systemd-watchdogd.service systemd-watchdogd.service )
+
+INSTALL_DATA_HOOKS += \
+ watchdogd-install-data-hook
+endif
+
+# ------------------------------------------------------------------------------
if ENABLE_QUOTACHECK
rootlibexec_PROGRAMS += \
systemd-quotacheck
diff --git a/configure.ac b/configure.ac
index 37814de..e73c508 100644
--- a/configure.ac
+++ b/configure.ac
@@ -380,6 +380,13 @@ if test "x$enable_coredump" != "xno"; then
fi
AM_CONDITIONAL(ENABLE_COREDUMP, [test "$have_coredump" = "yes"])
+# The watchdog daemon is opt-in: it is only built when --enable-watchdogd is given.
+# Note: POSIX test(1) only defines "=" for string comparison; "==" is a bashism.
+have_watchdogd=no
+AC_ARG_ENABLE(watchdogd, AS_HELP_STRING([--enable-watchdogd], [enable watchdogd hook]))
+if test "x$enable_watchdogd" = "xyes"; then
+        have_watchdogd=yes
+fi
+AM_CONDITIONAL(ENABLE_WATCHDOGD, [test "$have_watchdogd" = "yes"])
+
have_gtk=no
AC_ARG_ENABLE(gtk, AS_HELP_STRING([--disable-gtk], [disable GTK tools]))
if test "x$enable_gtk" != "xno"; then
diff --git a/src/99-systemd.rules.in b/src/99-systemd.rules.in
index d306f71..8cb9e41 100644
--- a/src/99-systemd.rules.in
+++ b/src/99-systemd.rules.in
@@ -12,6 +12,8 @@ SUBSYSTEM=="tty", KERNEL=="tty[a-zA-Z]*|hvc*|xvc*|hvsi*", TAG+="systemd"
KERNEL=="vport*", TAG+="systemd"
+KERNEL=="watchdog", TAG+="systemd"
+
SUBSYSTEM=="block", KERNEL!="ram*|loop*", TAG+="systemd"
SUBSYSTEM=="block", KERNEL!="ram*|loop*", ENV{DM_UDEV_DISABLE_OTHER_RULES_FLAG}=="1", ENV{SYSTEMD_READY}="0"
diff --git a/src/watchdog.c b/src/watchdog.c
new file mode 100644
index 0000000..c3a1828
--- /dev/null
+++ b/src/watchdog.c
@@ -0,0 +1,76 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2012 Michael Olbrich
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/reboot.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/watchdog.h>
+
+#include "watchdog.h"
+
+/*
+ * Open /dev/watchdog and program its timeout.
+ *
+ * timeout: passed by pointer to the WDIOC_SETTIMEOUT ioctl; the driver may
+ *          update it to the value it actually applied (see the kernel
+ *          watchdog API documentation).
+ * fd:      receives the open descriptor on success; on failure it is left
+ *          negative (the device is closed again if the ioctl fails).
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+int watchdog_init(int *timeout, int *fd) {
+        int saved_errno;
+
+        assert(fd);
+        assert(timeout);
+
+        *fd = open("/dev/watchdog", O_RDWR);
+        if (*fd < 0) {
+                /* Capture errno before logging, which may overwrite it. */
+                saved_errno = errno;
+                log_error("Could not open /dev/watchdog: %s", strerror(saved_errno));
+                return -saved_errno;
+        }
+
+        if (ioctl(*fd, WDIOC_SETTIMEOUT, timeout) < 0) {
+                saved_errno = errno;
+                log_error("Failed to set watchdog timeout: %s", strerror(saved_errno));
+                /* Do not hand a half-initialized descriptor back to the caller. */
+                close(*fd);
+                *fd = -1;
+                return -saved_errno;
+        }
+        return 0;
+}
+
+/*
+ * Ask the watchdog to stop and close the device.
+ *
+ * Returns 0 on success, or a negative errno value if the magic close
+ * character could not be written. The descriptor is closed either way.
+ *
+ * This function is invoked from a signal handler, so it deliberately does
+ * no logging and only uses write() and close().
+ */
+int watchdog_done(int fd) {
+        int r = 0;
+
+        /* Write the 'magic close' character before closing. This should stop
+           any watchdog that can be stopped. */
+        if (write(fd, "V", 1) < 0)
+                r = -errno;
+
+        close(fd);
+        return r;
+}
+
+/*
+ * Perform one watchdog cycle.
+ *
+ * fd:               open watchdog device.
+ * reboot_monotonic: CLOCK_MONOTONIC timestamp at which a reboot was
+ *                   requested; 0 disables the deadline check.
+ * shutdown_delay:   grace period granted after reboot_monotonic.
+ *
+ * If the grace period has expired, the keep-alive is skipped, reboot() is
+ * attempted and 1 is returned. Otherwise the watchdog is pinged; returns 0
+ * on success or a negative errno value if the keep-alive ioctl fails.
+ */
+int watchdog_handle(int fd, usec_t reboot_monotonic, usec_t shutdown_delay) {
+        usec_t t;
+
+        t = now(CLOCK_MONOTONIC);
+
+        /* Compare elapsed time against the delay instead of adding the two
+           values, so a very large shutdown_delay cannot wrap around. */
+        if (reboot_monotonic && t > reboot_monotonic && (t - reboot_monotonic) > shutdown_delay) {
+                log_error("Reboot timeout exceeded! Skiping watchdog keep-alive and trying to reset.");
+                reboot(RB_AUTOBOOT);
+                return 1;
+        }
+
+        if (ioctl(fd, WDIOC_KEEPALIVE, 0) < 0) {
+                /* Capture errno before logging, which may overwrite it. */
+                int saved_errno = errno;
+
+                log_error("watchdog keep-alive ioctl failed: %s", strerror(saved_errno));
+                return -saved_errno;
+        }
+        return 0;
+}
diff --git a/src/watchdog.h b/src/watchdog.h
new file mode 100644
index 0000000..49ce3e2
--- /dev/null
+++ b/src/watchdog.h
@@ -0,0 +1,32 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#ifndef foowatchdoghfoo
+#define foowatchdoghfoo
+
+/***
+ This file is part of systemd.
+
+ Copyright 2012 Michael Olbrich
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "util.h"
+
+/* Open /dev/watchdog, store the descriptor in *fd and apply *timeout with
+ * the WDIOC_SETTIMEOUT ioctl. Returns 0 on success, negative errno on failure. */
+int watchdog_init(int *timeout, int *fd);
+/* Write the magic close character 'V' to fd, then close it. Returns 0 on success. */
+int watchdog_done(int fd);
+
+/* Ping the watchdog with WDIOC_KEEPALIVE. If reboot_monotonic is non-zero and
+ * reboot_monotonic + shutdown_delay lies in the past (CLOCK_MONOTONIC), skip
+ * the ping, attempt reboot() and return 1. Returns 0 after a successful ping
+ * and a negative errno value if the ioctl fails. */
+int watchdog_handle(int fd, usec_t reboot_monotonic, usec_t shutdown_delay);
+
+#endif
diff --git a/src/watchdogd.c b/src/watchdogd.c
new file mode 100644
index 0000000..8949ca9
--- /dev/null
+++ b/src/watchdogd.c
@@ -0,0 +1,227 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2012 Michael Olbrich
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <string.h>
+#include <time.h>
+#include <dbus/dbus.h>
+
+#include "log.h"
+#include "util.h"
+#include "dbus-common.h"
+#include "sd-daemon.h"
+#include "watchdog.h"
+
+/* Descriptor of the watchdog device; stays -1 until watchdog_init() fills it in. */
+static int watchdog_fd = -1;
+/* Set by --nowayout: the SIGUSR1 handler that stops the watchdog is not installed. */
+static bool nowayout = false;
+/* Watchdog timeout in seconds (--timeout). */
+static int timeout = 10;
+/* Main loop flag. It is cleared from the signal handler, so it must be a
+   volatile sig_atomic_t for the main loop to reliably observe the change. */
+static volatile sig_atomic_t running;
+/* Grace period systemd gets for a clean reboot before a reset is forced (--reset-delay). */
+static usec_t reset_delay_usec = 60 * USEC_PER_SEC;
+
+/* Print the command line usage, including the current default values. Always returns 0. */
+static int help(void) {
+
+        /* reset_delay_usec is a usec_t, so it is cast explicitly to match the
+           %llu conversion; passing it to %d is a format/argument mismatch. */
+        printf("%s [OPTIONS...]\n\n"
+               "Handles /dev/watchdog and monitors systemd to check when a hardware\n"
+               "reset is necessary\n\n"
+               " -h --help Show this help\n"
+               " -N --nowayout If possible never disable the watchdog. By default,\n"
+               " a graceful shutdown (triggered by SIGUSR1) stops\n"
+               " the watchdog\n"
+               " -R --reset-delay=REBOOT The amount of time systemd gets to reboot the\n"
+               " system gracefully in seconds [default %llu]\n"
+               " -T --timeout=TIMEOUT Watchdog timeout in seconds [default %d]\n",
+               program_invocation_short_name,
+               (unsigned long long) (reset_delay_usec / USEC_PER_SEC),
+               timeout);
+
+        return 0;
+}
+
+/* Parse s as a non-negative decimal number of seconds that fits in an int.
+   Returns 0 and stores the value in *ret, or -EINVAL on malformed input. */
+static int parse_seconds(const char *s, int *ret) {
+        char *end = NULL;
+        long v;
+
+        errno = 0;
+        v = strtol(s, &end, 10);
+        if (errno != 0 || end == s || *end != '\0' || v < 0 || (long) (int) v != v)
+                return -EINVAL;
+
+        *ret = (int) v;
+        return 0;
+}
+
+/*
+ * Parse the command line into the file-level settings.
+ *
+ * Returns 1 if the daemon should continue, 0 if it should exit successfully
+ * (help was shown) and a negative errno value on invalid arguments.
+ */
+static int parse_argv(int argc, char *argv[]) {
+
+        static const struct option options[] = {
+                { "help",        no_argument,       NULL, 'h' },
+                /* --nowayout is a plain flag and takes no argument */
+                { "nowayout",    no_argument,       NULL, 'N' },
+                { "reset-delay", required_argument, NULL, 'R' },
+                { "timeout",     required_argument, NULL, 'T' },
+                { NULL,          0,                 NULL, 0   }
+        };
+
+        int c, seconds;
+
+        assert(argc >= 0);
+        assert(argv);
+
+        /* Every short option advertised by help() must be listed here. */
+        while ((c = getopt_long(argc, argv, "+hNR:T:", options, NULL)) >= 0) {
+
+                switch (c) {
+
+                case 'h':
+                        help();
+                        return 0;
+
+                case 'N':
+                        nowayout = true;
+                        break;
+
+                case 'R':
+                        if (parse_seconds(optarg, &seconds) < 0) {
+                                log_error("Failed to parse reset delay: %s", optarg);
+                                return -EINVAL;
+                        }
+                        /* widen before multiplying so the product is computed as usec_t */
+                        reset_delay_usec = (usec_t) seconds * USEC_PER_SEC;
+                        break;
+
+                case 'T':
+                        /* a zero timeout is meaningless for a watchdog */
+                        if (parse_seconds(optarg, &seconds) < 0 || seconds < 1) {
+                                log_error("Failed to parse watchdog timeout: %s", optarg);
+                                return -EINVAL;
+                        }
+                        timeout = seconds;
+                        break;
+
+                default:
+                        log_error("Unknown option code %c", c);
+                        help();
+                        return -EINVAL;
+                }
+        }
+
+        return 1;
+}
+
+/*
+ * Connect to the system bus and prepare the reusable Properties.Get request
+ * for the manager's WatchdogRebootTimestampMonotonic property.
+ *
+ * bus: receives the connection.
+ * m:   receives the prepared method call; it is left NULL if building the
+ *      message fails.
+ *
+ * Returns 0 on success or a negative errno-style value on failure.
+ */
+static int bus_init(DBusConnection **bus, DBusMessage **m) {
+        DBusError error;
+        const char *iface = "org.freedesktop.systemd1.Manager";
+        const char *property = "WatchdogRebootTimestampMonotonic";
+
+        assert(bus);
+        assert(m);
+
+        dbus_error_init(&error);
+
+        if (bus_connect(DBUS_BUS_SYSTEM, bus, NULL, &error) < 0) {
+                log_error("Failed to get D-Bus connection: %s", bus_error_message(&error));
+                /* release the message/name strings stored in the error */
+                dbus_error_free(&error);
+                return -EINVAL;
+        }
+
+        *m = dbus_message_new_method_call("org.freedesktop.systemd1",
+                                          "/org/freedesktop/systemd1",
+                                          "org.freedesktop.DBus.Properties",
+                                          "Get");
+        if (!*m) {
+                log_error("Could not allocate message.");
+                return -ENOMEM;
+        }
+
+        if (!dbus_message_append_args(*m,
+                                      DBUS_TYPE_STRING, &iface,
+                                      DBUS_TYPE_STRING, &property,
+                                      DBUS_TYPE_INVALID)) {
+                log_error("Could not attach target and flag information to message.");
+                /* do not leave a half-built message with the caller */
+                dbus_message_unref(*m);
+                *m = NULL;
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+/*
+ * Send the prepared Properties.Get request and extract the uint64 value
+ * wrapped in the reply's variant.
+ *
+ * bus:       connection to send on.
+ * m:         request prepared by bus_init(); it is not consumed.
+ * timestamp: receives the property value on success.
+ *
+ * Returns 0 on success or a negative errno-style value on failure. The call
+ * blocks with libdbus' default timeout (-1).
+ */
+static int bus_get_timestamp(DBusConnection *bus, DBusMessage *m, usec_t *timestamp) {
+        DBusMessage *reply;
+        DBusMessageIter iter, sub;
+        DBusError error;
+        int r = 0;
+
+        assert(bus);
+        assert(m);
+        assert(timestamp);
+
+        /* libdbus requires the error to be initialized before it is passed in */
+        dbus_error_init(&error);
+
+        reply = dbus_connection_send_with_reply_and_block(bus, m, -1, &error);
+        if (!reply) {
+                log_error("Error fetching reboot timestamp: %s",
+                          bus_error_message(&error));
+                /* this runs once per loop iteration; do not leak the error strings */
+                dbus_error_free(&error);
+                return -EINVAL;
+        }
+
+        /* Properties.Get answers with a variant; only recurse into one. */
+        if (!dbus_message_iter_init(reply, &iter) ||
+            dbus_message_iter_get_arg_type(&iter) != DBUS_TYPE_VARIANT) {
+                log_error("Failed to parse reply (init).");
+                r = -EINVAL;
+                goto finish;
+        }
+
+        dbus_message_iter_recurse(&iter, &sub);
+
+        r = bus_iter_get_basic_and_next(&sub, DBUS_TYPE_UINT64, timestamp, false);
+        if (r < 0)
+                log_error("Failed to parse reply (value).");
+
+finish:
+        dbus_message_unref(reply);
+        return r;
+}
+
+/* Signal handler: ask the main loop to stop and, for SIGUSR1, shut the
+   watchdog device down via watchdog_done(). */
+static void sig_handler(int sig) {
+        running = false;
+
+        if (sig != SIGUSR1)
+                return;
+
+        watchdog_done(watchdog_fd);
+}
+
+/*
+ * Daemon entry point: parse options, connect to D-Bus, open the watchdog and
+ * then, at half the watchdog timeout, fetch the manager's reboot timestamp
+ * and either ping the watchdog or force a reset.
+ *
+ * Returns EXIT_SUCCESS after a clean stop (or --help) and EXIT_FAILURE on
+ * setup errors or when the keep-alive fails.
+ */
+int main(int argc, char *argv[]) {
+        int r;
+        struct timespec ts;
+        usec_t watchdog_usec;
+        DBusConnection *bus = NULL;
+        DBusMessage *m = NULL;
+
+        log_set_target(LOG_TARGET_AUTO);
+        log_parse_environment();
+        log_open();
+
+        umask(0022);
+
+        r = parse_argv(argc, argv);
+        if (r <= 0)
+                return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+
+        if (bus_init(&bus, &m) != 0)
+                return EXIT_FAILURE;
+
+        if (watchdog_init(&timeout, &watchdog_fd) != 0)
+                return EXIT_FAILURE;
+
+        /* Arm the loop flag before the handler can run, so a SIGUSR1 that
+           arrives right after signal() is not overwritten below. */
+        running = true;
+
+        if (!nowayout)
+                signal(SIGUSR1, sig_handler);
+
+        log_info("Watchdog timeout set to %ds", timeout);
+        sd_notify(false,
+                  "READY=1\n"
+                  "STATUS=Watchdog started.\n");
+
+        /* Ping at half the timeout, but never with a zero interval: the
+           absolute deadline would stop advancing and the loop would spin. */
+        timeout /= 2;
+        if (timeout < 1)
+                timeout = 1;
+
+        clock_gettime(CLOCK_MONOTONIC, &ts);
+        while (running) {
+                r = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, 0);
+                if (r == EINTR)
+                        continue;
+
+                ts.tv_sec += timeout;
+
+                if (bus_get_timestamp(bus, m, &watchdog_usec) != 0)
+                        continue;
+
+                /* The signal handler may have closed the device while we were
+                   blocked on D-Bus; do not ping a closed descriptor. */
+                if (!running)
+                        break;
+
+                /* keep-alive failed. There is nothing we can do but quit */
+                if (watchdog_handle(watchdog_fd, watchdog_usec, reset_delay_usec) < 0)
+                        return EXIT_FAILURE;
+        }
+        return EXIT_SUCCESS;
+}
diff --git a/units/.gitignore b/units/.gitignore
index 94412d5..e53de0b 100644
--- a/units/.gitignore
+++ b/units/.gitignore
@@ -1,3 +1,4 @@
+/systemd-watchdogd.service
/systemd-journald.service
user@.service
systemd-logind.service
diff --git a/units/systemd-watchdogd.service.in b/units/systemd-watchdogd.service.in
new file mode 100644
index 0000000..3ef38b1
--- /dev/null
+++ b/units/systemd-watchdogd.service.in
@@ -0,0 +1,18 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=Watchdog Daemon
+DefaultDependencies=no
+BindTo=dev-watchdog.device
+After=dev-watchdog.device
+Before=sysinit.target
+
+[Service]
+Type=notify
+ExecStart=@rootlibexecdir@/systemd-watchdogd --timeout=100 --reset-delay=60
+KillSignal=SIGUSR1
--
1.7.7.3
More information about the systemd-devel
mailing list