[systemd-devel] [PATCH 2/3] service: add watchdog restart/reboot timeouts

Michael Olbrich m.olbrich at pengutronix.de
Fri Nov 4 07:38:41 PDT 2011


This patch adds the WatchdogRestartSec and WatchdogRebootSec
properties to services. Systemd will restart the service / reboot the
system if the watchdog timestamp has not been updated for the configured
amount of time.
This functionality is only enabled once the service has updated the
watchdog timestamp at least once.
---
 man/systemd.service.xml          |   32 ++++++++++++++++++++++++++++++++
 src/dbus-service.c               |    4 ++++
 src/load-fragment-gperf.gperf.m4 |    2 ++
 src/service.c                    |   28 ++++++++++++++++++++++++++++
 src/service.h                    |    4 ++++
 5 files changed, 70 insertions(+), 0 deletions(-)

diff --git a/man/systemd.service.xml b/man/systemd.service.xml
index 7b6f12d..6d2d548 100644
--- a/man/systemd.service.xml
+++ b/man/systemd.service.xml
@@ -460,6 +460,38 @@
                         </varlistentry>
 
                         <varlistentry>
+                                <term><varname>WatchdogRestartSec=</varname></term>
+                                <listitem><para>Configures the time to
+                                wait before restarting a service. This
+                                is activated with the first
+                                <citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry>
+                                call with "WATCHDOG=1". If the time
+                                between two such calls is larger than
+                                the configured time then the service
+                                is restarted. Defaults to 0s, meaning
+                                watchdog triggered restart is
+                                disabled.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
+                                <term><varname>WatchdogRebootSec=</varname></term>
+                                <listitem><para>Configures the time to
+                                wait before rebooting the system. This
+                                is basically the same as
+                                <varname>WatchdogRestartSec=</varname>
+                                but the whole system is rebooted
+                                instead of just restarting the
+                                service. The typical use-case is to
+                                set this to
+                                <varname>WatchdogRestartSec</varname>
+                                + <varname>TimeoutSec</varname> to
+                                reboot in case the service restart
+                                fails. Defaults to 0s, meaning
+                                watchdog triggered reboot is
+                                disabled.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><varname>Restart=</varname></term>
                                 <listitem><para>Configures whether the
                                 main service process shall be
diff --git a/src/dbus-service.c b/src/dbus-service.c
index 2f53484..145667e 100644
--- a/src/dbus-service.c
+++ b/src/dbus-service.c
@@ -43,6 +43,8 @@
         "  <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
         "  <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogRestartUSec\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogRebootUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
         BUS_EXEC_COMMAND_INTERFACE("ExecStartPre")                      \
@@ -107,6 +109,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio
                 { "org.freedesktop.systemd1.Service", "NotifyAccess",           bus_service_append_notify_access, "s", &u->service.notify_access       },
                 { "org.freedesktop.systemd1.Service", "RestartUSec",            bus_property_append_usec,   "t", &u->service.restart_usec              },
                 { "org.freedesktop.systemd1.Service", "TimeoutUSec",            bus_property_append_usec,   "t", &u->service.timeout_usec              },
+                { "org.freedesktop.systemd1.Service", "WatchdogRestartUSec",    bus_property_append_usec,   "t", &u->service.watchdog_restart_usec     },
+                { "org.freedesktop.systemd1.Service", "WatchdogRebootUSec",     bus_property_append_usec,   "t", &u->service.watchdog_reboot_usec      },
                 { "org.freedesktop.systemd1.Service", "WatchdogTimestamp",      bus_property_append_usec,   "t", &u->service.watchdog_timestamp.realtime },
                 { "org.freedesktop.systemd1.Service", "WatchdogTimestampMonotonic",bus_property_append_usec,"t", &u->service.watchdog_timestamp.monotonic },
                 BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_PRE],  "ExecStartPre"),
diff --git a/src/load-fragment-gperf.gperf.m4 b/src/load-fragment-gperf.gperf.m4
index 41797d2..cde21d2 100644
--- a/src/load-fragment-gperf.gperf.m4
+++ b/src/load-fragment-gperf.gperf.m4
@@ -131,6 +131,8 @@ Service.ExecStop,                config_parse_exec,                  SERVICE_EXE
 Service.ExecStopPost,            config_parse_exec,                  SERVICE_EXEC_STOP_POST,        offsetof(Service, exec_command)
 Service.RestartSec,              config_parse_usec,                  0,                             offsetof(Service, restart_usec)
 Service.TimeoutSec,              config_parse_usec,                  0,                             offsetof(Service, timeout_usec)
+Service.WatchdogRestartSec,      config_parse_usec,                  0,                             offsetof(Service, watchdog_restart_usec)
+Service.WatchdogRebootSec,       config_parse_usec,                  0,                             offsetof(Service, watchdog_reboot_usec)
 Service.Type,                    config_parse_service_type,          0,                             offsetof(Service, type)
 Service.Restart,                 config_parse_service_restart,       0,                             offsetof(Service, restart)
 Service.PermissionsStartOnly,    config_parse_bool,                  0,                             offsetof(Service, permissions_start_only)
diff --git a/src/service.c b/src/service.c
index 2bd47da..0b4f5f1 100644
--- a/src/service.c
+++ b/src/service.c
@@ -112,6 +112,10 @@ static void service_init(Unit *u) {
 
         s->timeout_usec = DEFAULT_TIMEOUT_USEC;
         s->restart_usec = DEFAULT_RESTART_USEC;
+
+        s->watchdog_restart_watch.type = WATCH_INVALID;
+        s->watchdog_reboot_watch.type = WATCH_INVALID;
+
         s->timer_watch.type = WATCH_INVALID;
 #ifdef HAVE_SYSV_COMPAT
         s->sysv_start_priority = -1;
@@ -197,14 +201,27 @@ static void service_connection_unref(Service *s) {
 static void service_stop_watchdog(Service *s) {
         assert(s);
 
+        unit_unwatch_timer(UNIT(s), &s->watchdog_restart_watch); /* drop the restart watch set up in service_reset_watchdog() */
+        unit_unwatch_timer(UNIT(s), &s->watchdog_reboot_watch); /* drop the reboot watch set up in service_reset_watchdog() */
         s->watchdog_timestamp.realtime = 0;
         s->watchdog_timestamp.monotonic = 0;
 }
 
 static void service_reset_watchdog(Service *s) {
+        int r; /* result of unit_watch_timer(); negative values are treated as -errno and only logged */
         assert(s);
 
         dual_timestamp_get(&s->watchdog_timestamp);
+        if (s->watchdog_restart_usec) { /* 0 means the watchdog-triggered restart is disabled */
+                r = unit_watch_timer(UNIT(s), s->watchdog_restart_usec, &s->watchdog_restart_watch);
+                if (r < 0)
+                        log_warning("%s failed to install watchdog restart timer: %s", s->meta.id, strerror(-r));
+        }
+        if (s->watchdog_reboot_usec) { /* gate on the reboot timeout itself: 0 means the watchdog-triggered reboot is disabled */
+                r = unit_watch_timer(UNIT(s), s->watchdog_reboot_usec, &s->watchdog_reboot_watch);
+                if (r < 0)
+                        log_warning("%s failed to install watchdog reboot timer: %s", s->meta.id, strerror(-r));
+        }
 }
 
 static void service_done(Unit *u) {
@@ -2830,6 +2847,17 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
         assert(s);
         assert(elapsed == 1);
 
+        if (w == &s->watchdog_restart_watch) {
+                log_error("%s watchdog timeout: restarting service...", u->meta.id);
+                manager_add_job(u->meta.manager, JOB_RESTART, u, JOB_FAIL, true, 0, 0);
+                return;
+        }
+        if (w == &s->watchdog_reboot_watch) {
+                log_error("%s watchdog timeout: rebooting...", u->meta.id);
+                manager_add_job_by_name(u->meta.manager, JOB_START, "reboot.target", JOB_REPLACE, true, 0, 0);
+                return;
+        }
+
         assert(w == &s->timer_watch);
 
         switch (s->state) {
diff --git a/src/service.h b/src/service.h
index 3801e84..cab18c2 100644
--- a/src/service.h
+++ b/src/service.h
@@ -99,6 +99,10 @@ struct Service {
         usec_t timeout_usec;
 
         dual_timestamp watchdog_timestamp;
+        usec_t watchdog_restart_usec;
+        usec_t watchdog_reboot_usec;
+        Watch watchdog_restart_watch;
+        Watch watchdog_reboot_watch;
 
         ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
         ExecContext exec_context;
-- 
1.7.7



More information about the systemd-devel mailing list