[systemd-devel] [PATCH v2 2/3] service: add watchdog restart/reboot timeouts

Michael Olbrich m.olbrich at pengutronix.de
Tue Jan 31 06:00:43 PST 2012


This patch adds the WatchdogRestartSec and WatchdogRebootSec
properties to services. Systemd will restart the service / reboot the
system if the watchdog timestamp has not been refreshed (by sd_notify()
with "WATCHDOG=1") for the configured amount of time.
This functionality is only enabled once the watchdog timestamp has been
set at least once.
---
changes in v2:
 - adapt to changes from "d200735 dbus: more efficient implementation of properties"
 - stop timers in service_done() and restart in service_coldplug()

 man/systemd.service.xml          |   32 ++++++++++++++++++++++++++++
 src/dbus-service.c               |    4 +++
 src/load-fragment-gperf.gperf.m4 |    2 +
 src/service.c                    |   42 ++++++++++++++++++++++++++++++++++++++
 src/service.h                    |    4 +++
 5 files changed, 84 insertions(+), 0 deletions(-)

diff --git a/man/systemd.service.xml b/man/systemd.service.xml
index 0baddd1..48f63ce 100644
--- a/man/systemd.service.xml
+++ b/man/systemd.service.xml
@@ -460,6 +460,38 @@
                         </varlistentry>
 
                         <varlistentry>
+                                <term><varname>WatchdogRestartSec=</varname></term>
+                                <listitem><para>Configures the time to
+                                wait before restarting a service. This
+                                is activated with the first
+                                <citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry>
+                                call with "WATCHDOG=1". If the time
+                                between two such calls is larger than
+                                the configured time then the service
+                                is restarted. Defaults to 0s, meaning
+                                watchdog triggered restart is
+                                disabled.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
+                                <term><varname>WatchdogRebootSec=</varname></term>
+                                <listitem><para>Configures the time to
+                                wait before rebooting the system. This
+                                is basically the same as
+                                <varname>WatchdogRestartSec=</varname>
+                                but the whole system is rebooted
+                                instead of just restarting the
+                                service. The typical use-case is to
+                                set this to
+                                <varname>WatchdogRestartSec=</varname>
+                                + <varname>TimeoutSec=</varname> to
+                                reboot in case the service restart
+                                fails. Defaults to 0s, meaning
+                                watchdog triggered reboot is
+                                disabled.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><varname>Restart=</varname></term>
                                 <listitem><para>Configures whether the
                                 main service process shall be
diff --git a/src/dbus-service.c b/src/dbus-service.c
index d7529ec..c219aba 100644
--- a/src/dbus-service.c
+++ b/src/dbus-service.c
@@ -43,6 +43,8 @@
         "  <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
         "  <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogRestartUSec\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogRebootUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
         BUS_EXEC_COMMAND_INTERFACE("ExecStartPre")                      \
@@ -115,6 +117,8 @@ static const BusProperty bus_service_properties[] = {
         { "NotifyAccess",           bus_service_append_notify_access, "s", offsetof(Service, notify_access)             },
         { "RestartUSec",            bus_property_append_usec,         "t", offsetof(Service, restart_usec)              },
         { "TimeoutUSec",            bus_property_append_usec,         "t", offsetof(Service, timeout_usec)              },
+        { "WatchdogRestartUSec",    bus_property_append_usec,         "t", offsetof(Service, watchdog_restart_usec)     },
+        { "WatchdogRebootUSec",     bus_property_append_usec,         "t", offsetof(Service, watchdog_reboot_usec)      },
         { "WatchdogTimestamp",      bus_property_append_usec,         "t", offsetof(Service, watchdog_timestamp.realtime)},
         { "WatchdogTimestampMonotonic",bus_property_append_usec,      "t", offsetof(Service, watchdog_timestamp.monotonic)},
         BUS_EXEC_COMMAND_PROPERTY("ExecStartPre",  offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]),  true ),
diff --git a/src/load-fragment-gperf.gperf.m4 b/src/load-fragment-gperf.gperf.m4
index 8ca799e..824241b 100644
--- a/src/load-fragment-gperf.gperf.m4
+++ b/src/load-fragment-gperf.gperf.m4
@@ -134,6 +134,8 @@ Service.ExecStop,                config_parse_exec,                  SERVICE_EXE
 Service.ExecStopPost,            config_parse_exec,                  SERVICE_EXEC_STOP_POST,        offsetof(Service, exec_command)
 Service.RestartSec,              config_parse_usec,                  0,                             offsetof(Service, restart_usec)
 Service.TimeoutSec,              config_parse_usec,                  0,                             offsetof(Service, timeout_usec)
+Service.WatchdogRestartSec,      config_parse_usec,                  0,                             offsetof(Service, watchdog_restart_usec)
+Service.WatchdogRebootSec,       config_parse_usec,                  0,                             offsetof(Service, watchdog_reboot_usec)
 Service.Type,                    config_parse_service_type,          0,                             offsetof(Service, type)
 Service.Restart,                 config_parse_service_restart,       0,                             offsetof(Service, restart)
 Service.PermissionsStartOnly,    config_parse_bool,                  0,                             offsetof(Service, permissions_start_only)
diff --git a/src/service.c b/src/service.c
index e107179..ecc1410 100644
--- a/src/service.c
+++ b/src/service.c
@@ -112,6 +112,10 @@ static void service_init(Unit *u) {
 
         s->timeout_usec = DEFAULT_TIMEOUT_USEC;
         s->restart_usec = DEFAULT_RESTART_USEC;
+
+        s->watchdog_restart_watch.type = WATCH_INVALID;
+        s->watchdog_reboot_watch.type = WATCH_INVALID;
+
         s->timer_watch.type = WATCH_INVALID;
 #ifdef HAVE_SYSV_COMPAT
         s->sysv_start_priority = -1;
@@ -208,14 +212,45 @@ static void service_connection_unref(Service *s) {
 static void service_stop_watchdog(Service *s) {
         assert(s);
 
+        unit_unwatch_timer(UNIT(s), &s->watchdog_restart_watch);
+        unit_unwatch_timer(UNIT(s), &s->watchdog_reboot_watch);
         s->watchdog_timestamp.realtime = 0;
         s->watchdog_timestamp.monotonic = 0;
 }
 
+/* Arm the watchdog restart and reboot timers of a service.
+ *
+ * offset is the time already elapsed since the last watchdog timestamp; it is
+ * subtracted from the configured timeouts. A timeout of 0 means the respective
+ * timer is not armed. Failures to arm a timer are logged and otherwise ignored. */
+static void service_setup_watchdog_timer(Service *s, usec_t offset) {
+        usec_t delay;
+        int r;
+
+        assert(s);
+
+        if (s->watchdog_restart_usec > 0) {
+                /* If the timeout has already elapsed, fire as soon as possible
+                 * instead of letting the subtraction wrap around */
+                delay = offset < s->watchdog_restart_usec ? s->watchdog_restart_usec - offset : 1;
+                r = unit_watch_timer(UNIT(s), delay, &s->watchdog_restart_watch);
+                if (r < 0)
+                        log_warning("%s failed to install watchdog restart timer: %s", UNIT(s)->id, strerror(-r));
+        }
+
+        if (s->watchdog_reboot_usec > 0) {
+                delay = offset < s->watchdog_reboot_usec ? s->watchdog_reboot_usec - offset : 1;
+                r = unit_watch_timer(UNIT(s), delay, &s->watchdog_reboot_watch);
+                if (r < 0)
+                        log_warning("%s failed to install watchdog reboot timer: %s", UNIT(s)->id, strerror(-r));
+        }
+}
+
 static void service_reset_watchdog(Service *s) {
         assert(s);
 
         dual_timestamp_get(&s->watchdog_timestamp);
+        service_setup_watchdog_timer(s, 0);
 }
 
 static void service_done(Unit *u) {
@@ -259,6 +282,8 @@ static void service_done(Unit *u) {
 
         unit_ref_unset(&s->accept_socket);
 
+        service_stop_watchdog(s);
+
         unit_unwatch_timer(u, &s->timer_watch);
 }
 
@@ -1566,6 +1591,12 @@ static int service_coldplug(Unit *u) {
 
                 service_set_state(s, s->deserialized_state);
         }
+        if (dual_timestamp_is_set(&s->watchdog_timestamp)) {
+                dual_timestamp t;
+
+                dual_timestamp_get(&t);
+                service_setup_watchdog_timer(s,  t.monotonic - s->watchdog_timestamp.monotonic);
+        }
 
         return 0;
 }
@@ -2896,6 +2927,17 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
         assert(s);
         assert(elapsed == 1);
 
+        if (w == &s->watchdog_restart_watch) {
+                log_error("%s watchdog timeout: restarting service...", u->id);
+                manager_add_job(u->manager, JOB_RESTART, u, JOB_FAIL, true, 0, 0);
+                return;
+        }
+        if (w == &s->watchdog_reboot_watch) {
+                log_error("%s watchdog timeout: rebooting...", u->id);
+                manager_add_job_by_name(u->manager, JOB_START, "reboot.target", JOB_REPLACE, true, 0, 0);
+                return;
+        }
+
         assert(w == &s->timer_watch);
 
         switch (s->state) {
diff --git a/src/service.h b/src/service.h
index dbae68b..d6a8290 100644
--- a/src/service.h
+++ b/src/service.h
@@ -101,6 +101,10 @@ struct Service {
         usec_t timeout_usec;
 
         dual_timestamp watchdog_timestamp;
+        usec_t watchdog_restart_usec;
+        usec_t watchdog_reboot_usec;
+        Watch watchdog_restart_watch;
+        Watch watchdog_reboot_watch;
 
         ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
         ExecContext exec_context;
-- 
1.7.7.3



More information about the systemd-devel mailing list