[systemd-devel] [PATCH 1/3] introduce WatchdogSec and hook up the watchdog with the existing failure logic

Michael Olbrich m.olbrich at pengutronix.de
Fri Feb 3 12:14:23 PST 2012


---
 man/systemd.service.xml          |   17 +++++++++++++++++
 src/dbus-service.c               |    2 ++
 src/load-fragment-gperf.gperf.m4 |    1 +
 src/service.c                    |   30 ++++++++++++++++++++++++++++++
 src/service.h                    |    3 +++
 5 files changed, 53 insertions(+), 0 deletions(-)

diff --git a/man/systemd.service.xml b/man/systemd.service.xml
index 0baddd1..bf6a5ea 100644
--- a/man/systemd.service.xml
+++ b/man/systemd.service.xml
@@ -460,6 +460,23 @@
                         </varlistentry>
 
                         <varlistentry>
+                                <term><varname>WatchdogSec=</varname></term>
+                                <listitem><para>Configures the watchdog
+                                timeout for a service. This is activated
+                                with the first
+                                <citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry>
+                                call with "WATCHDOG=1". If the time
+                                between two such calls is larger than
+                                the configured time then the service
+                                enters a failure state. By setting
+                                <term><varname>Restart=</varname></term>
+                                to <option>on-failure</option> or
+                                <option>always</option> the service can
+                                be restarted. Defaults to 0s, which
+                                disables this feature.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><varname>Restart=</varname></term>
                                 <listitem><para>Configures whether the
                                 main service process shall be
diff --git a/src/dbus-service.c b/src/dbus-service.c
index 738dc7b..fedfc1d 100644
--- a/src/dbus-service.c
+++ b/src/dbus-service.c
@@ -43,6 +43,7 @@
         "  <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
         "  <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
         BUS_EXEC_COMMAND_INTERFACE("ExecStartPre")                      \
@@ -119,6 +120,7 @@ static const BusProperty bus_service_properties[] = {
         { "NotifyAccess",           bus_service_append_notify_access, "s", offsetof(Service, notify_access)                },
         { "RestartUSec",            bus_property_append_usec,         "t", offsetof(Service, restart_usec)                 },
         { "TimeoutUSec",            bus_property_append_usec,         "t", offsetof(Service, timeout_usec)                 },
+        { "WatchdogUSec",           bus_property_append_usec,         "t", offsetof(Service, watchdog_usec)                },
         { "WatchdogTimestamp",      bus_property_append_usec,         "t", offsetof(Service, watchdog_timestamp.realtime)  },
         { "WatchdogTimestampMonotonic",bus_property_append_usec,      "t", offsetof(Service, watchdog_timestamp.monotonic) },
         BUS_EXEC_COMMAND_PROPERTY("ExecStartPre",  offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]),  true ),
diff --git a/src/load-fragment-gperf.gperf.m4 b/src/load-fragment-gperf.gperf.m4
index 14c0606..9191f90 100644
--- a/src/load-fragment-gperf.gperf.m4
+++ b/src/load-fragment-gperf.gperf.m4
@@ -134,6 +134,7 @@ Service.ExecStop,                config_parse_exec,                  SERVICE_EXE
 Service.ExecStopPost,            config_parse_exec,                  SERVICE_EXEC_STOP_POST,        offsetof(Service, exec_command)
 Service.RestartSec,              config_parse_usec,                  0,                             offsetof(Service, restart_usec)
 Service.TimeoutSec,              config_parse_usec,                  0,                             offsetof(Service, timeout_usec)
+Service.WatchdogSec,             config_parse_usec,                  0,                             offsetof(Service, watchdog_usec)
 Service.Type,                    config_parse_service_type,          0,                             offsetof(Service, type)
 Service.Restart,                 config_parse_service_restart,       0,                             offsetof(Service, restart)
 Service.PermissionsStartOnly,    config_parse_bool,                  0,                             offsetof(Service, permissions_start_only)
diff --git a/src/service.c b/src/service.c
index b6bbfab..f030ccf 100644
--- a/src/service.c
+++ b/src/service.c
@@ -112,6 +112,9 @@ static void service_init(Unit *u) {
 
         s->timeout_usec = DEFAULT_TIMEOUT_USEC;
         s->restart_usec = DEFAULT_RESTART_USEC;
+
+        s->watchdog_watch.type = WATCH_INVALID;
+
         s->timer_watch.type = WATCH_INVALID;
 #ifdef HAVE_SYSV_COMPAT
         s->sysv_start_priority = -1;
@@ -208,14 +211,27 @@ static void service_connection_unref(Service *s) {
 static void service_stop_watchdog(Service *s) {
         assert(s);
 
+        unit_unwatch_timer(UNIT(s), &s->watchdog_watch);
         s->watchdog_timestamp.realtime = 0;
         s->watchdog_timestamp.monotonic = 0;
 }
 
+static void service_setup_watchdog_timer(Service *s, usec_t offset) {
+        int r;
+        assert(s);
+
+        if (s->watchdog_usec) {
+                r = unit_watch_timer(UNIT(s), s->watchdog_usec - offset, &s->watchdog_watch);
+                if (r < 0)
+                        log_warning("%s failed to install watchdog timer: %s", UNIT(s)->id, strerror(-r));
+        }
+}
+
 static void service_reset_watchdog(Service *s) {
         assert(s);
 
         dual_timestamp_get(&s->watchdog_timestamp);
+        service_setup_watchdog_timer(s, 0);
 }
 
 static void service_done(Unit *u) {
@@ -259,6 +275,8 @@ static void service_done(Unit *u) {
 
         unit_ref_unset(&s->accept_socket);
 
+        service_stop_watchdog(s);
+
         unit_unwatch_timer(u, &s->timer_watch);
 }
 
@@ -1570,6 +1588,12 @@ static int service_coldplug(Unit *u) {
 
                 service_set_state(s, s->deserialized_state);
         }
+        if (dual_timestamp_is_set(&s->watchdog_timestamp)) {
+                usec_t t;
+
+                t = now(CLOCK_MONOTONIC);
+                service_setup_watchdog_timer(s,  t - s->watchdog_timestamp.monotonic);
+        }
 
         return 0;
 }
@@ -2922,6 +2946,12 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
         assert(s);
         assert(elapsed == 1);
 
+        if (w == &s->watchdog_watch) {
+                log_error("%s watchdog timeout: ...", u->id);
+                service_enter_dead(s, SERVICE_FAILURE_WATCHDOG_TIMEOUT, true);
+                return;
+        }
+
         assert(w == &s->timer_watch);
 
         switch (s->state) {
diff --git a/src/service.h b/src/service.h
index b1e8b90..32341f0 100644
--- a/src/service.h
+++ b/src/service.h
@@ -95,6 +95,7 @@ typedef enum ServiceResult {
         SERVICE_FAILURE_EXIT_CODE,
         SERVICE_FAILURE_SIGNAL,
         SERVICE_FAILURE_CORE_DUMP,
+        SERVICE_FAILURE_WATCHDOG_TIMEOUT,
         _SERVICE_RESULT_MAX,
         _SERVICE_RESULT_INVALID = -1
 } ServiceResult;
@@ -112,6 +113,8 @@ struct Service {
         usec_t timeout_usec;
 
         dual_timestamp watchdog_timestamp;
+        usec_t watchdog_usec;
+        Watch watchdog_watch;
 
         ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
         ExecContext exec_context;
-- 
1.7.8.3



More information about the systemd-devel mailing list