[systemd-devel] [PATCH 2/4] WIP: service: add watchdog restart/reboot timeouts

Michael Olbrich m.olbrich at pengutronix.de
Mon Oct 24 09:04:03 PDT 2011


This patch adds the WatchdogRestartSec= and WatchdogRebootSec= service
settings (exposed on D-Bus as the WatchdogRestartUSec and
WatchdogRebootUSec properties). Systemd will restart the service / reboot
the system if the watchdog timestamp has not been updated for the
configured amount of time.
This functionality is only enabled once the watchdog timestamp has been
set at least once.
---
 man/systemd.service.xml          |   28 ++++++++++++++++++++++++++++
 src/dbus-service.c               |    4 ++++
 src/load-fragment-gperf.gperf.m4 |    2 ++
 src/service.c                    |   27 +++++++++++++++++++++++++++
 src/service.h                    |    4 ++++
 5 files changed, 65 insertions(+), 0 deletions(-)

diff --git a/man/systemd.service.xml b/man/systemd.service.xml
index 7b6f12d..90989cf 100644
--- a/man/systemd.service.xml
+++ b/man/systemd.service.xml
@@ -460,6 +460,34 @@
                         </varlistentry>
 
                         <varlistentry>
+                                <term><varname>WatchdogRestartSec=</varname></term>
+                                <listitem><para>Configures the time to
+                                wait before restarting a service. This
+                                is activated with the first
+                                <citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry>
+                                call with "WATCHDOG=1". If the time
+                                between two such calls is larger than
+                                the configured time then the service
+                                is restarted.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
+                                <term><varname>WatchdogRebootSec=</varname></term>
+                                <listitem><para>Configures the time to
+                                wait before rebooting the system. This
+                                is basically the same as
+                                <varname>WatchdogRestartSec=</varname>
+                                but the whole system is rebooted
+                                instead of just restarting the
+                                service. The typical use-case is to
+                                set this to
+                                <varname>WatchdogRestartSec</varname>
+                                + <varname>TimeoutSec</varname> to
+                                reboot in case the service restart
+                                fails.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><varname>Restart=</varname></term>
                                 <listitem><para>Configures whether the
                                 main service process shall be
diff --git a/src/dbus-service.c b/src/dbus-service.c
index 5a740de..34eabdf 100644
--- a/src/dbus-service.c
+++ b/src/dbus-service.c
@@ -43,6 +43,8 @@
         "  <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
         "  <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogRestartUSec\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogRebootUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
         BUS_EXEC_COMMAND_INTERFACE("ExecStartPre")                      \
@@ -106,6 +108,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio
                 { "org.freedesktop.systemd1.Service", "NotifyAccess",           bus_service_append_notify_access, "s", &u->service.notify_access       },
                 { "org.freedesktop.systemd1.Service", "RestartUSec",            bus_property_append_usec,   "t", &u->service.restart_usec              },
                 { "org.freedesktop.systemd1.Service", "TimeoutUSec",            bus_property_append_usec,   "t", &u->service.timeout_usec              },
+                { "org.freedesktop.systemd1.Service", "WatchdogRestartUSec",    bus_property_append_usec,   "t", &u->service.watchdog_restart_usec     },
+                { "org.freedesktop.systemd1.Service", "WatchdogRebootUSec",     bus_property_append_usec,   "t", &u->service.watchdog_reboot_usec      },
                 { "org.freedesktop.systemd1.Service", "WatchdogTimestamp",      bus_property_append_usec,   "t", &u->service.watchdog_timestamp.realtime },
                 { "org.freedesktop.systemd1.Service", "WatchdogTimestampMonotonic",bus_property_append_usec,"t", &u->service.watchdog_timestamp.monotonic },
                 BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_PRE],  "ExecStartPre"),
diff --git a/src/load-fragment-gperf.gperf.m4 b/src/load-fragment-gperf.gperf.m4
index 41797d2..cde21d2 100644
--- a/src/load-fragment-gperf.gperf.m4
+++ b/src/load-fragment-gperf.gperf.m4
@@ -131,6 +131,8 @@ Service.ExecStop,                config_parse_exec,                  SERVICE_EXE
 Service.ExecStopPost,            config_parse_exec,                  SERVICE_EXEC_STOP_POST,        offsetof(Service, exec_command)
 Service.RestartSec,              config_parse_usec,                  0,                             offsetof(Service, restart_usec)
 Service.TimeoutSec,              config_parse_usec,                  0,                             offsetof(Service, timeout_usec)
+Service.WatchdogRestartSec,      config_parse_usec,                  0,                             offsetof(Service, watchdog_restart_usec)
+Service.WatchdogRebootSec,       config_parse_usec,                  0,                             offsetof(Service, watchdog_reboot_usec)
 Service.Type,                    config_parse_service_type,          0,                             offsetof(Service, type)
 Service.Restart,                 config_parse_service_restart,       0,                             offsetof(Service, restart)
 Service.PermissionsStartOnly,    config_parse_bool,                  0,                             offsetof(Service, permissions_start_only)
diff --git a/src/service.c b/src/service.c
index 0d68d8d..b0c775c 100644
--- a/src/service.c
+++ b/src/service.c
@@ -112,6 +112,11 @@ static void service_init(Unit *u) {
 
         s->timeout_usec = DEFAULT_TIMEOUT_USEC;
         s->restart_usec = DEFAULT_RESTART_USEC;
+
+        s->watchdog_restart_usec = DEFAULT_TIMEOUT_USEC;
+        s->watchdog_restart_watch.type = WATCH_INVALID;
+        s->watchdog_reboot_watch.type = WATCH_INVALID;
+
         s->timer_watch.type = WATCH_INVALID;
 #ifdef HAVE_SYSV_COMPAT
         s->sysv_start_priority = -1;
@@ -197,14 +202,25 @@ static void service_connection_unref(Service *s) {
 static void service_stop_watchdog(Service *s) {
         assert(s);
 
+        unit_unwatch_timer(UNIT(s), &s->watchdog_restart_watch);
+        unit_unwatch_timer(UNIT(s), &s->watchdog_reboot_watch);
         s->watchdog_timestamp.realtime = 0;
         s->watchdog_timestamp.monotonic = 0;
 }
 
 static void service_reset_watchdog(Service *s) {
+        int r;
         assert(s);
 
         dual_timestamp_get(&s->watchdog_timestamp);
+        if (s->watchdog_restart_usec) { /* only arm the restart timer when a restart timeout is set */
+                if ((r = unit_watch_timer(UNIT(s), s->watchdog_restart_usec, &s->watchdog_restart_watch)) < 0)
+                        log_warning("%s failed to install watchdog restart timer: %s", s->meta.id, strerror(-r));
+        }
+        if (s->watchdog_reboot_usec) { /* only arm the reboot timer when a reboot timeout is set */
+                if ((r = unit_watch_timer(UNIT(s), s->watchdog_reboot_usec, &s->watchdog_reboot_watch)) < 0)
+                        log_warning("%s failed to install watchdog reboot timer: %s", s->meta.id, strerror(-r));
+        }
 }
 
 static void service_done(Unit *u) {
@@ -2829,6 +2845,17 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
         assert(s);
         assert(elapsed == 1);
 
+        if (w == &s->watchdog_restart_watch) {
+                log_error("%s watchdog timeout: restarting service...", u->meta.id);
+                manager_add_job(u->meta.manager, JOB_RESTART, u, JOB_FAIL, true, 0, 0);
+                return;
+        }
+        if (w == &s->watchdog_reboot_watch) {
+                log_error("%s watchdog timeout: rebooting...", u->meta.id);
+                manager_add_job_by_name(u->meta.manager, JOB_START, "reboot.target", JOB_REPLACE, true, 0, 0);
+                return;
+        }
+
         assert(w == &s->timer_watch);
 
         switch (s->state) {
diff --git a/src/service.h b/src/service.h
index 3801e84..cab18c2 100644
--- a/src/service.h
+++ b/src/service.h
@@ -99,6 +99,10 @@ struct Service {
         usec_t timeout_usec;
 
         dual_timestamp watchdog_timestamp;
+        usec_t watchdog_restart_usec;
+        usec_t watchdog_reboot_usec;
+        Watch watchdog_restart_watch;
+        Watch watchdog_reboot_watch;
 
         ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
         ExecContext exec_context;
-- 
1.7.7



More information about the systemd-devel mailing list