[systemd-devel] [PATCH v3 2/4] service: add watchdog restart/reboot timeouts
Michael Olbrich
m.olbrich at pengutronix.de
Wed Feb 1 08:17:13 PST 2012
This patch adds the WatchdogRestartSec and WatchdogRebootSec
properties to services. systemd will restart the service / reboot the
system if the service's watchdog timestamp has not been refreshed within
the configured amount of time.
This functionality only becomes active once the service has refreshed the
watchdog timestamp (sd_notify() with "WATCHDOG=1") at least once.
---
changes in v3:
- fix typo in service_timer_event (reboot vs. restart)
- use now(CLOCK_MONOTONIC) in service_coldplug()
changes in v2:
- adapt to changes from "d200735 dbus: more efficient implementation of properties"
- stop timers in service_done() and restart in service_coldplug()
man/systemd.service.xml | 32 ++++++++++++++++++++++++++++
src/dbus-service.c | 4 +++
src/load-fragment-gperf.gperf.m4 | 2 +
src/service.c | 42 ++++++++++++++++++++++++++++++++++++++
src/service.h | 4 +++
5 files changed, 84 insertions(+), 0 deletions(-)
diff --git a/man/systemd.service.xml b/man/systemd.service.xml
index 0baddd1..48f63ce 100644
--- a/man/systemd.service.xml
+++ b/man/systemd.service.xml
@@ -460,6 +460,38 @@
</varlistentry>
<varlistentry>
+ <term><varname>WatchdogRestartSec=</varname></term>
+ <listitem><para>Configures the time to
+ wait before restarting a service. This
+ is activated with the first
+ <citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry>
+ call with "WATCHDOG=1". If the time
+ between two such calls is larger than
+ the configured time then the service
+ is restarted. Defaults to 0s, meaning
+ watchdog triggered restart is
+ disabled.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>WatchdogRebootSec=</varname></term>
+ <listitem><para>Configures the time to
+ wait before rebooting the system. This
+ is basically the same as
+ <varname>WatchdogRestartSec=</varname>
+ but the whole system is rebooted
+ instead of just restarting the
+ service. The typical use-case is to
+ set this to
+ <varname>WatchdogRestartSec</varname>
+ + <varname>TimeoutSec</varname> to
+ reboot in case the service restart
+ fails. Defaults to 0s, meaning
+ watchdog triggered reboot is
+ disabled.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>Restart=</varname></term>
<listitem><para>Configures whether the
main service process shall be
diff --git a/src/dbus-service.c b/src/dbus-service.c
index d7529ec..c219aba 100644
--- a/src/dbus-service.c
+++ b/src/dbus-service.c
@@ -43,6 +43,8 @@
" <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
" <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
" <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
+ " <property name=\"WatchdogRestartUSec\" type=\"t\" access=\"read\"/>\n" \
+ " <property name=\"WatchdogRebootUSec\" type=\"t\" access=\"read\"/>\n" \
" <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
" <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \
@@ -115,6 +117,8 @@ static const BusProperty bus_service_properties[] = {
{ "NotifyAccess", bus_service_append_notify_access, "s", offsetof(Service, notify_access) },
{ "RestartUSec", bus_property_append_usec, "t", offsetof(Service, restart_usec) },
{ "TimeoutUSec", bus_property_append_usec, "t", offsetof(Service, timeout_usec) },
+ { "WatchdogRestartUSec", bus_property_append_usec, "t", offsetof(Service, watchdog_restart_usec) },
+ { "WatchdogRebootUSec", bus_property_append_usec, "t", offsetof(Service, watchdog_reboot_usec) },
{ "WatchdogTimestamp", bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.realtime)},
{ "WatchdogTimestampMonotonic",bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.monotonic)},
BUS_EXEC_COMMAND_PROPERTY("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), true ),
diff --git a/src/load-fragment-gperf.gperf.m4 b/src/load-fragment-gperf.gperf.m4
index 14c0606..f4d7523 100644
--- a/src/load-fragment-gperf.gperf.m4
+++ b/src/load-fragment-gperf.gperf.m4
@@ -134,6 +134,8 @@ Service.ExecStop, config_parse_exec, SERVICE_EXE
Service.ExecStopPost, config_parse_exec, SERVICE_EXEC_STOP_POST, offsetof(Service, exec_command)
Service.RestartSec, config_parse_usec, 0, offsetof(Service, restart_usec)
Service.TimeoutSec, config_parse_usec, 0, offsetof(Service, timeout_usec)
+Service.WatchdogRestartSec, config_parse_usec, 0, offsetof(Service, watchdog_restart_usec)
+Service.WatchdogRebootSec, config_parse_usec, 0, offsetof(Service, watchdog_reboot_usec)
Service.Type, config_parse_service_type, 0, offsetof(Service, type)
Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart)
Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only)
diff --git a/src/service.c b/src/service.c
index e107179..1e9bf98 100644
--- a/src/service.c
+++ b/src/service.c
@@ -112,6 +112,10 @@ static void service_init(Unit *u) {
s->timeout_usec = DEFAULT_TIMEOUT_USEC;
s->restart_usec = DEFAULT_RESTART_USEC;
+
+ s->watchdog_restart_watch.type = WATCH_INVALID;
+ s->watchdog_reboot_watch.type = WATCH_INVALID;
+
s->timer_watch.type = WATCH_INVALID;
#ifdef HAVE_SYSV_COMPAT
s->sysv_start_priority = -1;
@@ -208,14 +212,33 @@ static void service_connection_unref(Service *s) {
static void service_stop_watchdog(Service *s) {
assert(s);
+ unit_unwatch_timer(UNIT(s), &s->watchdog_restart_watch);
+ unit_unwatch_timer(UNIT(s), &s->watchdog_reboot_watch);
s->watchdog_timestamp.realtime = 0;
s->watchdog_timestamp.monotonic = 0;
}
+static void service_setup_watchdog_timer(Service *s, usec_t offset) {
+        int r;
+        assert(s);
+        /* Arm the restart/reboot watchdog timers; offset is the time already elapsed since the last watchdog ping. The subtraction is clamped at 0 because offset may exceed the timeout when called from service_coldplug(), and an unsigned usec_t would wrap to a huge delay. NOTE(review): assumes unit_watch_timer() treats a 0 delay as "expire right away" - confirm. */
+        if (s->watchdog_restart_usec) {
+                r = unit_watch_timer(UNIT(s), offset < s->watchdog_restart_usec ? s->watchdog_restart_usec - offset : 0, &s->watchdog_restart_watch);
+                if (r < 0)
+                        log_warning("%s failed to install watchdog restart timer: %s", UNIT(s)->id, strerror(-r));
+        }
+        if (s->watchdog_reboot_usec) {
+                r = unit_watch_timer(UNIT(s), offset < s->watchdog_reboot_usec ? s->watchdog_reboot_usec - offset : 0, &s->watchdog_reboot_watch);
+                if (r < 0)
+                        log_warning("%s failed to install watchdog reboot timer: %s", UNIT(s)->id, strerror(-r));
+        }
+}
+
static void service_reset_watchdog(Service *s) {
assert(s);
dual_timestamp_get(&s->watchdog_timestamp);
+ service_setup_watchdog_timer(s, 0);
}
static void service_done(Unit *u) {
@@ -259,6 +282,8 @@ static void service_done(Unit *u) {
unit_ref_unset(&s->accept_socket);
+ service_stop_watchdog(s);
+
unit_unwatch_timer(u, &s->timer_watch);
}
@@ -1566,6 +1591,12 @@ static int service_coldplug(Unit *u) {
service_set_state(s, s->deserialized_state);
}
+ if (dual_timestamp_is_set(&s->watchdog_timestamp)) {
+ usec_t t;
+
+ t = now(CLOCK_MONOTONIC);
+ service_setup_watchdog_timer(s, t - s->watchdog_timestamp.monotonic);
+ }
return 0;
}
@@ -2896,6 +2927,17 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
assert(s);
assert(elapsed == 1);
+ if (w == &s->watchdog_restart_watch) {
+ log_error("%s watchdog timeout: restarting service...", u->id);
+ manager_add_job(u->manager, JOB_RESTART, u, JOB_FAIL, true, 0, 0);
+ return;
+ }
+ if (w == &s->watchdog_reboot_watch) {
+ log_error("%s watchdog timeout: rebooting...", u->id);
+ manager_add_job_by_name(u->manager, JOB_START, "reboot.target", JOB_REPLACE, true, 0, 0);
+ return;
+ }
+
assert(w == &s->timer_watch);
switch (s->state) {
diff --git a/src/service.h b/src/service.h
index dbae68b..d6a8290 100644
--- a/src/service.h
+++ b/src/service.h
@@ -101,6 +101,10 @@ struct Service {
usec_t timeout_usec;
dual_timestamp watchdog_timestamp;
+ usec_t watchdog_restart_usec;
+ usec_t watchdog_reboot_usec;
+ Watch watchdog_restart_watch;
+ Watch watchdog_reboot_watch;
ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
ExecContext exec_context;
--
1.7.7.3
More information about the systemd-devel
mailing list