[systemd-devel] [PATCH REPOST 1/4] service: add watchdog timestamp
Michael Olbrich
m.olbrich at pengutronix.de
Thu Jan 5 07:00:25 PST 2012
This patch adds WatchdogTimestamp[Monotonic] to the systemd service
D-Bus API. The timestamp is updated to the current time when the
service calls 'sd_notify("WATCHDOG=1\n")'.
Using a timestamp instead of an 'alive' flag has two advantages:
1. No timeout is needed to define when a service is no longer alive.
This simplifies both configuration (no timeout value) and
implementation (no timeout event).
2. It is more robust. A 'dead' service might go undetected should
systemd 'forget' to reset an 'alive' flag. A service that has died
is much less likely to end up with a valid new timestamp.
---
man/sd_notify.xml | 12 ++++++++++++
src/dbus-service.c | 5 +++++
src/sd-daemon.h | 5 +++++
src/service.c | 20 ++++++++++++++++++++
src/service.h | 2 ++
5 files changed, 44 insertions(+), 0 deletions(-)
diff --git a/man/sd_notify.xml b/man/sd_notify.xml
index c3791ce..1f7160f 100644
--- a/man/sd_notify.xml
+++ b/man/sd_notify.xml
@@ -151,6 +151,18 @@
itself. Example:
"MAINPID=4711"</para></listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>WATCHDOG=1</term>
+
+ <listitem><para>Tells systemd to
+ update the watchdog timestamp.
+ Services using this feature should do
+ this in regular intervals. A watchdog
+ framework can use the timestamps to
+ detect failed
+ services.</para></listitem>
+ </varlistentry>
</variablelist>
<para>It is recommended to prefix variable names that
diff --git a/src/dbus-service.c b/src/dbus-service.c
index 3486623..2f53484 100644
--- a/src/dbus-service.c
+++ b/src/dbus-service.c
@@ -43,6 +43,8 @@
" <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
" <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
" <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
+ " <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
+ " <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \
BUS_EXEC_COMMAND_INTERFACE("ExecStart") \
BUS_EXEC_COMMAND_INTERFACE("ExecStartPost") \
@@ -87,6 +89,7 @@ const char bus_service_invalidating_properties[] =
"ExecStop\0"
"ExecStopPost\0"
"ExecMain\0"
+ "WatchdogTimestamp\0"
"MainPID\0"
"ControlPID\0"
"StatusText\0";
@@ -104,6 +107,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio
{ "org.freedesktop.systemd1.Service", "NotifyAccess", bus_service_append_notify_access, "s", &u->service.notify_access },
{ "org.freedesktop.systemd1.Service", "RestartUSec", bus_property_append_usec, "t", &u->service.restart_usec },
{ "org.freedesktop.systemd1.Service", "TimeoutUSec", bus_property_append_usec, "t", &u->service.timeout_usec },
+ { "org.freedesktop.systemd1.Service", "WatchdogTimestamp", bus_property_append_usec, "t", &u->service.watchdog_timestamp.realtime },
+ { "org.freedesktop.systemd1.Service", "WatchdogTimestampMonotonic",bus_property_append_usec,"t", &u->service.watchdog_timestamp.monotonic },
BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_PRE], "ExecStartPre"),
BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START], "ExecStart"),
BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_POST], "ExecStartPost"),
diff --git a/src/sd-daemon.h b/src/sd-daemon.h
index 46dc7fd..17536f7 100644
--- a/src/sd-daemon.h
+++ b/src/sd-daemon.h
@@ -217,6 +217,11 @@ int sd_is_mq(int fd, const char *path);
MAINPID=... The main pid of a daemon, in case systemd did not
fork off the process itself. Example: "MAINPID=4711"
+ WATCHDOG=1 Tells systemd to update the watchdog timestamp.
+ Services using this feature should do this in
+ regular intervals. A watchdog framework can use the
+ timestamps to detect failed services.
+
Daemons can choose to send additional variables. However, it is
recommended to prefix variable names not listed above with X_.
diff --git a/src/service.c b/src/service.c
index feecbbe..4093cc7 100644
--- a/src/service.c
+++ b/src/service.c
@@ -194,6 +194,19 @@ static void service_connection_unref(Service *s) {
s->accept_socket = NULL;
}
+static void service_stop_watchdog(Service *s) {
+ assert(s);
+
+ s->watchdog_timestamp.realtime = 0;
+ s->watchdog_timestamp.monotonic = 0;
+}
+
+static void service_reset_watchdog(Service *s) {
+ assert(s);
+
+ dual_timestamp_get(&s->watchdog_timestamp);
+}
+
static void service_done(Unit *u) {
Service *s = SERVICE(u);
@@ -1539,6 +1552,9 @@ static void service_set_state(Service *s, ServiceState state) {
service_connection_unref(s);
}
+ if (state == SERVICE_STOP)
+ service_stop_watchdog(s);
+
/* For the inactive states unit_notify() will trim the cgroup,
* but for exit we have to do that ourselves... */
if (state == SERVICE_EXITED && s->meta.manager->n_reloading <= 0)
@@ -3129,6 +3145,10 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) {
}
}
+ if (strv_find(tags, "WATCHDOG=1")) {
+ log_debug("%s: got WATCHDOG=1", u->meta.id);
+ service_reset_watchdog(s);
+ }
/* Notify clients about changed status or main pid */
unit_add_to_dbus_queue(u);
diff --git a/src/service.h b/src/service.h
index 2102826..bee0e9e 100644
--- a/src/service.h
+++ b/src/service.h
@@ -99,6 +99,8 @@ struct Service {
usec_t restart_usec;
usec_t timeout_usec;
+ dual_timestamp watchdog_timestamp;
+
ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
ExecContext exec_context;
--
1.7.7.3
More information about the systemd-devel
mailing list