[systemd-devel] [PATCH 1/4] WIP: service: add watchdog timestamp

Michael Olbrich m.olbrich at pengutronix.de
Mon Oct 24 09:04:02 PDT 2011


This patch adds WatchdogTimestamp[Monotonic] to the systemd service
D-Bus API. The timestamp is updated to the current time when the
service calls 'sd_nofity("WATCHDOG=1\n")'.
Using a timestamp instead of an 'alive' flag has two advantages:
1. No timeout is needed to define when a service is no longer alive.
   This simplifies both configuration (no timeout value) and
   implementation (no timeout event).
2. It is more robust. A 'dead' service might not be detected should
    systemd 'forget' to reset an 'alive' flag. It is much less likely
    to get a valid new timestamp if a service died.
---
 src/dbus-service.c |    4 ++++
 src/sd-daemon.h    |    5 +++++
 src/service.c      |   20 ++++++++++++++++++++
 src/service.h      |    2 ++
 4 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/src/dbus-service.c b/src/dbus-service.c
index 3486623..5a740de 100644
--- a/src/dbus-service.c
+++ b/src/dbus-service.c
@@ -43,6 +43,8 @@
         "  <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
         "  <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
         BUS_EXEC_COMMAND_INTERFACE("ExecStartPre")                      \
         BUS_EXEC_COMMAND_INTERFACE("ExecStart")                         \
         BUS_EXEC_COMMAND_INTERFACE("ExecStartPost")                     \
@@ -104,6 +106,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio
                 { "org.freedesktop.systemd1.Service", "NotifyAccess",           bus_service_append_notify_access, "s", &u->service.notify_access       },
                 { "org.freedesktop.systemd1.Service", "RestartUSec",            bus_property_append_usec,   "t", &u->service.restart_usec              },
                 { "org.freedesktop.systemd1.Service", "TimeoutUSec",            bus_property_append_usec,   "t", &u->service.timeout_usec              },
+                { "org.freedesktop.systemd1.Service", "WatchdogTimestamp",      bus_property_append_usec,   "t", &u->service.watchdog_timestamp.realtime },
+                { "org.freedesktop.systemd1.Service", "WatchdogTimestampMonotonic",bus_property_append_usec,"t", &u->service.watchdog_timestamp.monotonic },
                 BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_PRE],  "ExecStartPre"),
                 BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START],      "ExecStart"),
                 BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_POST], "ExecStartPost"),
diff --git a/src/sd-daemon.h b/src/sd-daemon.h
index 46dc7fd..17536f7 100644
--- a/src/sd-daemon.h
+++ b/src/sd-daemon.h
@@ -217,6 +217,11 @@ int sd_is_mq(int fd, const char *path);
      MAINPID=...  The main pid of a daemon, in case systemd did not
                   fork off the process itself. Example: "MAINPID=4711"
 
+     WATCHDOG=1   Tells systemd to update the watchdog timestamp.
+                  Services using this feature should do this in
+                  regular intervals. A watchdog framework can use the
+                  timestamps to detect failed services.
+
   Daemons can choose to send additional variables. However, it is
   recommended to prefix variable names not listed above with X_.
 
diff --git a/src/service.c b/src/service.c
index 6184390..0d68d8d 100644
--- a/src/service.c
+++ b/src/service.c
@@ -194,6 +194,19 @@ static void service_connection_unref(Service *s) {
         s->accept_socket = NULL;
 }
 
+static void service_stop_watchdog(Service *s) {
+        assert(s);
+
+        s->watchdog_timestamp.realtime = 0;
+        s->watchdog_timestamp.monotonic = 0;
+}
+
+static void service_reset_watchdog(Service *s) {
+        assert(s);
+
+        dual_timestamp_get(&s->watchdog_timestamp);
+}
+
 static void service_done(Unit *u) {
         Service *s = SERVICE(u);
 
@@ -1522,6 +1535,9 @@ static void service_set_state(Service *s, ServiceState state) {
                 service_connection_unref(s);
         }
 
+        if (state == SERVICE_STOP)
+                service_stop_watchdog(s);
+
         /* For the inactive states unit_notify() will trim the cgroup,
          * but for exit we have to do that ourselves... */
         if (state == SERVICE_EXITED && s->meta.manager->n_reloading <= 0)
@@ -2993,6 +3009,10 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) {
                 }
 
         }
+        if (strv_find(tags, "WATCHDOG=1")) {
+                log_debug("%s: got WATCHDOG=1", u->meta.id);
+                service_reset_watchdog(s);
+        }
 
         /* Notify clients about changed status or main pid */
         unit_add_to_dbus_queue(u);
diff --git a/src/service.h b/src/service.h
index e28f74b..3801e84 100644
--- a/src/service.h
+++ b/src/service.h
@@ -98,6 +98,8 @@ struct Service {
         usec_t restart_usec;
         usec_t timeout_usec;
 
+        dual_timestamp watchdog_timestamp;
+
         ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
         ExecContext exec_context;
 
-- 
1.7.7



More information about the systemd-devel mailing list