[systemd-devel] [PATCH 1/3] service: add watchdog timestamp

Michael Olbrich m.olbrich at pengutronix.de
Fri Nov 4 07:43:26 PDT 2011


On Fri, Nov 04, 2011 at 03:38:40PM +0100, Michael Olbrich wrote:
> This patch adds WatchdogTimestamp[Monotonic] to the systemd service
> D-Bus API. The timestamp is updated to the current time when the
> service calls 'sd_notify("WATCHDOG=1\n")'.
> Using a timestamp instead of an 'alive' flag has two advantages:
> 1. No timeout is needed to define when a service is no longer alive.
>    This simplifies both configuration (no timeout value) and
>    implementation (no timeout event).
> 2. It is more robust. A 'dead' service might not be detected should
>     systemd 'forget' to reset an 'alive' flag. It is much less likely
>     to get a valid new timestamp if a service died.
> ---

Hmmm, somehow git didn't let me annotate the patches, so I'll add my
notes in this reply instead.

Documentation updated as requested. I've also added WatchdogTimestamp to
bus_service_invalidating_properties. Is this correct or is there another
way to handle property updates?

Michael

>  man/sd_notify.xml  |   12 ++++++++++++
>  src/dbus-service.c |    5 +++++
>  src/sd-daemon.h    |    5 +++++
>  src/service.c      |   20 ++++++++++++++++++++
>  src/service.h      |    2 ++
>  5 files changed, 44 insertions(+), 0 deletions(-)
> 
> diff --git a/man/sd_notify.xml b/man/sd_notify.xml
> index dd0ba93..4a81d7a 100644
> --- a/man/sd_notify.xml
> +++ b/man/sd_notify.xml
> @@ -151,6 +151,18 @@
>                                  itself. Example:
>                                  "MAINPID=4711"</para></listitem>
>                          </varlistentry>
> +
> +                        <varlistentry>
> +                                <term>WATCHDOG=1</term>
> +
> +                                <listitem><para>Tells systemd to
> +                                update the watchdog timestamp.
> +                                Services using this feature should do
> +                                this at regular intervals. A watchdog
> +                                framework can use the timestamps to
> +                                detect failed
> +                                services.</para></listitem>
> +                        </varlistentry>
>                  </variablelist>
>  
>                  <para>It is recommended to prefix variable names that
> diff --git a/src/dbus-service.c b/src/dbus-service.c
> index 3486623..2f53484 100644
> --- a/src/dbus-service.c
> +++ b/src/dbus-service.c
> @@ -43,6 +43,8 @@
>          "  <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
>          "  <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
>          "  <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
> +        "  <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
> +        "  <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
>          BUS_EXEC_COMMAND_INTERFACE("ExecStartPre")                      \
>          BUS_EXEC_COMMAND_INTERFACE("ExecStart")                         \
>          BUS_EXEC_COMMAND_INTERFACE("ExecStartPost")                     \
> @@ -87,6 +89,7 @@ const char bus_service_invalidating_properties[] =
>          "ExecStop\0"
>          "ExecStopPost\0"
>          "ExecMain\0"
> +        "WatchdogTimestamp\0"
>          "MainPID\0"
>          "ControlPID\0"
>          "StatusText\0";
> @@ -104,6 +107,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio
>                  { "org.freedesktop.systemd1.Service", "NotifyAccess",           bus_service_append_notify_access, "s", &u->service.notify_access       },
>                  { "org.freedesktop.systemd1.Service", "RestartUSec",            bus_property_append_usec,   "t", &u->service.restart_usec              },
>                  { "org.freedesktop.systemd1.Service", "TimeoutUSec",            bus_property_append_usec,   "t", &u->service.timeout_usec              },
> +                { "org.freedesktop.systemd1.Service", "WatchdogTimestamp",      bus_property_append_usec,   "t", &u->service.watchdog_timestamp.realtime },
> +                { "org.freedesktop.systemd1.Service", "WatchdogTimestampMonotonic",bus_property_append_usec,"t", &u->service.watchdog_timestamp.monotonic },
>                  BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_PRE],  "ExecStartPre"),
>                  BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START],      "ExecStart"),
>                  BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_POST], "ExecStartPost"),
> diff --git a/src/sd-daemon.h b/src/sd-daemon.h
> index 46dc7fd..17536f7 100644
> --- a/src/sd-daemon.h
> +++ b/src/sd-daemon.h
> @@ -217,6 +217,11 @@ int sd_is_mq(int fd, const char *path);
>       MAINPID=...  The main pid of a daemon, in case systemd did not
>                    fork off the process itself. Example: "MAINPID=4711"
>  
> +     WATCHDOG=1   Tells systemd to update the watchdog timestamp.
> +                  Services using this feature should do this at
> +                  regular intervals. A watchdog framework can use the
> +                  timestamps to detect failed services.
> +
>    Daemons can choose to send additional variables. However, it is
>    recommended to prefix variable names not listed above with X_.
>  
> diff --git a/src/service.c b/src/service.c
> index eb475d9..2bd47da 100644
> --- a/src/service.c
> +++ b/src/service.c
> @@ -194,6 +194,19 @@ static void service_connection_unref(Service *s) {
>          s->accept_socket = NULL;
>  }
>  
> +static void service_stop_watchdog(Service *s) {
> +        assert(s);
> +
> +        s->watchdog_timestamp.realtime = 0;
> +        s->watchdog_timestamp.monotonic = 0;
> +}
> +
> +static void service_reset_watchdog(Service *s) {
> +        assert(s);
> +
> +        dual_timestamp_get(&s->watchdog_timestamp);
> +}
> +
>  static void service_done(Unit *u) {
>          Service *s = SERVICE(u);
>  
> @@ -1523,6 +1536,9 @@ static void service_set_state(Service *s, ServiceState state) {
>                  service_connection_unref(s);
>          }
>  
> +        if (state == SERVICE_STOP)
> +                service_stop_watchdog(s);
> +
>          /* For the inactive states unit_notify() will trim the cgroup,
>           * but for exit we have to do that ourselves... */
>          if (state == SERVICE_EXITED && s->meta.manager->n_reloading <= 0)
> @@ -2994,6 +3010,10 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) {
>                  }
>  
>          }
> +        if (strv_find(tags, "WATCHDOG=1")) {
> +                log_debug("%s: got WATCHDOG=1", u->meta.id);
> +                service_reset_watchdog(s);
> +        }
>  
>          /* Notify clients about changed status or main pid */
>          unit_add_to_dbus_queue(u);
> diff --git a/src/service.h b/src/service.h
> index e28f74b..3801e84 100644
> --- a/src/service.h
> +++ b/src/service.h
> @@ -98,6 +98,8 @@ struct Service {
>          usec_t restart_usec;
>          usec_t timeout_usec;
>  
> +        dual_timestamp watchdog_timestamp;
> +
>          ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
>          ExecContext exec_context;
>  
> -- 
> 1.7.7
> 
> _______________________________________________
> systemd-devel mailing list
> systemd-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/systemd-devel
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |


More information about the systemd-devel mailing list