[systemd-devel] [PATCH 2/3] service: add watchdog restart/reboot timeouts

Michael Olbrich m.olbrich at pengutronix.de
Fri Nov 4 07:45:16 PDT 2011


On Fri, Nov 04, 2011 at 03:38:41PM +0100, Michael Olbrich wrote:
> This patch adds the WatchdogRestartSec and WatchdogRebootSec
> properties to services. Systemd will restart the service / reboot the
> system if the watchdog timeout has not been updated for the configured
> amount of time.
> This functionality is only enabled if the watchdog timeout is set at
> least once.
> ---

Documentation fixed as requested: the man page now uses the ...Sec= option
names rather than ...USec, and I've added notes about the default value
(0 == disabled).

Michael

>  man/systemd.service.xml          |   32 ++++++++++++++++++++++++++++++++
>  src/dbus-service.c               |    4 ++++
>  src/load-fragment-gperf.gperf.m4 |    2 ++
>  src/service.c                    |   28 ++++++++++++++++++++++++++++
>  src/service.h                    |    4 ++++
>  5 files changed, 70 insertions(+), 0 deletions(-)
> 
> diff --git a/man/systemd.service.xml b/man/systemd.service.xml
> index 7b6f12d..6d2d548 100644
> --- a/man/systemd.service.xml
> +++ b/man/systemd.service.xml
> @@ -460,6 +460,38 @@
>                          </varlistentry>
>  
>                          <varlistentry>
> +                                <term><varname>WatchdogRestartSec=</varname></term>
> +                                <listitem><para>Configures the time to
> +                                wait before restarting a service. This
> +                                is activated with the first
> +                                <citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry>
> +                                call with "WATCHDOG=1". If the time
> +                                between two such calls is larger than
> +                                the configured time then the service
> +                                is restarted. Defaults to 0s, meaning
> +                                watchdog triggered restart is
> +                                disabled.</para></listitem>
> +                        </varlistentry>
> +
> +                        <varlistentry>
> +                                <term><varname>WatchdogRebootSec=</varname></term>
> +                                <listitem><para>Configures the time to
> +                                wait before rebooting the system. This
> +                                is basically the same as
> +                                <varname>WatchdogRestartSec=</varname>
> +                                but the whole system is rebooted
> +                                instead of just restarting the
> +                                service. The typical use-case is to
> +                                set this to
> +                                <varname>WatchdogRestartSec</varname>
> +                                + <varname>TimeoutSec</varname> to
> +                                reboot in case the service restart
> +                                fails. Defaults to 0s, meaning
> +                                watchdog triggered reboot is
> +                                disabled.</para></listitem>
> +                        </varlistentry>
> +
> +                        <varlistentry>
>                                  <term><varname>Restart=</varname></term>
>                                  <listitem><para>Configures whether the
>                                  main service process shall be
> diff --git a/src/dbus-service.c b/src/dbus-service.c
> index 2f53484..145667e 100644
> --- a/src/dbus-service.c
> +++ b/src/dbus-service.c
> @@ -43,6 +43,8 @@
>          "  <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
>          "  <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
>          "  <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
> +        "  <property name=\"WatchdogRestartUSec\" type=\"t\" access=\"read\"/>\n" \
> +        "  <property name=\"WatchdogRebootUSec\" type=\"t\" access=\"read\"/>\n" \
>          "  <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
>          "  <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
>          BUS_EXEC_COMMAND_INTERFACE("ExecStartPre")                      \
> @@ -107,6 +109,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio
>                  { "org.freedesktop.systemd1.Service", "NotifyAccess",           bus_service_append_notify_access, "s", &u->service.notify_access       },
>                  { "org.freedesktop.systemd1.Service", "RestartUSec",            bus_property_append_usec,   "t", &u->service.restart_usec              },
>                  { "org.freedesktop.systemd1.Service", "TimeoutUSec",            bus_property_append_usec,   "t", &u->service.timeout_usec              },
> +                { "org.freedesktop.systemd1.Service", "WatchdogRestartUSec",    bus_property_append_usec,   "t", &u->service.watchdog_restart_usec     },
> +                { "org.freedesktop.systemd1.Service", "WatchdogRebootUSec",     bus_property_append_usec,   "t", &u->service.watchdog_reboot_usec      },
>                  { "org.freedesktop.systemd1.Service", "WatchdogTimestamp",      bus_property_append_usec,   "t", &u->service.watchdog_timestamp.realtime },
>                  { "org.freedesktop.systemd1.Service", "WatchdogTimestampMonotonic",bus_property_append_usec,"t", &u->service.watchdog_timestamp.monotonic },
>                  BUS_EXEC_COMMAND_PROPERTY("org.freedesktop.systemd1.Service", u->service.exec_command[SERVICE_EXEC_START_PRE],  "ExecStartPre"),
> diff --git a/src/load-fragment-gperf.gperf.m4 b/src/load-fragment-gperf.gperf.m4
> index 41797d2..cde21d2 100644
> --- a/src/load-fragment-gperf.gperf.m4
> +++ b/src/load-fragment-gperf.gperf.m4
> @@ -131,6 +131,8 @@ Service.ExecStop,                config_parse_exec,                  SERVICE_EXE
>  Service.ExecStopPost,            config_parse_exec,                  SERVICE_EXEC_STOP_POST,        offsetof(Service, exec_command)
>  Service.RestartSec,              config_parse_usec,                  0,                             offsetof(Service, restart_usec)
>  Service.TimeoutSec,              config_parse_usec,                  0,                             offsetof(Service, timeout_usec)
> +Service.WatchdogRestartSec,      config_parse_usec,                  0,                             offsetof(Service, watchdog_restart_usec)
> +Service.WatchdogRebootSec,       config_parse_usec,                  0,                             offsetof(Service, watchdog_reboot_usec)
>  Service.Type,                    config_parse_service_type,          0,                             offsetof(Service, type)
>  Service.Restart,                 config_parse_service_restart,       0,                             offsetof(Service, restart)
>  Service.PermissionsStartOnly,    config_parse_bool,                  0,                             offsetof(Service, permissions_start_only)
> diff --git a/src/service.c b/src/service.c
> index 2bd47da..0b4f5f1 100644
> --- a/src/service.c
> +++ b/src/service.c
> @@ -112,6 +112,10 @@ static void service_init(Unit *u) {
>  
>          s->timeout_usec = DEFAULT_TIMEOUT_USEC;
>          s->restart_usec = DEFAULT_RESTART_USEC;
> +
> +        s->watchdog_restart_watch.type = WATCH_INVALID;
> +        s->watchdog_reboot_watch.type = WATCH_INVALID;
> +
>          s->timer_watch.type = WATCH_INVALID;
>  #ifdef HAVE_SYSV_COMPAT
>          s->sysv_start_priority = -1;
> @@ -197,14 +201,27 @@ static void service_connection_unref(Service *s) {
>  static void service_stop_watchdog(Service *s) {
>          assert(s);
>  
> +        unit_unwatch_timer(UNIT(s), &s->watchdog_restart_watch); /* unwatch the watchdog restart timer */
> +        unit_unwatch_timer(UNIT(s), &s->watchdog_reboot_watch); /* unwatch the watchdog reboot timer */
>          s->watchdog_timestamp.realtime = 0;
>          s->watchdog_timestamp.monotonic = 0;
>  }
>  
>  static void service_reset_watchdog(Service *s) {
> +        int r; /* result of each unit_watch_timer() call; negative values are passed to strerror(-r) */
>          assert(s);
>  
>          dual_timestamp_get(&s->watchdog_timestamp);
> +        if (s->watchdog_restart_usec) { /* WatchdogRestartSec=; 0 means watchdog-triggered restart is disabled */
> +                r = unit_watch_timer(UNIT(s), s->watchdog_restart_usec, &s->watchdog_restart_watch);
> +                if (r < 0) /* failure is only logged; the reboot timer below is still attempted */
> +                        log_warning("%s failed to install watchdog restart timer: %s", s->meta.id, strerror(-r));
> +        }
> +        if (s->watchdog_reboot_usec) { /* WatchdogRebootSec=; gate on the reboot timeout itself (not the restart one); 0 means disabled */
> +                r = unit_watch_timer(UNIT(s), s->watchdog_reboot_usec, &s->watchdog_reboot_watch);
> +                if (r < 0) /* failure is only logged; the function returns nothing to the caller */
> +                        log_warning("%s failed to install watchdog reboot timer: %s", s->meta.id, strerror(-r));
> +        }
>  }
>  
>  static void service_done(Unit *u) {
> @@ -2830,6 +2847,17 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
>          assert(s);
>          assert(elapsed == 1);
>  
> +        if (w == &s->watchdog_restart_watch) {
> +                log_error("%s watchdog timeout: restarting service...", u->meta.id);
> +                manager_add_job(u->meta.manager, JOB_RESTART, u, JOB_FAIL, true, 0, 0);
> +                return;
> +        }
> +        if (w == &s->watchdog_reboot_watch) {
> +                log_error("%s watchdog timeout: rebooting...", u->meta.id);
> +                manager_add_job_by_name(u->meta.manager, JOB_START, "reboot.target", JOB_REPLACE, true, 0, 0);
> +                return;
> +        }
> +
>          assert(w == &s->timer_watch);
>  
>          switch (s->state) {
> diff --git a/src/service.h b/src/service.h
> index 3801e84..cab18c2 100644
> --- a/src/service.h
> +++ b/src/service.h
> @@ -99,6 +99,10 @@ struct Service {
>          usec_t timeout_usec;
>  
>          dual_timestamp watchdog_timestamp;
> +        usec_t watchdog_restart_usec;
> +        usec_t watchdog_reboot_usec;
> +        Watch watchdog_restart_watch;
> +        Watch watchdog_reboot_watch;
>  
>          ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
>          ExecContext exec_context;
> -- 
> 1.7.7
> 
> _______________________________________________
> systemd-devel mailing list
> systemd-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/systemd-devel
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |


More information about the systemd-devel mailing list