[systemd-devel] [PATCH] core: send sigabrt on watchdog timeout

Umut Tezduyar Lindskog umut.tezduyar at axis.com
Sat Oct 25 14:58:34 PDT 2014


If SIGABRT doesn't do the job, follow the regular shutdown
routine: SIGTERM first, then SIGKILL.

Umut:
- I have done basic testing with the client application suppressing
  SIGABRT, and with it suppressing both SIGABRT and SIGTERM.
- Documentation needs to be updated. I wasn't sure if we
  should mention it under KillMode=, KillSignal= or WatchdogSec=.
---
 TODO               |  2 --
 src/core/busname.c |  1 +
 src/core/mount.c   |  1 +
 src/core/scope.c   |  1 +
 src/core/service.c | 38 +++++++++++++++++++++++++++-----------
 src/core/service.h |  1 +
 src/core/socket.c  |  1 +
 src/core/swap.c    |  1 +
 src/core/unit.c    |  3 ++-
 src/core/unit.h    |  2 +-
 10 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/TODO b/TODO
index ed00661..08f8d20 100644
--- a/TODO
+++ b/TODO
@@ -54,8 +54,6 @@ Features:
 
 * consider showing the unit names during boot up in the status output, not just the unit descriptions
 
-* send SIGABRT when a service watchdog is triggered, by default, so that we acquire a backtrace of the hang.
-
 * shouldn't RouteMetric= in networkd's [DHCP] section move to [Network]?
 
 * dhcp: do we allow configuring dhcp routes on interfaces that are not the one we got the dhcp info from?
diff --git a/src/core/busname.c b/src/core/busname.c
index 22d2a6d..c97b0a8 100644
--- a/src/core/busname.c
+++ b/src/core/busname.c
@@ -447,6 +447,7 @@ static void busname_enter_signal(BusName *n, BusNameState state, BusNameResult f
         r = unit_kill_context(UNIT(n),
                               &kill_context,
                               state != BUSNAME_SIGTERM,
+                              false,
                               -1,
                               n->control_pid,
                               false);
diff --git a/src/core/mount.c b/src/core/mount.c
index e284357..4b3a73b 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -776,6 +776,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
                         UNIT(m),
                         &m->kill_context,
                         state != MOUNT_MOUNTING_SIGTERM && state != MOUNT_UNMOUNTING_SIGTERM && state != MOUNT_REMOUNTING_SIGTERM,
+                        false,
                         -1,
                         m->control_pid,
                         false);
diff --git a/src/core/scope.c b/src/core/scope.c
index e8f9e8d..4745d15 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -244,6 +244,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
                                 UNIT(s),
                                 &s->kill_context,
                                 state != SCOPE_STOP_SIGTERM,
+                                false,
                                 -1, -1, false);
                 if (r < 0)
                         goto fail;
diff --git a/src/core/service.c b/src/core/service.c
index d160c4e..76618ff 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -56,6 +56,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
         [SERVICE_EXITED] = UNIT_ACTIVE,
         [SERVICE_RELOAD] = UNIT_RELOADING,
         [SERVICE_STOP] = UNIT_DEACTIVATING,
+        [SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING,
         [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
         [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
         [SERVICE_STOP_POST] = UNIT_DEACTIVATING,
@@ -76,6 +77,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
         [SERVICE_EXITED] = UNIT_ACTIVE,
         [SERVICE_RELOAD] = UNIT_RELOADING,
         [SERVICE_STOP] = UNIT_DEACTIVATING,
+        [SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING,
         [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
         [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
         [SERVICE_STOP_POST] = UNIT_DEACTIVATING,
@@ -663,7 +665,7 @@ static void service_set_state(Service *s, ServiceState state) {
                     SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
                     SERVICE_RELOAD,
                     SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
-                    SERVICE_STOP_POST,
+                    SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
                     SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
                     SERVICE_AUTO_RESTART))
                 s->timer_event_source = sd_event_source_unref(s->timer_event_source);
@@ -672,7 +674,7 @@ static void service_set_state(Service *s, ServiceState state) {
                     SERVICE_START, SERVICE_START_POST,
                     SERVICE_RUNNING, SERVICE_RELOAD,
                     SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
-                    SERVICE_STOP_POST,
+                    SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
                     SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
                 service_unwatch_main_pid(s);
                 s->main_command = NULL;
@@ -682,7 +684,7 @@ static void service_set_state(Service *s, ServiceState state) {
                     SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
                     SERVICE_RELOAD,
                     SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
-                    SERVICE_STOP_POST,
+                    SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
                     SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
                 service_unwatch_control_pid(s);
                 s->control_command = NULL;
@@ -696,7 +698,7 @@ static void service_set_state(Service *s, ServiceState state) {
                     SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
                     SERVICE_RUNNING, SERVICE_RELOAD,
                     SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
-                    SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
+                    SERVICE_STOP_SIGABRT, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
             !(state == SERVICE_DEAD && UNIT(s)->job)) {
                 service_close_socket_fd(s);
                 service_connection_unref(s);
@@ -750,7 +752,7 @@ static int service_coldplug(Unit *u) {
                            SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
                            SERVICE_RELOAD,
                            SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
-                           SERVICE_STOP_POST,
+                           SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
                            SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
 
                         usec_t k;
@@ -779,7 +781,7 @@ static int service_coldplug(Unit *u) {
                             SERVICE_START, SERVICE_START_POST,
                             SERVICE_RUNNING, SERVICE_RELOAD,
                             SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
-                            SERVICE_STOP_POST,
+                            SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
                             SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
                         r = unit_watch_pid(UNIT(s), s->main_pid);
                         if (r < 0)
@@ -791,7 +793,7 @@ static int service_coldplug(Unit *u) {
                            SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
                            SERVICE_RELOAD,
                            SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
-                           SERVICE_STOP_POST,
+                           SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
                            SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
                         r = unit_watch_pid(UNIT(s), s->control_pid);
                         if (r < 0)
@@ -1181,7 +1183,9 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
         r = unit_kill_context(
                         UNIT(s),
                         &s->kill_context,
-                        state != SERVICE_STOP_SIGTERM && state != SERVICE_FINAL_SIGTERM,
+                        state != SERVICE_STOP_SIGTERM && state != SERVICE_FINAL_SIGTERM &&
+                        state != SERVICE_STOP_SIGABRT,
+                        state == SERVICE_STOP_SIGABRT,
                         s->main_pid,
                         s->control_pid,
                         s->main_pid_alien);
@@ -1197,7 +1201,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
                 }
 
                 service_set_state(s, state);
-        } else if (state == SERVICE_STOP_SIGTERM)
+        } else if (state == SERVICE_STOP_SIGTERM || state == SERVICE_STOP_SIGABRT)
                 service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_SUCCESS);
         else if (state == SERVICE_STOP_SIGKILL)
                 service_enter_stop_post(s, SERVICE_SUCCESS);
@@ -1211,7 +1215,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
 fail:
         log_warning_unit(UNIT(s)->id, "%s failed to kill processes: %s", UNIT(s)->id, strerror(-r));
 
-        if (state == SERVICE_STOP_SIGTERM || state == SERVICE_STOP_SIGKILL)
+        if (state == SERVICE_STOP_SIGTERM || state == SERVICE_STOP_SIGKILL ||
+            state == SERVICE_STOP_SIGABRT)
                 service_enter_stop_post(s, SERVICE_FAILURE_RESOURCES);
         else
                 service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
@@ -1637,6 +1642,7 @@ static int service_start(Unit *u) {
         /* We cannot fulfill this request right now, try again later
          * please! */
         if (s->state == SERVICE_STOP ||
+            s->state == SERVICE_STOP_SIGABRT ||
             s->state == SERVICE_STOP_SIGTERM ||
             s->state == SERVICE_STOP_SIGKILL ||
             s->state == SERVICE_STOP_POST ||
@@ -1695,6 +1701,7 @@ static int service_stop(Unit *u) {
 
         /* Already on it */
         if (s->state == SERVICE_STOP ||
+            s->state == SERVICE_STOP_SIGABRT ||
             s->state == SERVICE_STOP_SIGTERM ||
             s->state == SERVICE_STOP_SIGKILL ||
             s->state == SERVICE_STOP_POST ||
@@ -2126,6 +2133,7 @@ static void service_notify_cgroup_empty_event(Unit *u) {
                 service_enter_running(s, SERVICE_SUCCESS);
                 break;
 
+        case SERVICE_STOP_SIGABRT:
         case SERVICE_STOP_SIGTERM:
         case SERVICE_STOP_SIGKILL:
 
@@ -2252,6 +2260,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                                 service_enter_running(s, f);
                                 break;
 
+                        case SERVICE_STOP_SIGABRT:
                         case SERVICE_STOP_SIGTERM:
                         case SERVICE_STOP_SIGKILL:
 
@@ -2392,6 +2401,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                                 service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
                                 break;
 
+                        case SERVICE_STOP_SIGABRT:
                         case SERVICE_STOP_SIGTERM:
                         case SERVICE_STOP_SIGKILL:
                                 if (main_pid_good(s) <= 0)
@@ -2461,6 +2471,12 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
                 service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
                 break;
 
+        case SERVICE_STOP_SIGABRT:
+                log_warning_unit(UNIT(s)->id,
+                                 "%s stop-sigabrt timed out. Terminating.", UNIT(s)->id);
+                service_enter_signal(s, SERVICE_STOP_SIGTERM, s->result);
+                break;
+
         case SERVICE_STOP_SIGTERM:
                 if (s->kill_context.send_sigkill) {
                         log_warning_unit(UNIT(s)->id, "%s stop-sigterm timed out. Killing.", UNIT(s)->id);
@@ -2528,7 +2544,7 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void
         log_error_unit(UNIT(s)->id, "%s watchdog timeout (limit %s)!", UNIT(s)->id,
                        format_timespan(t, sizeof(t), s->watchdog_usec, 1));
 
-        service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_WATCHDOG);
+        service_enter_signal(s, SERVICE_STOP_SIGABRT, SERVICE_FAILURE_WATCHDOG);
 
         return 0;
 }
diff --git a/src/core/service.h b/src/core/service.h
index 0db0c4d..54fbe46 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -39,6 +39,7 @@ typedef enum ServiceState {
         SERVICE_EXITED,            /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */
         SERVICE_RELOAD,
         SERVICE_STOP,              /* No STOP_PRE state, instead just register multiple STOP executables */
+        SERVICE_STOP_SIGABRT,      /* Watchdog timeout */
         SERVICE_STOP_SIGTERM,
         SERVICE_STOP_SIGKILL,
         SERVICE_STOP_POST,
diff --git a/src/core/socket.c b/src/core/socket.c
index 9004cb4..70ff481 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -1579,6 +1579,7 @@ static void socket_enter_signal(Socket *s, SocketState state, SocketResult f) {
                         UNIT(s),
                         &s->kill_context,
                         state != SOCKET_STOP_PRE_SIGTERM && state != SOCKET_FINAL_SIGTERM,
+                        false,
                         -1,
                         s->control_pid,
                         false);
diff --git a/src/core/swap.c b/src/core/swap.c
index b2ca048..3448c1e 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -711,6 +711,7 @@ static void swap_enter_signal(Swap *s, SwapState state, SwapResult f) {
                         UNIT(s),
                         &s->kill_context,
                         state != SWAP_ACTIVATING_SIGTERM && state != SWAP_DEACTIVATING_SIGTERM,
+                        false,
                         -1,
                         s->control_pid,
                         false);
diff --git a/src/core/unit.c b/src/core/unit.c
index e40e6f2..66804c9 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3305,6 +3305,7 @@ int unit_kill_context(
                 Unit *u,
                 KillContext *c,
                 bool sigkill,
+                bool sigabrt,
                 pid_t main_pid,
                 pid_t control_pid,
                 bool main_pid_alien) {
@@ -3317,7 +3318,7 @@ int unit_kill_context(
         if (c->kill_mode == KILL_NONE)
                 return 0;
 
-        sig = sigkill ? SIGKILL : c->kill_signal;
+        sig = sigkill ? SIGKILL : (sigabrt ? SIGABRT : c->kill_signal);
 
         if (main_pid > 0) {
                 r = kill_and_sigcont(main_pid, sig);
diff --git a/src/core/unit.h b/src/core/unit.h
index 43ab4d1..b4b8fa1 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -572,7 +572,7 @@ int unit_write_drop_in_private_format(Unit *u, UnitSetPropertiesMode mode, const
 
 int unit_remove_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name);
 
-int unit_kill_context(Unit *u, KillContext *c, bool sigkill, pid_t main_pid, pid_t control_pid, bool main_pid_alien);
+int unit_kill_context(Unit *u, KillContext *c, bool sigkill, bool sigabrt, pid_t main_pid, pid_t control_pid, bool main_pid_alien);
 
 int unit_make_transient(Unit *u);
 
-- 
2.1.1



More information about the systemd-devel mailing list