[systemd-commits] stable Branch 'v208-stable' - 5 commits - src/core src/login src/run src/shared

Michal Sekletar msekleta at kemper.freedesktop.org
Thu Mar 6 04:22:58 PST 2014


 src/core/dbus-scope.c      |   91 ++++++++++++++++++++++++++---
 src/core/dbus-scope.h      |    2 
 src/core/manager.c         |    2 
 src/core/scope.c           |  105 +++++++++++++++++++++++++--------
 src/core/scope.h           |    5 +
 src/core/service.c         |  140 +++++++++++++++++++++++++++------------------
 src/core/unit.c            |  112 +++++++++++++++++++++++++++++++++++-
 src/core/unit.h            |    9 ++
 src/login/logind-dbus.c    |   65 ++++++++++++--------
 src/login/logind-session.c |  130 ++++++++++++++++++++++++++++++++---------
 src/login/logind-session.h |    4 +
 src/login/logind-user.c    |   23 ++++---
 src/login/logind-user.h    |    1 
 src/login/logind.c         |   23 ++++++-
 src/login/logind.h         |    4 -
 src/login/pam-module.c     |   11 ++-
 src/run/run.c              |    8 ++
 src/shared/dbus-common.c   |   42 +++++++++++++
 src/shared/dbus-common.h   |    2 
 19 files changed, 614 insertions(+), 165 deletions(-)

New commits:
commit a0ef58e3e421909661c615ee6b067a9c2cd9f955
Author: Michal Sekletar <msekleta at redhat.com>
Date:   Tue Mar 4 17:00:54 2014 +0100

    logind: given that we can now relatively safely shut down session scopes without working cgroup empty notifications, there's no need to set the stop timeout of session scopes low
    
    Based-on: a50df72b37ce2a7caf7775c70d18c3f9504b9e80

diff --git a/src/login/logind-dbus.c b/src/login/logind-dbus.c
index c1c3d7f..e1f0c9a 100644
--- a/src/login/logind-dbus.c
+++ b/src/login/logind-dbus.c
@@ -2548,10 +2548,9 @@ int manager_start_scope(
                 DBusError *error,
                 char **job) {
 
-        const char *timeout_stop_property = "TimeoutStopUSec", *send_sighup_property = "SendSIGHUP", *pids_property = "PIDs", *after_property = "After";
+        const char *send_sighup_property = "SendSIGHUP", *pids_property = "PIDs", *after_property = "After";
         _cleanup_dbus_message_unref_ DBusMessage *m = NULL, *reply = NULL;
         DBusMessageIter iter, sub, sub2, sub3, sub4;
-        uint64_t timeout = 500 * USEC_PER_MSEC;
         dbus_bool_t send_sighup = true;
         const char *fail = "fail";
         uint32_t u;
@@ -2631,14 +2630,6 @@ int manager_start_scope(
          * stop timeout for sessions, so that we don't wait
          * forever. */
 
-        if (!dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) ||
-            !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &timeout_stop_property) ||
-            !dbus_message_iter_open_container(&sub2, DBUS_TYPE_VARIANT, "t", &sub3) ||
-            !dbus_message_iter_append_basic(&sub3, DBUS_TYPE_UINT64, &timeout) ||
-            !dbus_message_iter_close_container(&sub2, &sub3) ||
-            !dbus_message_iter_close_container(&sub, &sub2))
-                return log_oom();
-
         /* Make sure that the session shells are terminated with
          * SIGHUP since bash and friends tend to ignore SIGTERM */
         if (!dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) ||

commit 1a6dce07f96e63891db22c10acf5b0edc5c4dc81
Author: Michal Sekletar <msekleta at redhat.com>
Date:   Mon Mar 3 16:07:39 2014 +0100

    logind: order all scopes after both systemd-logind.service and systemd-user-sessions.service
    
    This way at shutdown we can be sure that the sessions go away before the
    network.
    
    Based-on: ba4c5d93b73ef7bba0ae0c6bf2b36a42360f7a34

diff --git a/src/login/logind-dbus.c b/src/login/logind-dbus.c
index 63f0d3c..c1c3d7f 100644
--- a/src/login/logind-dbus.c
+++ b/src/login/logind-dbus.c
@@ -2544,10 +2544,11 @@ int manager_start_scope(
                 const char *slice,
                 const char *description,
                 const char *after,
+                const char *after2,
                 DBusError *error,
                 char **job) {
 
-        const char *timeout_stop_property = "TimeoutStopUSec", *send_sighup_property = "SendSIGHUP", *pids_property = "PIDs";
+        const char *timeout_stop_property = "TimeoutStopUSec", *send_sighup_property = "SendSIGHUP", *pids_property = "PIDs", *after_property = "After";
         _cleanup_dbus_message_unref_ DBusMessage *m = NULL, *reply = NULL;
         DBusMessageIter iter, sub, sub2, sub3, sub4;
         uint64_t timeout = 500 * USEC_PER_MSEC;
@@ -2602,8 +2603,6 @@ int manager_start_scope(
         }
 
         if (!isempty(after)) {
-                const char *after_property = "After";
-
                 if (!dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) ||
                     !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &after_property) ||
                     !dbus_message_iter_open_container(&sub2, DBUS_TYPE_VARIANT, "as", &sub3) ||
@@ -2615,6 +2614,18 @@ int manager_start_scope(
                         return log_oom();
         }
 
+        if (!isempty(after2)) {
+                if (!dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) ||
+                    !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &after_property) ||
+                    !dbus_message_iter_open_container(&sub2, DBUS_TYPE_VARIANT, "as", &sub3) ||
+                    !dbus_message_iter_open_container(&sub3, DBUS_TYPE_ARRAY, "s", &sub4) ||
+                    !dbus_message_iter_append_basic(&sub4, DBUS_TYPE_STRING, &after2) ||
+                    !dbus_message_iter_close_container(&sub3, &sub4) ||
+                    !dbus_message_iter_close_container(&sub2, &sub3) ||
+                    !dbus_message_iter_close_container(&sub, &sub2))
+                        return log_oom();
+        }
+
         /* cgroup empty notification is not available in containers
          * currently. To make this less problematic, let's shorten the
          * stop timeout for sessions, so that we don't wait
diff --git a/src/login/logind-session.c b/src/login/logind-session.c
index 78e6d74..2bac0a2 100644
--- a/src/login/logind-session.c
+++ b/src/login/logind-session.c
@@ -518,7 +518,7 @@ static int session_start_scope(Session *s) {
                 if (!scope)
                         return log_oom();
 
-                r = manager_start_scope(s->manager, scope, s->leader, s->user->slice, description, "systemd-logind.service", &error, &job);
+                r = manager_start_scope(s->manager, scope, s->leader, s->user->slice, description, "systemd-logind.service", "systemd-user-session.service", &error, &job);
                 if (r < 0) {
                         log_error("Failed to start session scope %s: %s %s",
                                   scope, bus_error(&error, r), error.name);
diff --git a/src/login/logind.h b/src/login/logind.h
index 0d2248f..540572f 100644
--- a/src/login/logind.h
+++ b/src/login/logind.h
@@ -184,7 +184,7 @@ int manager_send_changed(Manager *manager, const char *properties);
 
 int manager_dispatch_delayed(Manager *manager);
 
-int manager_start_scope(Manager *manager, const char *scope, pid_t pid, const char *slice, const char *description, const char *after, DBusError *error, char **job);
+int manager_start_scope(Manager *manager, const char *scope, pid_t pid, const char *slice, const char *description, const char *after, const char *after2, DBusError *error, char **job);
 int manager_start_unit(Manager *manager, const char *unit, DBusError *error, char **job);
 int manager_stop_unit(Manager *manager, const char *unit, DBusError *error, char **job);
 int manager_abandon_scope(Manager *manager, const char *scope, DBusError *error);

commit 634acdc887a967039233e40b96e4f6ff79f86b72
Author: Michal Sekletar <msekleta at redhat.com>
Date:   Mon Mar 3 15:37:49 2014 +0100

    logind: rework session shutdown logic
    
    Simplify the shutdown logic a bit:
    
    - Keep the session FIFO around in the PAM module, even after the session
      shutdown hook has been finished. This allows logind to track precisely
      when the PAM handler goes away.
    
    - In the ReleaseSession() call, start a timer that will stop the
      session when it elapses.
    
    - Never fiddle with the KillMode of scopes to configure whether user
      processes should be killed or not. Instead, simply leave the scope
      units around when we terminate a session whose processes should not be
      killed.
    
    - When killing is enabled, stop the session scope on FIFO EOF or after
      the ReleaseSession() timeout. When killing is disabled, simply tell
      PID 1 to abandon the scope.
    
    Because the scopes stay around and hence all processes are always members
    of a scope, the system shutdown logic should be more robust, as the
    scopes can be shut down as part of the usual shutdown logic.
    
    Based-on: 5f41d1f10fd97e93517b6a762b1bec247f4d1171

diff --git a/src/login/logind-dbus.c b/src/login/logind-dbus.c
index 6f3e442..63f0d3c 100644
--- a/src/login/logind-dbus.c
+++ b/src/login/logind-dbus.c
@@ -1746,13 +1746,7 @@ static DBusHandlerResult manager_message_handler(
                 if (!session)
                         return bus_send_error_reply(connection, message, &error, -ENOENT);
 
-                /* We use the FIFO to detect stray sessions where the
-                process invoking PAM dies abnormally. We need to make
-                sure that that process is not killed if at the clean
-                end of the session it closes the FIFO. Hence, with
-                this call explicitly turn off the FIFO logic, so that
-                the PAM code can finish clean up on its own */
-                session_remove_fifo(session);
+                session_release(session);
 
                 reply = dbus_message_new_method_return(message);
                 if (!reply)
@@ -2550,7 +2544,6 @@ int manager_start_scope(
                 const char *slice,
                 const char *description,
                 const char *after,
-                const char *kill_mode,
                 DBusError *error,
                 char **job) {
 
@@ -2622,18 +2615,6 @@ int manager_start_scope(
                         return log_oom();
         }
 
-        if (!isempty(kill_mode)) {
-                const char *kill_mode_property = "KillMode";
-
-                if (!dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) ||
-                    !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &kill_mode_property) ||
-                    !dbus_message_iter_open_container(&sub2, DBUS_TYPE_VARIANT, "s", &sub3) ||
-                    !dbus_message_iter_append_basic(&sub3, DBUS_TYPE_STRING, &kill_mode) ||
-                    !dbus_message_iter_close_container(&sub2, &sub3) ||
-                    !dbus_message_iter_close_container(&sub, &sub2))
-                        return log_oom();
-        }
-
         /* cgroup empty notification is not available in containers
          * currently. To make this less problematic, let's shorten the
          * stop timeout for sessions, so that we don't wait
@@ -2792,6 +2773,36 @@ int manager_stop_unit(Manager *manager, const char *unit, DBusError *error, char
         return 1;
 }
 
+int manager_abandon_scope(Manager *manager, const char *scope, DBusError *error) {
+        _cleanup_dbus_message_unref_ DBusMessage *reply = NULL;
+        _cleanup_free_ char *path = NULL;
+        int r;
+
+        assert(manager);
+        assert(scope);
+
+        path = unit_dbus_path_from_name(scope);
+        if (!path)
+                return -ENOMEM;
+
+        r = bus_method_call_with_reply(
+                manager->bus,
+                "org.freedesktop.systemd1",
+                path,
+                "org.freedesktop.systemd1.Scope",
+                "Abandon",
+                &reply,
+                error,
+                DBUS_TYPE_INVALID);
+
+        if (r < 0) {
+                log_error("Failed to abandon scope %s", scope);
+                return r;
+        }
+
+        return 1;
+}
+
 int manager_kill_unit(Manager *manager, const char *unit, KillWho who, int signo, DBusError *error) {
         _cleanup_dbus_message_unref_ DBusMessage *reply = NULL;
         const char *w;
diff --git a/src/login/logind-session.c b/src/login/logind-session.c
index 27aa335..78e6d74 100644
--- a/src/login/logind-session.c
+++ b/src/login/logind-session.c
@@ -24,6 +24,7 @@
 #include <unistd.h>
 #include <sys/epoll.h>
 #include <fcntl.h>
+#include <sys/timerfd.h>
 
 #include <systemd/sd-id128.h>
 #include <systemd/sd-messages.h>
@@ -36,6 +37,8 @@
 #include "dbus-common.h"
 #include "logind-session.h"
 
+#define RELEASE_SEC 20
+
 static unsigned devt_hash_func(const void *p) {
         uint64_t u = *(const dev_t*)p;
 
@@ -505,7 +508,6 @@ static int session_start_scope(Session *s) {
 
         if (!s->scope) {
                 _cleanup_free_ char *description = NULL;
-                const char *kill_mode;
                 char *scope, *job;
 
                 description = strjoin("Session ", s->id, " of user ", s->user->name, NULL);
@@ -516,9 +518,7 @@ static int session_start_scope(Session *s) {
                 if (!scope)
                         return log_oom();
 
-                kill_mode = manager_shall_kill(s->manager, s->user->name) ? "control-group" : "none";
-
-                r = manager_start_scope(s->manager, scope, s->leader, s->user->slice, description, "systemd-user-sessions.service", kill_mode, &error, &job);
+                r = manager_start_scope(s->manager, scope, s->leader, s->user->slice, description, "systemd-logind.service", &error, &job);
                 if (r < 0) {
                         log_error("Failed to start session scope %s: %s %s",
                                   scope, bus_error(&error, r), error.name);
@@ -579,23 +579,22 @@ int session_start(Session *s) {
 
         s->started = true;
 
-        /* Save session data */
+        /* Save data */
         session_save(s);
         user_save(s->user);
+        if (s->seat)
+                seat_save(s->seat);
 
+        /* Send signals */
         session_send_signal(s, true);
 
         if (s->seat) {
-                seat_save(s->seat);
-
                 if (s->seat->active == s)
                         seat_send_changed(s->seat, "Sessions\0ActiveSession\0");
                 else
                         seat_send_changed(s->seat, "Sessions\0");
         }
 
-        user_send_changed(s->user, "Sessions\0");
-
         return 0;
 }
 
@@ -611,15 +610,24 @@ static int session_stop_scope(Session *s) {
         if (!s->scope)
                 return 0;
 
-        r = manager_stop_unit(s->manager, s->scope, &error, &job);
-        if (r < 0) {
-                log_error("Failed to stop session scope: %s", bus_error(&error, r));
-                dbus_error_free(&error);
-                return r;
-        }
+        if (manager_shall_kill(s->manager, s->user->name)) {
+                r = manager_stop_unit(s->manager, s->scope, &error, &job);
+                if (r < 0) {
+                        log_error("Failed to stop session scope: %s", bus_error(&error, r));
+                        dbus_error_free(&error);
+                        return r;
+                }
 
-        free(s->scope_job);
-        s->scope_job = job;
+                free(s->scope_job);
+                s->scope_job = job;
+        } else {
+                r = manager_abandon_scope(s->manager, s->scope, &error);
+                if (r < 0) {
+                        log_error("Failed to abandon session scope: %s", bus_error(&error, r));
+                        dbus_error_free(&error);
+                        return r;
+                }
+        }
 
         return 0;
 }
@@ -644,6 +652,19 @@ static int session_unlink_x11_socket(Session *s) {
         return r < 0 ? -errno : 0;
 }
 
+static void session_close_timer_fd(Session *s) {
+        assert(s);
+
+        if (s->timer_fd <= 0)
+                return;
+
+        hashmap_remove(s->manager->timer_fds, INT_TO_PTR(s->timer_fd + 1));
+        epoll_ctl(s->manager->epoll_fd, EPOLL_CTL_DEL, s->timer_fd, NULL);
+
+        close_nointr(s->timer_fd);
+        s->timer_fd = -1;
+}
+
 int session_stop(Session *s) {
         int r;
 
@@ -652,11 +673,18 @@ int session_stop(Session *s) {
         if (!s->user)
                 return -ESTALE;
 
+        session_close_timer_fd(s);
+
+        /* We are going down, don't care about FIFOs anymore */
+        session_remove_fifo(s);
+
         /* Kill cgroup */
         r = session_stop_scope(s);
 
         session_save(s);
 
+        s->stopping = true;
+
         return r;
 }
 
@@ -678,6 +706,8 @@ int session_finalize(Session *s) {
                            "MESSAGE=Removed session %s.", s->id,
                            NULL);
 
+        session_close_timer_fd(s);
+
         /* Kill session devices */
         while ((sd = hashmap_first(s->devices)))
                 session_device_free(sd);
@@ -698,16 +728,64 @@ int session_finalize(Session *s) {
                 if (s->seat->active == s)
                         seat_set_active(s->seat, NULL);
 
-                seat_send_changed(s->seat, "Sessions\0");
                 seat_save(s->seat);
+                seat_send_changed(s->seat, "Sessions\0");
         }
 
-        user_send_changed(s->user, "Sessions\0");
         user_save(s->user);
+        user_send_changed(s->user, "Sessions\0");
 
         return r;
 }
 
+void session_release(Session *s) {
+        int r;
+
+        struct itimerspec its = { .it_value.tv_sec = RELEASE_SEC };
+        struct epoll_event ev = {};
+
+        assert(s);
+
+        if (!s->started || s->stopping)
+                return;
+
+        if (s->timer_fd >= 0)
+                return;
+
+        s->timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
+        if (s->timer_fd < 0) {
+                log_error("Failed to create session release timer fd");
+                goto out;
+        }
+
+        r = hashmap_put(s->manager->timer_fds, INT_TO_PTR(s->timer_fd + 1), s);
+        if (r < 0) {
+                log_error("Failed to store session release timer fd");
+                goto out;
+        }
+
+        ev.events = EPOLLONESHOT;
+        ev.data.u32 = FD_OTHER_BASE + s->timer_fd;
+
+        r = epoll_ctl(s->manager->epoll_fd, EPOLL_CTL_ADD, s->timer_fd, &ev);
+        if (r < 0) {
+                log_error("Failed to add session release timer fd to epoll instance");
+                goto out;
+        }
+
+        r = timerfd_settime(s->timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
+        if (r < 0) {
+                log_error("Failed to arm timer : %m");
+                goto out;
+        }
+
+out:
+        if (s->timer_fd >= 0) {
+                close_nointr(s->timer_fd);
+                s->timer_fd = -1;
+        }
+}
+
 bool session_is_active(Session *s) {
         assert(s);
 
@@ -904,8 +982,6 @@ void session_remove_fifo(Session *s) {
 }
 
 int session_check_gc(Session *s, bool drop_not_started) {
-        int r;
-
         assert(s);
 
         if (drop_not_started && !s->started)
@@ -915,11 +991,7 @@ int session_check_gc(Session *s, bool drop_not_started) {
                 return 0;
 
         if (s->fifo_fd >= 0) {
-                r = pipe_eof(s->fifo_fd);
-                if (r < 0)
-                        return r;
-
-                if (r == 0)
+                if (pipe_eof(s->fifo_fd) <= 0)
                         return 1;
         }
 
@@ -945,15 +1017,15 @@ void session_add_to_gc_queue(Session *s) {
 SessionState session_get_state(Session *s) {
         assert(s);
 
+        if (s->stopping || s->timer_fd >= 0)
+                return SESSION_CLOSING;
+
         if (s->closing)
                 return SESSION_CLOSING;
 
         if (s->scope_job)
                 return SESSION_OPENING;
 
-        if (s->fifo_fd < 0)
-                return SESSION_CLOSING;
-
         if (session_is_active(s))
                 return SESSION_ACTIVE;
 
diff --git a/src/login/logind-session.h b/src/login/logind-session.h
index f175a89..9b76582 100644
--- a/src/login/logind-session.h
+++ b/src/login/logind-session.h
@@ -98,11 +98,14 @@ struct Session {
         int fifo_fd;
         char *fifo_path;
 
+        int timer_fd;
+
         bool idle_hint;
         dual_timestamp idle_hint_timestamp;
 
         bool in_gc_queue:1;
         bool started:1;
+        bool stopping:1;
         bool closing:1;
 
         DBusMessage *create_message;
@@ -130,6 +133,7 @@ void session_remove_fifo(Session *s);
 int session_start(Session *s);
 int session_stop(Session *s);
 int session_finalize(Session *s);
+void session_release(Session *s);
 int session_save(Session *s);
 int session_load(Session *s);
 int session_kill(Session *s, KillWho who, int signo);
diff --git a/src/login/logind-user.c b/src/login/logind-user.c
index ddf19eb..3aeac13 100644
--- a/src/login/logind-user.c
+++ b/src/login/logind-user.c
@@ -529,6 +529,8 @@ int user_stop(User *u) {
         if (k < 0)
                 r = k;
 
+        u->stopping = true;
+
         user_save(u);
 
         return r;
@@ -650,22 +652,27 @@ void user_add_to_gc_queue(User *u) {
 
 UserState user_get_state(User *u) {
         Session *i;
-        bool all_closing = true;
 
         assert(u);
 
+        if (u->stopping)
+                return USER_CLOSING;
+
         if (u->slice_job || u->service_job)
                 return USER_OPENING;
 
-        LIST_FOREACH(sessions_by_user, i, u->sessions) {
-                if (session_is_active(i))
-                        return USER_ACTIVE;
-                if (session_get_state(i) != SESSION_CLOSING)
-                        all_closing = false;
-        }
+        if (u->sessions) {
+                bool all_closing = true;
+
+                LIST_FOREACH(sessions_by_user, i, u->sessions) {
+                        if (session_is_active(i))
+                                return USER_ACTIVE;
+                        if (session_get_state(i) != SESSION_CLOSING)
+                                all_closing = false;
+                }
 
-        if (u->sessions)
                 return all_closing ? USER_CLOSING : USER_ONLINE;
+        }
 
         if (user_check_linger_file(u) > 0)
                 return USER_LINGERING;
diff --git a/src/login/logind-user.h b/src/login/logind-user.h
index a36f456..a12532e 100644
--- a/src/login/logind-user.h
+++ b/src/login/logind-user.h
@@ -61,6 +61,7 @@ struct User {
 
         bool in_gc_queue:1;
         bool started:1;
+        bool stopping:1;
 
         LIST_HEAD(Session, sessions);
         LIST_FIELDS(User, gc_queue);
diff --git a/src/login/logind.c b/src/login/logind.c
index 0628032..5180be7 100644
--- a/src/login/logind.c
+++ b/src/login/logind.c
@@ -80,10 +80,11 @@ Manager *manager_new(void) {
         m->session_fds = hashmap_new(trivial_hash_func, trivial_compare_func);
         m->inhibitor_fds = hashmap_new(trivial_hash_func, trivial_compare_func);
         m->button_fds = hashmap_new(trivial_hash_func, trivial_compare_func);
+        m->timer_fds = hashmap_new(trivial_hash_func, trivial_compare_func);
 
         if (!m->devices || !m->seats || !m->sessions || !m->users || !m->inhibitors || !m->buttons || !m->busnames ||
             !m->user_units || !m->session_units ||
-            !m->session_fds || !m->inhibitor_fds || !m->button_fds) {
+            !m->session_fds || !m->inhibitor_fds || !m->button_fds || !m->timer_fds) {
                 manager_free(m);
                 return NULL;
         }
@@ -149,6 +150,7 @@ void manager_free(Manager *m) {
         hashmap_free(m->session_fds);
         hashmap_free(m->inhibitor_fds);
         hashmap_free(m->button_fds);
+        hashmap_free(m->timer_fds);
 
         if (m->console_active_fd >= 0)
                 close_nointr_nofail(m->console_active_fd);
@@ -620,6 +622,13 @@ static void manager_dispatch_other(Manager *m, int fd) {
                 return;
         }
 
+        s = hashmap_get(m->timer_fds, INT_TO_PTR(fd + 1));
+        if (s) {
+                assert(s->timer_fd == fd);
+                session_stop(s);
+                return;
+        }
+
         i = hashmap_get(m->inhibitor_fds, INT_TO_PTR(fd + 1));
         if (i) {
                 assert(i->fifo_fd == fd);
@@ -942,8 +951,12 @@ void manager_gc(Manager *m, bool drop_not_started) {
                 LIST_REMOVE(Session, gc_queue, m->session_gc_queue, session);
                 session->in_gc_queue = false;
 
-                if (session_check_gc(session, drop_not_started) == 0) {
+                /* First, if we are not closing yet, initiate stopping */
+                if (!session_check_gc(session, drop_not_started) &&
+                    session_get_state(session) != SESSION_CLOSING)
                         session_stop(session);
+
+                if (!session_check_gc(session, drop_not_started)) {
                         session_finalize(session);
                         session_free(session);
                 }
@@ -953,8 +966,11 @@ void manager_gc(Manager *m, bool drop_not_started) {
                 LIST_REMOVE(User, gc_queue, m->user_gc_queue, user);
                 user->in_gc_queue = false;
 
-                if (user_check_gc(user, drop_not_started) == 0) {
+                if (!user_check_gc(user, drop_not_started) &&
+                    user_get_state(user) != USER_CLOSING)
                         user_stop(user);
+
+                if (!user_check_gc(user, drop_not_started)) {
                         user_finalize(user);
                         user_free(user);
                 }
@@ -1032,6 +1048,7 @@ finish:
 
         return r;
 }
+
 int manager_startup(Manager *m) {
         int r;
         Seat *seat;
diff --git a/src/login/logind.h b/src/login/logind.h
index 9e6296c..0d2248f 100644
--- a/src/login/logind.h
+++ b/src/login/logind.h
@@ -88,6 +88,7 @@ struct Manager {
         Hashmap *session_fds;
         Hashmap *inhibitor_fds;
         Hashmap *button_fds;
+        Hashmap *timer_fds;
 
         usec_t inhibit_delay_max;
 
@@ -183,9 +184,10 @@ int manager_send_changed(Manager *manager, const char *properties);
 
 int manager_dispatch_delayed(Manager *manager);
 
-int manager_start_scope(Manager *manager, const char *scope, pid_t pid, const char *slice, const char *description, const char *after, const char *kill_mode, DBusError *error, char **job);
+int manager_start_scope(Manager *manager, const char *scope, pid_t pid, const char *slice, const char *description, const char *after, DBusError *error, char **job);
 int manager_start_unit(Manager *manager, const char *unit, DBusError *error, char **job);
 int manager_stop_unit(Manager *manager, const char *unit, DBusError *error, char **job);
+int manager_abandon_scope(Manager *manager, const char *scope, DBusError *error);
 int manager_kill_unit(Manager *manager, const char *unit, KillWho who, int signo, DBusError *error);
 int manager_unit_is_active(Manager *manager, const char *unit);
 
diff --git a/src/login/pam-module.c b/src/login/pam-module.c
index 5baf1b7..6259450 100644
--- a/src/login/pam-module.c
+++ b/src/login/pam-module.c
@@ -489,7 +489,7 @@ _public_ PAM_EXTERN int pam_sm_close_session(
                 int flags,
                 int argc, const char **argv) {
 
-        const void *p = NULL, *existing = NULL;
+        const void *existing = NULL;
         const char *id;
         DBusConnection *bus = NULL;
         DBusMessage *m = NULL, *reply = NULL;
@@ -546,12 +546,15 @@ _public_ PAM_EXTERN int pam_sm_close_session(
                 }
         }
 
+
+        /* Note that we are knowingly leaking the FIFO fd here. This
+         * way, logind can watch us die. If we closed it here it would
+         * not have any clue when that is completed. Given that one
+         * cannot really have multiple PAM sessions open from the same
+         * process this means we will leak one FD at max. */
         r = PAM_SUCCESS;
 
 finish:
-        pam_get_data(handle, "systemd.session-fd", &p);
-        if (p)
-                close_nointr(PTR_TO_INT(p) - 1);
 
         dbus_error_free(&error);
 

commit d9eb14931b12ded622f5aa4bd547e6075d10100f
Author: Michal Sekletar <msekleta at redhat.com>
Date:   Thu Feb 27 18:16:19 2014 +0100

    core: watch SIGCHLD more closely to track processes of units with no reliable cgroup empty notifier
    
    When a process dies that we can associate with a specific unit, start
    watching all other processes of that unit, so that we can associate
    those processes with the unit too.
    
    Also, for service units, start doing this as soon as we get the first
    SIGCHLD for either the control or the main process, so that we can follow
    the processes of the service from one to the other, as long as the
    processes that remain are children of the ones we watched that died, and
    got reassigned to us as parent.
    
    Similarly, for scope units, start doing this as soon as the scope
    controller abandons the unit, and thus management entirely reverts to
    systemd. To abandon a unit, introduce a new Abandon() scope unit method
    call.
    
    Based-on: a911bb9ab27ac0eb3bbf4e8b4109e5da9b88eee3

diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
index b576f76..58dd9ff 100644
--- a/src/core/dbus-scope.c
+++ b/src/core/dbus-scope.c
@@ -30,6 +30,7 @@
 
 #define BUS_SCOPE_INTERFACE                                             \
         " <interface name=\"org.freedesktop.systemd1.Scope\">\n"        \
+        "  <method name=\"Abandon\"/>\n"                                \
         BUS_UNIT_CGROUP_INTERFACE                                       \
         "  <property name=\"Controller\" type=\"s\" access=\"read\"/>\n"\
         "  <property name=\"TimeoutStopUSec\" type=\"t\" access=\"read\"/>\n" \
@@ -66,19 +67,40 @@ static const BusProperty bus_scope_properties[] = {
 
 DBusHandlerResult bus_scope_message_handler(Unit *u, DBusConnection *c, DBusMessage *message) {
         Scope *s = SCOPE(u);
+        _cleanup_dbus_message_unref_ DBusMessage *reply = NULL;
 
-        const BusBoundProperties bps[] = {
+        SELINUX_UNIT_ACCESS_CHECK(u, c, message, "status");
+
+        if (dbus_message_is_method_call(message, "org.freedesktop.systemd1.Scope", "Abandon")) {
+                int r;
+
+                r = scope_abandon(s);
+                if (r < 0)
+                        log_error("Failed to mark scope %s as abandoned : %s", UNIT(s)->id, strerror(-r));
+
+                reply = dbus_message_new_method_return(message);
+                if (!reply)
+                        goto oom;
+        } else {
+                const BusBoundProperties bps[] = {
                 { "org.freedesktop.systemd1.Unit",  bus_unit_properties,           u },
                 { "org.freedesktop.systemd1.Scope", bus_unit_cgroup_properties,    u },
                 { "org.freedesktop.systemd1.Scope", bus_scope_properties,          s },
                 { "org.freedesktop.systemd1.Scope", bus_cgroup_context_properties, &s->cgroup_context },
                 { "org.freedesktop.systemd1.Scope", bus_kill_context_properties,   &s->kill_context   },
                 {}
-        };
+                };
 
-        SELINUX_UNIT_ACCESS_CHECK(u, c, message, "status");
+               return  bus_default_message_handler(c, message, INTROSPECTION, INTERFACES_LIST, bps);
+        }
+
+        if (reply)
+                if (!bus_maybe_send_reply(c, message, reply))
+                        goto oom;
 
-        return bus_default_message_handler(c, message, INTROSPECTION, INTERFACES_LIST, bps);
+        return DBUS_HANDLER_RESULT_HANDLED;
+oom:
+        return DBUS_HANDLER_RESULT_NEED_MEMORY;
 }
 
 static int bus_scope_set_transient_property(
@@ -102,10 +124,6 @@ static int bus_scope_set_transient_property(
                     dbus_message_iter_get_element_type(i) != DBUS_TYPE_UINT32)
                         return -EINVAL;
 
-                r = set_ensure_allocated(&s->pids, trivial_hash_func, trivial_compare_func);
-                if (r < 0)
-                        return r;
-
                 dbus_message_iter_recurse(i, &sub);
                 while (dbus_message_iter_get_arg_type(&sub) == DBUS_TYPE_UINT32) {
                         uint32_t pid;
@@ -116,7 +134,7 @@ static int bus_scope_set_transient_property(
                                 return -EINVAL;
 
                         if (mode != UNIT_CHECK) {
-                                r = set_put(s->pids, LONG_TO_PTR(pid));
+                                r = unit_watch_pid(UNIT(s), pid);
                                 if (r < 0 && r != -EEXIST)
                                         return r;
                         }
diff --git a/src/core/manager.c b/src/core/manager.c
index 69ad4b5..e7b5234 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1389,7 +1389,7 @@ static int manager_dispatch_sigchld(Manager *m) {
                 log_debug_unit(u->id,
                                "Child %lu belongs to %s", (long unsigned) si.si_pid, u->id);
 
-                hashmap_remove(m->watch_pids, LONG_TO_PTR(si.si_pid));
+                unit_unwatch_pid(u, si.si_pid);
                 UNIT_VTABLE(u)->sigchld_event(u, si.si_pid, si.si_code, si.si_status);
         }
 
diff --git a/src/core/scope.c b/src/core/scope.c
index e75fc2b..22bdfb2 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -35,6 +35,7 @@
 static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
         [SCOPE_DEAD] = UNIT_INACTIVE,
         [SCOPE_RUNNING] = UNIT_ACTIVE,
+        [SCOPE_ABANDONED] = UNIT_ACTIVE,
         [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
         [SCOPE_STOP_SIGKILL] = UNIT_DEACTIVATING,
         [SCOPE_FAILED] = UNIT_FAILED
@@ -67,9 +68,6 @@ static void scope_done(Unit *u) {
         free(s->controller);
         s->controller = NULL;
 
-        set_free(s->pids);
-        s->pids = NULL;
-
         unit_unwatch_timer(u, &s->timer_watch);
 }
 
@@ -84,6 +82,9 @@ static void scope_set_state(Scope *s, ScopeState state) {
             state != SCOPE_STOP_SIGKILL)
                 unit_unwatch_timer(UNIT(s), &s->timer_watch);
 
+        if (state == SCOPE_DEAD || state == SCOPE_FAILED)
+                unit_unwatch_all_pids(UNIT(s));
+
         if (state != old_state)
                 log_debug("%s changed %s -> %s",
                           UNIT(s)->id,
@@ -115,7 +116,7 @@ static int scope_verify(Scope *s) {
         if (UNIT(s)->load_state != UNIT_LOADED)
                 return 0;
 
-        if (set_size(s->pids) <= 0 && UNIT(s)->manager->n_reloading <= 0) {
+        if (set_size(UNIT(s)->pids) <= 0 && UNIT(s)->manager->n_reloading <= 0) {
                 log_error_unit(UNIT(s)->id, "Scope %s has no PIDs. Refusing.", UNIT(s)->id);
                 return -EINVAL;
         }
@@ -169,6 +170,9 @@ static int scope_coldplug(Unit *u) {
                                 return r;
                 }
 
+                if (s->deserialized_state != SCOPE_DEAD && s->deserialized_state != SCOPE_FAILED)
+                        unit_watch_all_pids(UNIT(s));
+
                 scope_set_state(s, s->deserialized_state);
         }
 
@@ -209,6 +213,8 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
         if (f != SCOPE_SUCCESS)
                 s->result = f;
 
+        unit_watch_all_pids(UNIT(s));
+
         /* If we have a controller set let's ask the controller nicely
          * to terminate the scope, instead of us going directly into
          * SIGTERM beserk mode */
@@ -271,13 +277,10 @@ static int scope_start(Unit *u) {
                 return r;
         }
 
-        r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, s->pids);
+        r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, UNIT(s)->pids);
         if (r < 0)
                 return r;
 
-        set_free(s->pids);
-        s->pids = NULL;
-
         s->result = SCOPE_SUCCESS;
 
         scope_set_state(s, SCOPE_RUNNING);
@@ -288,13 +291,13 @@ static int scope_stop(Unit *u) {
         Scope *s = SCOPE(u);
 
         assert(s);
-        assert(s->state == SCOPE_RUNNING);
 
         if (s->state == SCOPE_STOP_SIGTERM ||
             s->state == SCOPE_STOP_SIGKILL)
                 return 0;
 
-        assert(s->state == SCOPE_RUNNING);
+        assert(s->state == SCOPE_RUNNING ||
+               s->state == SCOPE_ABANDONED);
 
         scope_enter_signal(s, SCOPE_STOP_SIGTERM, SCOPE_SUCCESS);
         return 0;
@@ -358,7 +361,7 @@ static bool scope_check_gc(Unit *u) {
         /* Never clean up scopes that still have a process around,
          * even if the scope is formally dead. */
 
-        if (UNIT(s)->cgroup_path) {
+        if (u->cgroup_path) {
                 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path, true);
                 if (r <= 0)
                         return true;
@@ -367,6 +370,33 @@ static bool scope_check_gc(Unit *u) {
         return false;
 }
 
+static void scope_notify_cgroup_empty_event(Unit *u) {
+        Scope *s = SCOPE(u);
+
+        assert(u);
+
+        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
+
+        if (s->state == SCOPE_RUNNING || s->state == SCOPE_ABANDONED ||
+            s->state == SCOPE_STOP_SIGTERM || SCOPE_STOP_SIGKILL)
+                scope_enter_dead(s, SCOPE_SUCCESS);
+}
+
+static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+        /* If we get a SIGCHLD event for one of the processes we were
+           interested in, then we look for others to watch, under the
+           assumption that we'll sooner or later get a SIGCHLD for
+           them, as the original process we watched was probably the
+           parent of them, and they are hence now our children. */
+
+        unit_tidy_watch_pids(u, 0, 0);
+        unit_watch_all_pids(u);
+
+        /* If the PID set is empty now, then let's finish this off */
+        if (set_isempty(u->pids))
+                scope_notify_cgroup_empty_event(u);
+}
+
 static void scope_timer_event(Unit *u, uint64_t elapsed, Watch*w) {
         Scope *s = SCOPE(u);
 
@@ -397,24 +427,30 @@ static void scope_timer_event(Unit *u, uint64_t elapsed, Watch*w) {
         }
 }
 
-static void scope_notify_cgroup_empty_event(Unit *u) {
-        Scope *s = SCOPE(u);
-        assert(u);
+int scope_abandon(Scope *s) {
+        assert(s);
 
-        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
+        if (s->state != SCOPE_RUNNING && s->state != SCOPE_ABANDONED)
+                return -ESTALE;
 
-        switch (s->state) {
+        free(s->controller);
+        s->controller = NULL;
 
-        case SCOPE_RUNNING:
-        case SCOPE_STOP_SIGTERM:
-        case SCOPE_STOP_SIGKILL:
-                scope_enter_dead(s, SCOPE_SUCCESS);
+        /* The client is no longer watching the remaining processes,
+         * so let's step in here, under the assumption that the
+         * remaining processes will be sooner or later reassigned to
+         * us as parent. */
 
-                break;
+        unit_tidy_watch_pids(UNIT(s), 0, 0);
+        unit_watch_all_pids(UNIT(s));
 
-        default:
-                ;
-        }
+        /* If the PID set is empty now, then let's finish this off */
+        if (set_isempty(UNIT(s)->pids))
+                scope_notify_cgroup_empty_event(UNIT(s));
+        else
+                scope_set_state(s, SCOPE_ABANDONED);
+
+        return 0;
 }
 
 _pure_ static UnitActiveState scope_active_state(Unit *u) {
@@ -432,6 +468,7 @@ _pure_ static const char *scope_sub_state_to_string(Unit *u) {
 static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
         [SCOPE_DEAD] = "dead",
         [SCOPE_RUNNING] = "running",
+        [SCOPE_ABANDONED] = "abandoned",
         [SCOPE_STOP_SIGTERM] = "stop-sigterm",
         [SCOPE_STOP_SIGKILL] = "stop-sigkill",
         [SCOPE_FAILED] = "failed",
@@ -481,6 +518,8 @@ const UnitVTable scope_vtable = {
 
         .check_gc = scope_check_gc,
 
+        .sigchld_event = scope_sigchld_event,
+
         .timer_event = scope_timer_event,
 
         .reset_failed = scope_reset_failed,
diff --git a/src/core/scope.h b/src/core/scope.h
index b4bafa7..1e9f201 100644
--- a/src/core/scope.h
+++ b/src/core/scope.h
@@ -29,6 +29,7 @@ typedef struct Scope Scope;
 typedef enum ScopeState {
         SCOPE_DEAD,
         SCOPE_RUNNING,
+        SCOPE_ABANDONED,
         SCOPE_STOP_SIGTERM,
         SCOPE_STOP_SIGKILL,
         SCOPE_FAILED,
@@ -57,13 +58,13 @@ struct Scope {
 
         char *controller;
 
-        Set *pids;
-
         Watch timer_watch;
 };
 
 extern const UnitVTable scope_vtable;
 
+int scope_abandon(Scope *s);
+
 const char* scope_state_to_string(ScopeState i) _const_;
 ScopeState scope_state_from_string(const char *s) _pure_;
 
diff --git a/src/core/service.c b/src/core/service.c
index f0acda1..41e5cb5 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1546,6 +1546,11 @@ static void service_set_state(Service *s, ServiceState state) {
                 s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
         }
 
+        if (state == SERVICE_DEAD ||
+            state == SERVICE_FAILED ||
+            state == SERVICE_AUTO_RESTART)
+                unit_unwatch_all_pids(UNIT(s));
+
         if (state != SERVICE_START_PRE &&
             state != SERVICE_START &&
             state != SERVICE_START_POST &&
@@ -1661,8 +1666,14 @@ static int service_coldplug(Unit *u) {
                                         return r;
                         }
 
+                if (s->deserialized_state != SERVICE_DEAD &&
+                    s->deserialized_state != SERVICE_FAILED &&
+                    s->deserialized_state != SERVICE_AUTO_RESTART)
+                        unit_watch_all_pids(UNIT(s));
+
                 if (s->deserialized_state == SERVICE_START_POST ||
-                    s->deserialized_state == SERVICE_RUNNING)
+                    s->deserialized_state == SERVICE_RUNNING ||
+                    s->deserialized_state == SERVICE_RELOAD)
                         service_handle_watchdog(s);
 
                 service_set_state(s, s->deserialized_state);
@@ -1970,6 +1981,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
                 s->result = f;
 
         service_unwatch_control_pid(s);
+        unit_watch_all_pids(UNIT(s));
 
         s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST];
         if (s->control_command) {
@@ -2010,6 +2022,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
         if (f != SERVICE_SUCCESS)
                 s->result = f;
 
+        unit_watch_all_pids(UNIT(s));
+
         r = unit_kill_context(
                         UNIT(s),
                         &s->kill_context,
@@ -2055,6 +2069,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
                 s->result = f;
 
         service_unwatch_control_pid(s);
+        unit_watch_all_pids(UNIT(s));
 
         s->control_command = s->exec_command[SERVICE_EXEC_STOP];
         if (s->control_command) {
@@ -2961,6 +2976,62 @@ fail:
         service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
 }
 
+static void service_notify_cgroup_empty_event(Unit *u) {
+        Service *s = SERVICE(u);
+
+        assert(u);
+
+        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
+
+        switch (s->state) {
+
+                /* Waiting for SIGCHLD is usually more interesting,
+                 * because it includes return codes/signals. Which is
+                 * why we ignore the cgroup events for most cases,
+                 * except when we don't know pid which to expect the
+                 * SIGCHLD for. */
+
+        case SERVICE_START:
+        case SERVICE_START_POST:
+                /* If we were hoping for the daemon to write its PID file,
+                 * we can give up now. */
+                if (s->pid_file_pathspec) {
+                        log_warning_unit(u->id,
+                                         "%s never wrote its PID file. Failing.", UNIT(s)->id);
+                        service_unwatch_pid_file(s);
+                        if (s->state == SERVICE_START)
+                                service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES);
+                        else
+                                service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
+                }
+                break;
+
+        case SERVICE_RUNNING:
+                /* service_enter_running() will figure out what to do */
+                service_enter_running(s, SERVICE_SUCCESS);
+                break;
+
+        case SERVICE_STOP_SIGTERM:
+        case SERVICE_STOP_SIGKILL:
+
+                if (main_pid_good(s) <= 0 && !control_pid_good(s))
+                        service_enter_stop_post(s, SERVICE_SUCCESS);
+
+                break;
+
+        case SERVICE_STOP_POST:
+        case SERVICE_FINAL_SIGTERM:
+        case SERVICE_FINAL_SIGKILL:
+                if (main_pid_good(s) <= 0 && !control_pid_good(s))
+                        service_enter_dead(s, SERVICE_SUCCESS, true);
+
+                break;
+
+        default:
+                ;
+        }
+}
+
 static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         Service *s = SERVICE(u);
         ServiceResult f;
@@ -3229,6 +3300,18 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         /* Notify clients about changed exit status */
         unit_add_to_dbus_queue(u);
+
+        /* We got one SIGCHLD for the service, let's watch all
+         * processes that are now running of the service, and watch
+         * that. Among the PIDs we then watch will be children
+         * reassigned to us, which hopefully allows us to identify
+         * when all children are gone */
+        unit_tidy_watch_pids(u, s->main_pid, s->control_pid);
+        unit_watch_all_pids(u);
+
+        /* If the PID set is empty now, then let's finish this off */
+        if (set_isempty(u->pids))
+                service_notify_cgroup_empty_event(u);
 }
 
 static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
@@ -3332,61 +3415,6 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
         }
 }
 
-static void service_notify_cgroup_empty_event(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(u);
-
-        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
-
-        switch (s->state) {
-
-                /* Waiting for SIGCHLD is usually more interesting,
-                 * because it includes return codes/signals. Which is
-                 * why we ignore the cgroup events for most cases,
-                 * except when we don't know pid which to expect the
-                 * SIGCHLD for. */
-
-        case SERVICE_START:
-        case SERVICE_START_POST:
-                /* If we were hoping for the daemon to write its PID file,
-                 * we can give up now. */
-                if (s->pid_file_pathspec) {
-                        log_warning_unit(u->id,
-                                         "%s never wrote its PID file. Failing.", UNIT(s)->id);
-                        service_unwatch_pid_file(s);
-                        if (s->state == SERVICE_START)
-                                service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES);
-                        else
-                                service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
-                }
-                break;
-
-        case SERVICE_RUNNING:
-                /* service_enter_running() will figure out what to do */
-                service_enter_running(s, SERVICE_SUCCESS);
-                break;
-
-        case SERVICE_STOP_SIGTERM:
-        case SERVICE_STOP_SIGKILL:
-
-                if (main_pid_good(s) <= 0 && !control_pid_good(s))
-                        service_enter_stop_post(s, SERVICE_SUCCESS);
-
-                break;
-
-        case SERVICE_FINAL_SIGTERM:
-        case SERVICE_FINAL_SIGKILL:
-                if (main_pid_good(s) <= 0 && !control_pid_good(s))
-                        service_enter_dead(s, SERVICE_SUCCESS, true);
-
-                break;
-
-        default:
-                ;
-        }
-}
-
 static void service_notify_message(Unit *u, pid_t pid, char **tags) {
         Service *s = SERVICE(u);
         const char *e;
diff --git a/src/core/unit.c b/src/core/unit.c
index de34ddc..57a406d 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -469,6 +469,8 @@ void unit_free(Unit *u) {
 
         set_free_free(u->names);
 
+        unit_unwatch_all_pids(u);
+
         condition_free_list(u->conditions);
 
         unit_ref_unset(&u->slice);
@@ -1656,13 +1658,25 @@ void unit_unwatch_fd(Unit *u, Watch *w) {
 }
 
 int unit_watch_pid(Unit *u, pid_t pid) {
+        int q, r;
+
         assert(u);
         assert(pid >= 1);
 
+        r = set_ensure_allocated(&u->pids, trivial_hash_func, trivial_compare_func);
+        if (r < 0)
+                return r;
+
         /* Watch a specific PID. We only support one unit watching
          * each PID for now. */
 
-        return hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        r = set_put(u->pids, LONG_TO_PTR(pid));
+
+        q = hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        if (q < 0)
+                return q;
+
+        return r;
 }
 
 void unit_unwatch_pid(Unit *u, pid_t pid) {
@@ -1670,6 +1684,102 @@ void unit_unwatch_pid(Unit *u, pid_t pid) {
         assert(pid >= 1);
 
         hashmap_remove_value(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        set_remove(u->pids, LONG_TO_PTR(pid));
+}
+
+static int watch_pids_in_path(Unit *u, const char *path) {
+        _cleanup_closedir_ DIR *d = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        int ret = 0, r;
+
+        assert(u);
+        assert(path);
+
+        /* Adds all PIDs from a specific cgroup path to the set of PIDs we watch. */
+
+        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
+        if (r >= 0) {
+                pid_t pid;
+
+                while ((r = cg_read_pid(f, &pid)) > 0) {
+                        r = unit_watch_pid(u, pid);
+                        if (r < 0 && ret >= 0)
+                                ret = r;
+                }
+                if (r < 0 && ret >= 0)
+                        ret = r;
+
+        } else if (ret >= 0)
+                ret = r;
+
+        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
+        if (r >= 0) {
+                char *fn;
+
+                while ((r = cg_read_subgroup(d, &fn)) > 0) {
+                        _cleanup_free_ char *p = NULL;
+
+                        p = strjoin(path, "/", fn, NULL);
+                        free(fn);
+
+                        if (!p)
+                                return -ENOMEM;
+
+                        r = watch_pids_in_path(u, p);
+                        if (r < 0 && ret >= 0)
+                                ret = r;
+                }
+                if (r < 0 && ret >= 0)
+                        ret = r;
+
+        } else if (ret >= 0)
+                ret = r;
+
+        return ret;
+}
+
+
+int unit_watch_all_pids(Unit *u) {
+        assert(u);
+
+        if (!u->cgroup_path)
+                return -ENOENT;
+
+        /* Adds all PIDs from our cgroup to the set of PIDs we watch */
+
+        return watch_pids_in_path(u, u->cgroup_path);
+}
+
+void unit_unwatch_all_pids(Unit *u) {
+        Iterator i;
+        void *e;
+
+        assert(u);
+
+        SET_FOREACH(e, u->pids, i)
+                hashmap_remove_value(u->manager->watch_pids, e, u);
+
+        set_free(u->pids);
+        u->pids = NULL;
+}
+
+void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) {
+        Iterator i;
+        void *e;
+
+        assert(u);
+
+        /* Cleans dead PIDs from our list */
+
+        SET_FOREACH(e, u->pids, i) {
+                pid_t pid = PTR_TO_LONG(e);
+
+                if (pid == except1 || pid == except2)
+                        continue;
+
+                if (kill(pid, 0) < 0 && errno == ESRCH)
+                        set_remove(u->pids, e);
+        }
 }
 
 int unit_watch_timer(Unit *u, clockid_t clock_id, bool relative, usec_t usec, Watch *w) {
diff --git a/src/core/unit.h b/src/core/unit.h
index 6dd750f..6dff25e 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -198,6 +198,11 @@ struct Unit {
         /* CGroup realize members queue */
         LIST_FIELDS(Unit, cgroup_queue);
 
+        /* PIDs we keep an eye on. Note that a unit might have many
+         * more, but these are the ones we care enough about to
+         * process SIGCHLD for */
+        Set *pids;
+
         /* Used during GC sweeps */
         unsigned gc_marker;
 
@@ -531,6 +536,10 @@ void unit_unwatch_fd(Unit *u, Watch *w);
 
 int unit_watch_pid(Unit *u, pid_t pid);
 void unit_unwatch_pid(Unit *u, pid_t pid);
+int unit_watch_all_pids(Unit *u);
+void unit_unwatch_all_pids(Unit *u);
+
+void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2);
 
 int unit_watch_timer(Unit *u, clockid_t, bool relative, usec_t usec, Watch *w);
 void unit_unwatch_timer(Unit *u, Watch *w);

commit 151079d27da70b27845569fce5856dfada8fd467
Author: Michal Sekletar <msekleta at redhat.com>
Date:   Thu Feb 27 17:56:16 2014 +0100

    core: introduce new stop protocol for unit scopes
    
    By specifying a Controller property when creating the scope, a client can
    specify a bus name that will be notified with a RequestStop bus signal
    when the scope has been asked to shut down, instead of sending SIGTERM
    to the scope processes themselves.
    
    https://bugzilla.redhat.com/show_bug.cgi?id=1032695
    
    Based-on: 2d4a39e759c4ab846ad8a546abeddd40bc8d736e

diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
index 783a969..b576f76 100644
--- a/src/core/dbus-scope.c
+++ b/src/core/dbus-scope.c
@@ -31,10 +31,12 @@
 #define BUS_SCOPE_INTERFACE                                             \
         " <interface name=\"org.freedesktop.systemd1.Scope\">\n"        \
         BUS_UNIT_CGROUP_INTERFACE                                       \
+        "  <property name=\"Controller\" type=\"s\" access=\"read\"/>\n"\
         "  <property name=\"TimeoutStopUSec\" type=\"t\" access=\"read\"/>\n" \
         BUS_KILL_CONTEXT_INTERFACE                                      \
         BUS_CGROUP_CONTEXT_INTERFACE                                    \
         "  <property name=\"Result\" type=\"s\" access=\"read\"/>\n"    \
+        "  <signal name=\"RequestStop\"/>\n"                            \
         " </interface>\n"
 
 #define INTROSPECTION                                                   \
@@ -56,6 +58,7 @@ const char bus_scope_interface[] _introspect_("Scope") = BUS_SCOPE_INTERFACE;
 static DEFINE_BUS_PROPERTY_APPEND_ENUM(bus_scope_append_scope_result, scope_result, ScopeResult);
 
 static const BusProperty bus_scope_properties[] = {
+        { "Controller",             bus_property_append_string,    "s", offsetof(Scope, controller)        },
         { "TimeoutStopUSec",        bus_property_append_usec,      "t", offsetof(Scope, timeout_stop_usec) },
         { "Result",                 bus_scope_append_scope_result, "s", offsetof(Scope, result)            },
         {}
@@ -127,6 +130,31 @@ static int bus_scope_set_transient_property(
 
                 return 1;
 
+        } else if (streq(name, "Controller")) {
+                const char *controller;
+
+                if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_STRING)
+                        return -EINVAL;
+
+                dbus_message_iter_get_basic(i, &controller);
+
+                if (!isempty(controller) && !bus_service_name_is_valid(controller))
+                        return -EINVAL;
+
+                if (mode != UNIT_CHECK) {
+                        char *c = NULL;
+
+                        if (!isempty(controller)) {
+                                c = strdup(controller);
+                                if (!c)
+                                        return -ENOMEM;
+                        }
+
+                        free(s->controller);
+                        s->controller = c;
+                }
+
+                return 1;
         } else if (streq(name, "TimeoutStopUSec")) {
 
                 if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_UINT64)
@@ -187,3 +215,30 @@ int bus_scope_commit_properties(Unit *u) {
         unit_realize_cgroup(u);
         return 0;
 }
+
+int bus_scope_send_request_stop(Scope *s) {
+        _cleanup_dbus_message_unref_ DBusMessage *m = NULL;
+        _cleanup_free_ char *p = NULL;
+        int r;
+
+        assert(s);
+
+        if (!s->controller)
+                return 0;
+
+        p = unit_dbus_path(UNIT(s));
+        if (!p)
+                return -ENOMEM;
+
+        m = dbus_message_new_signal(p,
+                                    "org.freedesktop.systemd1.Scope",
+                                    "RequestStop");
+        if (!m)
+                return 0;
+
+        r = dbus_message_set_destination(m, s->controller);
+        if (!r)
+                return 0;
+
+        return dbus_connection_send(UNIT(s)->manager->api_bus, m, NULL);
+}
diff --git a/src/core/dbus-scope.h b/src/core/dbus-scope.h
index e6836f1..34720f2 100644
--- a/src/core/dbus-scope.h
+++ b/src/core/dbus-scope.h
@@ -30,4 +30,6 @@ DBusHandlerResult bus_scope_message_handler(Unit *u, DBusConnection *c, DBusMess
 int bus_scope_set_property(Unit *u, const char *name, DBusMessageIter *i, UnitSetPropertiesMode mode, DBusError *error);
 int bus_scope_commit_properties(Unit *u);
 
+int bus_scope_send_request_stop(Scope *s);
+
 extern const char bus_scope_interface[];
diff --git a/src/core/scope.c b/src/core/scope.c
index 41da3b9..e75fc2b 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -64,6 +64,9 @@ static void scope_done(Unit *u) {
 
         cgroup_context_done(&s->cgroup_context);
 
+        free(s->controller);
+        s->controller = NULL;
+
         set_free(s->pids);
         s->pids = NULL;
 
@@ -198,6 +201,7 @@ static void scope_enter_dead(Scope *s, ScopeResult f) {
 }
 
 static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
+        bool skip_signal = false;
         int r;
 
         assert(s);
@@ -205,13 +209,23 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
         if (f != SCOPE_SUCCESS)
                 s->result = f;
 
-        r = unit_kill_context(
+        /* If we have a controller set let's ask the controller nicely
+         * to terminate the scope, instead of us going directly into
+         * SIGTERM beserk mode */
+        if (state == SCOPE_STOP_SIGTERM)
+                skip_signal = bus_scope_send_request_stop(s) > 0;
+
+        if (!skip_signal) {
+                r = unit_kill_context(
                         UNIT(s),
                         &s->kill_context,
                         state != SCOPE_STOP_SIGTERM,
                         -1, -1, false);
-        if (r < 0)
-                goto fail;
+
+                if (r < 0)
+                        goto fail;
+        } else
+                r = 1;
 
         if (r > 0) {
                 if (s->timeout_stop_usec > 0) {
diff --git a/src/core/scope.h b/src/core/scope.h
index 2a3dcb7..b4bafa7 100644
--- a/src/core/scope.h
+++ b/src/core/scope.h
@@ -55,6 +55,8 @@ struct Scope {
 
         usec_t timeout_stop_usec;
 
+        char *controller;
+
         Set *pids;
 
         Watch timer_watch;
diff --git a/src/run/run.c b/src/run/run.c
index a6abead..93e3f88 100644
--- a/src/run/run.c
+++ b/src/run/run.c
@@ -315,6 +315,14 @@ static int start_transient_scope(
         if (r < 0)
                 return r;
 
+        {
+                const char *unique_id;
+                sd_bus_get_unique_name(bus, &unique_id);
+                r = sd_bus_message_append(m, "(sv)", "Controller", "s", unique_id);
+                if (r < 0)
+                        return r;
+        }
+
         r = sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, (uint32_t) getpid());
         if (r < 0)
                 return r;
diff --git a/src/shared/dbus-common.c b/src/shared/dbus-common.c
index 3ba2d87..8a68708 100644
--- a/src/shared/dbus-common.c
+++ b/src/shared/dbus-common.c
@@ -1428,3 +1428,45 @@ const char *bus_message_get_sender_with_fallback(DBusMessage *m) {
 
         return ":no-sender";
 }
+
+bool bus_service_name_is_valid(const char *p) {
+        const char *q;
+        bool dot, found_dot = false, unique;
+
+        if (isempty(p))
+                return false;
+
+        unique = p[0] == ':';
+
+        for (dot = true, q = unique ? p+1 : p; *q; q++)
+                if (*q == '.') {
+                        if (dot)
+                                return false;
+
+                        found_dot = dot = true;
+                } else {
+                        bool good;
+
+                        good =
+                                (*q >= 'a' && *q <= 'z') ||
+                                (*q >= 'A' && *q <= 'Z') ||
+                                ((!dot || unique) && *q >= '0' && *q <= '9') ||
+                                *q == '_' || *q == '-';
+
+                        if (!good)
+                                return false;
+
+                        dot = false;
+                }
+
+        if (q - p > 255)
+                return false;
+
+        if (dot)
+                return false;
+
+        if (!found_dot)
+                return false;
+
+        return true;
+}
diff --git a/src/shared/dbus-common.h b/src/shared/dbus-common.h
index 9752f08..8d01d14 100644
--- a/src/shared/dbus-common.h
+++ b/src/shared/dbus-common.h
@@ -242,5 +242,7 @@ const char *bus_message_get_sender_with_fallback(DBusMessage *m);
 
 void bus_message_unrefp(DBusMessage **reply);
 
+bool bus_service_name_is_valid(const char *p);
+
 #define _cleanup_dbus_message_unref_ __attribute__((cleanup(bus_message_unrefp)))
 #define _cleanup_dbus_error_free_ __attribute__((cleanup(dbus_error_free)))



More information about the systemd-commits mailing list