[systemd-devel] [PATCH 5/5] core/cgroup: Add cgroup.populated inotify watches, when available.

Dimitri John Ledkov dimitri.j.ledkov at intel.com
Fri May 29 13:32:18 PDT 2015


---
 src/core/cgroup.c        | 81 ++++++++++++++++++++++++++++++++++++++++++++++--
 src/core/manager.c       |  7 ++++-
 src/core/manager.h       |  3 ++
 src/core/service.c       |  2 +-
 src/shared/cgroup-util.c | 28 +++++++++++++----
 src/shared/cgroup-util.h |  6 ++--
 src/test/test-cgroup.c   | 12 +++----
 7 files changed, 120 insertions(+), 19 deletions(-)

diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 6474e08..a1d7d93 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -30,6 +30,8 @@
 
 #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
 
+static int cgroup_populated_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+
 void cgroup_context_init(CGroupContext *c) {
         assert(c);
 
@@ -629,6 +631,7 @@ static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
 static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
         CGroupContext *c;
         int r;
+        int wd = -1;
 
         assert(u);
 
@@ -655,10 +658,13 @@ static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
         }
 
         /* First, create our own group */
-        r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
+        r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path, u->manager->cgroup_populated_inotify_fd, &wd);
         if (r < 0)
                 return log_error_errno(r, "Failed to create cgroup %s: %m", u->cgroup_path);
 
+        if (wd > 0 && hashmap_put(u->manager->cgroup_populated_by_wd, INT_TO_PTR(wd), u->cgroup_path) < 0)
+                inotify_rm_watch(u->manager->cgroup_populated_inotify_fd, wd);
+
         /* Keep track that this is now realized */
         u->cgroup_realized = true;
         u->cgroup_realized_mask = mask;
@@ -893,6 +899,7 @@ pid_t unit_search_main_pid(Unit *u) {
 
 int manager_setup_cgroup(Manager *m) {
         _cleanup_free_ char *path = NULL;
+        _cleanup_free_ char *sane_behavior = NULL;
         int r;
 
         assert(m);
@@ -944,7 +951,7 @@ int manager_setup_cgroup(Manager *m) {
                 }
 
                 /* 4. Make sure we are in the root cgroup */
-                r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
+                r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0, -1, NULL);
                 if (r < 0)
                         return log_error_errno(r, "Failed to create root cgroup hierarchy: %m");
 
@@ -957,6 +964,23 @@ int manager_setup_cgroup(Manager *m) {
 
                 /* 6.  Always enable hierarchical support if it exists... */
                 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
+
+                /* 7. Create an inotify fd for cgroup.populated files, if
+                 * supported on unified cgroups. Insane ones have
+                 * cgroup.sane_behavior set to 0. */
+                r = cg_get_attribute(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, "cgroup.sane_behavior", &sane_behavior);
+                if (r == -ENOENT) {
+                        m->cgroup_populated_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+                        if (m->cgroup_populated_inotify_fd < 0)
+                                return log_error_errno(errno, "inotify_init1() failed: %m");
+
+                        r = sd_event_add_io(m->event, &m->cgroup_populated_event_source, m->cgroup_populated_inotify_fd, EPOLLIN, cgroup_populated_dispatch_io, m);
+                        if (r < 0)
+                                return log_error_errno(errno, "Failed to create inotify event source: %m");
+                        /* TODO what priority to set? */
+                        (void) sd_event_source_set_description(m->cgroup_populated_event_source, "cgroup-populated");
+                }
+
         }
 
         /* 7. Figure out which controllers are supported */
@@ -975,10 +999,63 @@ void manager_shutdown_cgroup(Manager *m, bool delete) {
 
         m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
 
+        m->cgroup_populated_inotify_fd = safe_close(m->cgroup_populated_inotify_fd);
+        hashmap_free(m->cgroup_populated_by_wd);
+
         free(m->cgroup_root);
         m->cgroup_root = NULL;
 }
 
+/* I/O callback for the cgroup.populated inotify fd: drains pending events and calls
+ * manager_notify_cgroup_empty() for every watched cgroup whose cgroup.populated attribute
+ * reads 0. Read and parse failures are skipped; the callback always returns 0. */
+static int cgroup_populated_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+        Manager *m = userdata;
+
+        assert(m);
+        assert(revents & (EPOLLPRI | EPOLLIN));
+
+        if (fd != m->cgroup_populated_inotify_fd)
+                return 0;
+
+        for (;;) {
+                union inotify_event_buffer buffer;
+                struct inotify_event *e;
+                ssize_t l;
+
+                l = read(fd, &buffer, sizeof(buffer));
+                if (l < 0) {
+                        /* EAGAIN/EINTR just end this dispatch quietly; anything else is logged. */
+                        if (errno != EAGAIN && errno != EINTR)
+                                log_error_errno(errno, "Failed to read cgroup_populated inotify: %m");
+                        break;
+                }
+
+                FOREACH_INOTIFY_EVENT(e, buffer, l) {
+                        /* Scoped per event so each attribute string is freed before the next read. */
+                        _cleanup_free_ char *v = NULL;
+                        int populated = -1;
+                        char *path;
+
+                        path = hashmap_get(m->cgroup_populated_by_wd, INT_TO_PTR(e->wd));
+                        if (!path)
+                                continue;
+
+                        if (cg_get_attribute(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.populated", &v) < 0)
+                                continue;
+
+                        if (safe_atoi(v, &populated) < 0 || populated != 0)
+                                continue;
+
+                        /* Log first: path is the map's borrowed pointer and may not outlive the notify. */
+                        log_info("manager_notify_cgroup_empty %s", path);
+                        manager_notify_cgroup_empty(m, path);
+                }
+        }
+
+        return 0;
+}
+
 Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
         char *p;
         Unit *u;
diff --git a/src/core/manager.c b/src/core/manager.c
index ae473d0..9ef3945 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -568,7 +568,7 @@ int manager_new(ManagerRunningAs running_as, bool test_run, Manager **_m) {
 
         m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
 
-        m->pin_cgroupfs_fd = m->notify_fd = m->signal_fd = m->time_change_fd = m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->utab_inotify_fd = -1;
+        m->pin_cgroupfs_fd = m->notify_fd = m->signal_fd = m->time_change_fd = m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->utab_inotify_fd = m->cgroup_populated_inotify_fd = -1;
         m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
 
         m->ask_password_inotify_fd = -1;
@@ -595,6 +595,10 @@ int manager_new(ManagerRunningAs running_as, bool test_run, Manager **_m) {
         if (r < 0)
                 goto fail;
 
+        r = hashmap_ensure_allocated(&m->cgroup_populated_by_wd, NULL);
+        if (r < 0)
+                goto fail;
+
         r = hashmap_ensure_allocated(&m->watch_bus, &string_hash_ops);
         if (r < 0)
                 goto fail;
@@ -968,6 +972,7 @@ Manager* manager_free(Manager *m) {
         strv_free(m->environment);
 
         hashmap_free(m->cgroup_unit);
+        hashmap_free(m->cgroup_populated_by_wd);
         set_free_free(m->unit_path_cache);
 
         free(m->switch_root);
diff --git a/src/core/manager.h b/src/core/manager.h
index 4ef869d..9ad9cd9 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -215,6 +215,9 @@ struct Manager {
 
         /* Data specific to the cgroup subsystem */
         Hashmap *cgroup_unit;
+        int cgroup_populated_inotify_fd;
+        sd_event_source *cgroup_populated_event_source;
+        Hashmap *cgroup_populated_by_wd;
         CGroupControllerMask cgroup_supported;
         char *cgroup_root;
 
diff --git a/src/core/service.c b/src/core/service.c
index c7e6577..a3011a4 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1176,7 +1176,7 @@ static int service_spawn(
 
         if (is_control && UNIT(s)->cgroup_path) {
                 path = strjoina(UNIT(s)->cgroup_path, "/control");
-                cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
+                cg_create(SYSTEMD_CGROUP_CONTROLLER, path, -1, NULL);
         } else
                 path = UNIT(s)->cgroup_path;
 
diff --git a/src/shared/cgroup-util.c b/src/shared/cgroup-util.c
index eae9f5d..14ef94b 100644
--- a/src/shared/cgroup-util.c
+++ b/src/shared/cgroup-util.c
@@ -591,8 +591,9 @@ int cg_delete(const char *controller, const char *path) {
         return r == -ENOENT ? 0 : r;
 }
 
-int cg_create(const char *controller, const char *path) {
+int cg_create(const char *controller, const char *path, const int inotify_fd, int *wd) {
         _cleanup_free_ char *fs = NULL;
+        _cleanup_free_ char *populated = NULL;
         int r;
 
         r = cg_get_path_and_check(controller, path, NULL, &fs);
@@ -611,15 +612,30 @@ int cg_create(const char *controller, const char *path) {
                 return -errno;
         }
 
+        if (inotify_fd < 0 || !wd)
+                goto out;
+
+        r = cg_get_path(controller, path, "cgroup.populated", &populated);
+        if (r < 0) {
+                log_warning_errno(r, "Failed to get 'cgroup.populated' file, ignoring: %m");
+                goto out;
+        }
+
+        *wd = inotify_add_watch(inotify_fd, populated, IN_MODIFY);
+
+        if (*wd < 0)
+                log_warning_errno(errno, "Failed to add 'cgroup.populated' watch, ignoring: %m");
+
+out:
         return 1;
 }
 
-int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid, const int inotify_fd, int *wd) {
         int r, q;
 
         assert(pid >= 0);
 
-        r = cg_create(controller, path);
+        r = cg_create(controller, path, inotify_fd, wd);
         if (r < 0)
                 return r;
 
@@ -1745,7 +1761,7 @@ static const char mask_names[] =
         "memory\0"
         "devices\0";
 
-int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path) {
+int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path, const int inotify_fd, int *wd) {
         CGroupControllerMask bit = 1;
         const char *n;
         int r;
@@ -1755,14 +1771,14 @@ int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask ma
          * in all others */
 
         /* First create the cgroup in our own hierarchy. */
-        r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
+        r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path, inotify_fd, wd);
         if (r < 0)
                 return r;
 
         /* Then, do the same in the other hierarchies */
         NULSTR_FOREACH(n, mask_names) {
                 if (mask & bit)
-                        cg_create(n, path);
+                        cg_create(n, path, -1, NULL);
                 else if (supported & bit)
                         cg_trim(n, path, true);
 
diff --git a/src/shared/cgroup-util.h b/src/shared/cgroup-util.h
index cbf7201..2c5bdad 100644
--- a/src/shared/cgroup-util.h
+++ b/src/shared/cgroup-util.h
@@ -79,10 +79,10 @@ int cg_trim(const char *controller, const char *path, bool delete_root);
 int cg_rmdir(const char *controller, const char *path);
 int cg_delete(const char *controller, const char *path);
 
-int cg_create(const char *controller, const char *path);
+int cg_create(const char *controller, const char *path, const int inotify_fd, int *wd);
 int cg_attach(const char *controller, const char *path, pid_t pid);
 int cg_attach_fallback(const char *controller, const char *path, pid_t pid);
-int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid, const int inotify_fd, int *wd);
 
 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
@@ -128,7 +128,7 @@ int cg_slice_to_path(const char *unit, char **ret);
 
 typedef const char* (*cg_migrate_callback_t)(CGroupControllerMask mask, void *userdata);
 
-int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path);
+int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path, const int inotify_fd, int *wd);
 int cg_attach_everywhere(CGroupControllerMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata);
 int cg_attach_many_everywhere(CGroupControllerMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata);
 int cg_migrate_everywhere(CGroupControllerMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata);
diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c
index 4be69a4..720d02e 100644
--- a/src/test/test-cgroup.c
+++ b/src/test/test-cgroup.c
@@ -30,11 +30,11 @@ int main(int argc, char*argv[]) {
         char *path;
         char *c, *p;
 
-        assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a") == 0);
-        assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a") == 0);
-        assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b") == 0);
-        assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b/test-c") == 0);
-        assert_se(cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0) == 0);
+        assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a", -1, NULL) == 0);
+        assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a", -1, NULL) == 0);
+        assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b", -1, NULL) == 0);
+        assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b/test-c", -1, NULL) == 0);
+        assert_se(cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0, -1, NULL) == 0);
 
         assert_se(cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, getpid(), &path) == 0);
         assert_se(streq(path, "/test-b"));
@@ -46,7 +46,7 @@ int main(int argc, char*argv[]) {
         assert_se(path_equal(path, "/test-a"));
         free(path);
 
-        assert_se(cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, "/test-b/test-d", 0) == 0);
+        assert_se(cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, "/test-b/test-d", 0, -1, NULL) == 0);
 
         assert_se(cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, getpid(), &path) == 0);
         assert_se(path_equal(path, "/test-b/test-d"));
-- 
2.1.4



More information about the systemd-devel mailing list