[systemd-commits] 6 commits - man/systemd.exec.xml src/cgroup-util.c src/cgroup-util.h src/dbus-unit.c src/dbus-unit.h src/load-fragment.c src/load-fragment-gperf.gperf.m4 src/load-fragment.h src/logind-user.c src/manager.c src/readahead-common.c src/systemctl.c src/tmpfiles.c src/unit.c src/util.c src/util.h

Lennart Poettering lennart at kemper.freedesktop.org
Sun Aug 21 12:01:30 PDT 2011


 man/systemd.exec.xml             |   73 ++++++++++++++-
 src/cgroup-util.c                |   90 +++++++++++++++++--
 src/cgroup-util.h                |    2 
 src/dbus-unit.c                  |   34 +++++++
 src/dbus-unit.h                  |    3 
 src/load-fragment-gperf.gperf.m4 |    3 
 src/load-fragment.c              |  178 ++++++++++++++++++++++++++++++++++++++-
 src/load-fragment.h              |    2 
 src/logind-user.c                |    2 
 src/manager.c                    |    2 
 src/readahead-common.c           |   51 ++++-------
 src/systemctl.c                  |   24 +++++
 src/tmpfiles.c                   |    2 
 src/unit.c                       |   12 ++
 src/util.c                       |  123 ++++++++++++++++++++++++--
 src/util.h                       |    6 +
 16 files changed, 540 insertions(+), 67 deletions(-)

New commits:
commit e27796a03065fac50fad07d5b42682ecb1e17d46
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sun Aug 21 21:00:41 2011 +0200

    cgroup: when trimming cgroup trees, honour sticky bit of tasks file

diff --git a/src/cgroup-util.c b/src/cgroup-util.c
index ec48ea6..5fb2484 100644
--- a/src/cgroup-util.c
+++ b/src/cgroup-util.c
@@ -27,6 +27,7 @@
 #include <dirent.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <ftw.h>
 
 #include "cgroup-util.h"
 #include "log.h"
@@ -554,20 +555,70 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch
         return 0;
 }
 
+/* nftw() callback used by cg_trim(): removes one cgroup directory, unless its
+ * "tasks" file is sticky. Returns 0 to continue the walk, or -1 with errno set
+ * to ENOMEM so that nftw() itself returns a negative value to its caller. */
+static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
+        char *p;
+        bool is_sticky;
+
+        /* Only directories reported after their children, and never the walk root. */
+        if (typeflag != FTW_DP || ftwbuf->level < 1)
+                return 0;
+
+        p = strappend(path, "/tasks");
+        if (!p) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        is_sticky = file_is_sticky(p) > 0;
+        free(p);
+
+        /* Best effort: a failing rmdir() does not stop the walk. */
+        if (!is_sticky)
+                rmdir(path);
+        return 0;
+}
+
 int cg_trim(const char *controller, const char *path, bool delete_root) {
         char *fs;
-        int r;
+        int r = 0;
 
         assert(controller);
         assert(path);
 
-        if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
+        r = cg_get_path(controller, path, NULL, &fs);
+        if (r < 0)
                 return r;
 
-        r = rm_rf(fs, true, delete_root, true);
+        /* Remove every non-sticky group below fs, children before parents. A
+         * tree that does not exist counts as trimmed; any other non-zero
+         * result of the walk (from nftw() or trim_cb()) is reported. */
+        errno = 0;
+        if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
+                r = errno == ENOENT ? 0 : (errno ? -errno : -EIO);
+
+        if (delete_root) {
+                bool is_sticky;
+                char *p;
+
+                p = strappend(fs, "/tasks");
+                if (!p) {
+                        free(fs);
+                        return -ENOMEM;
+                }
+
+                /* The root group is kept, too, if its tasks file is sticky. */
+                is_sticky = file_is_sticky(p) > 0;
+                free(p);
+                if (!is_sticky && rmdir(fs) < 0 && errno != ENOENT && r == 0)
+                        r = -errno;
+        }
+
         free(fs);
 
-        return r == -ENOENT ? 0 : r;
+        return r;
 }
 
 int cg_delete(const char *controller, const char *path) {
diff --git a/src/util.c b/src/util.c
index ecfe450..e31e0f5 100644
--- a/src/util.c
+++ b/src/util.c
@@ -3475,7 +3475,7 @@ int rm_rf(const char *path, bool only_dirs, bool delete_root, bool honour_sticky
                 if (honour_sticky && file_is_sticky(path) > 0)
                         return r;
 
-                if (rmdir(path) < 0) {
+                if (rmdir(path) < 0 && errno != ENOENT) {
                         if (r == 0)
                                 r = -errno;
                 }

commit b4454c5edfab70fd1c6a0ea6836a0c2ddb61ef25
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sun Aug 21 20:20:41 2011 +0200

    readahead: make use of block_get_whole_disk() where applicable

diff --git a/src/readahead-common.c b/src/readahead-common.c
index f0d57b4..67214ec 100644
--- a/src/readahead-common.c
+++ b/src/readahead-common.c
@@ -212,12 +212,10 @@ finish:
 
 int bump_request_nr(const char *p) {
         struct stat st;
-        struct udev *udev = NULL;
-        struct udev_device *udev_device = NULL, *look_at = NULL;
-        const char *nr_requests;
         uint64_t u;
-        char nr[64], *ap = NULL;
+        char *ap = NULL, *line = NULL;
         int r;
+        dev_t d;
 
         assert(p);
 
@@ -227,54 +225,45 @@ int bump_request_nr(const char *p) {
         if (major(st.st_dev) == 0)
                 return 0;
 
-        if (!(udev = udev_new()))
-                return -ENOMEM;
+        d = st.st_dev;
+        block_get_whole_disk(d, &d);
 
-        if (!(udev_device = udev_device_new_from_devnum(udev, 'b', st.st_dev))) {
-                r = -ENOENT;
+        if (asprintf(&ap, "/sys/dev/block/%u:%u/queue/nr_requests", major(d), minor(d)) < 0) {
+                r= -ENOMEM;
                 goto finish;
         }
 
-        look_at = udev_device;
-        if (!(nr_requests = udev_device_get_sysattr_value(look_at, "queue/nr_requests"))) {
-
-                /* Hmm, if the block device doesn't have a queue
-                 * subdir, the let's look in the parent */
-                look_at = udev_device_get_parent(udev_device);
-                nr_requests = udev_device_get_sysattr_value(look_at, "queue/nr_requests");
-        }
-
-        if (!nr_requests) {
-                r = -ENOENT;
+        r = read_one_line_file(ap, &line);
+        if (r < 0) {
+                if (r == -ENOENT)
+                        r = 0;
                 goto finish;
         }
 
-        if (safe_atou64(nr_requests, &u) >= 0 && u >= BUMP_REQUEST_NR) {
+        r = safe_atou64(line, &u);
+        if (r >= 0 && u >= BUMP_REQUEST_NR) {
                 r = 0;
                 goto finish;
         }
 
-        if (asprintf(&ap, "%s/queue/nr_requests", udev_device_get_syspath(look_at)) < 0) {
+        free(line);
+        line = NULL;
+
+        if (asprintf(&line, "%lu", (unsigned long) BUMP_REQUEST_NR) < 0) {
                 r = -ENOMEM;
                 goto finish;
         }
 
-        snprintf(nr, sizeof(nr), "%lu", (unsigned long) BUMP_REQUEST_NR);
-
-        if ((r = write_one_line_file(ap, nr)) < 0)
+        r = write_one_line_file(ap, line);
+        if (r < 0)
                 goto finish;
 
-        log_info("Bumped block_nr parameter of %s to %lu. This is a temporary hack and should be removed one day.", udev_device_get_devnode(look_at), (unsigned long) BUMP_REQUEST_NR);
+        log_info("Bumped block_nr parameter of %u:%u to %lu. This is a temporary hack and should be removed one day.", major(d), minor(d), (unsigned long) BUMP_REQUEST_NR);
         r = 1;
 
 finish:
-        if (udev_device)
-                udev_device_unref(udev_device);
-
-        if (udev)
-                udev_unref(udev);
-
         free(ap);
+        free(line);
 
         return r;
 }

commit ad293f5a94d8124ece7d1cb860952a87b1c8d98f
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sun Aug 21 20:05:51 2011 +0200

    cgroup: honour sticky bit when trimming cgroup trees

diff --git a/src/cgroup-util.c b/src/cgroup-util.c
index fd0ac98..ec48ea6 100644
--- a/src/cgroup-util.c
+++ b/src/cgroup-util.c
@@ -153,17 +153,38 @@ int cg_read_subgroup(DIR *d, char **fn) {
         return 0;
 }
 
-int cg_rmdir(const char *controller, const char *path) {
+int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
         char *p;
         int r;
 
-        if ((r = cg_get_path(controller, path, NULL, &p)) < 0)
+        r = cg_get_path(controller, path, NULL, &p);
+        if (r < 0)
                 return r;
 
+        if (honour_sticky) {
+                char *tasks;
+
+                /* If the sticky bit is set don't remove the directory */
+
+                tasks = strappend(p, "/tasks");
+                if (!tasks) {
+                        free(p);
+                        return -ENOMEM;
+                }
+
+                r = file_is_sticky(tasks);
+                free(tasks);
+
+                if (r > 0) {
+                        free(p);
+                        return 0;
+                }
+        }
+
         r = rmdir(p);
         free(p);
 
-        return r < 0 ? -errno : 0;
+        return (r < 0 && errno != ENOENT) ? -errno : 0;
 }
 
 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
@@ -302,7 +323,7 @@ int cg_kill_recursive(const char *controller, const char *path, int sig, bool si
                 ret = r;
 
         if (rem)
-                if ((r = cg_rmdir(controller, path)) < 0) {
+                if ((r = cg_rmdir(controller, path, true)) < 0) {
                         if (ret >= 0 &&
                             r != -ENOENT &&
                             r != -EBUSY)
@@ -466,7 +487,7 @@ int cg_migrate_recursive(const char *controller, const char *from, const char *t
                 ret = r;
 
         if (rem)
-                if ((r = cg_rmdir(controller, from)) < 0) {
+                if ((r = cg_rmdir(controller, from, true)) < 0) {
                         if (ret >= 0 &&
                             r != -ENOENT &&
                             r != -EBUSY)
@@ -543,7 +564,7 @@ int cg_trim(const char *controller, const char *path, bool delete_root) {
         if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
                 return r;
 
-        r = rm_rf(fs, true, delete_root);
+        r = rm_rf(fs, true, delete_root, true);
         free(fs);
 
         return r == -ENOENT ? 0 : r;
diff --git a/src/cgroup-util.h b/src/cgroup-util.h
index d142af3..f09373b 100644
--- a/src/cgroup-util.h
+++ b/src/cgroup-util.h
@@ -52,7 +52,7 @@ int cg_get_by_pid(const char *controller, pid_t pid, char **path);
 
 int cg_trim(const char *controller, const char *path, bool delete_root);
 
-int cg_rmdir(const char *controller, const char *path);
+int cg_rmdir(const char *controller, const char *path, bool honour_sticky);
 int cg_delete(const char *controller, const char *path);
 
 int cg_create(const char *controller, const char *path);
diff --git a/src/logind-user.c b/src/logind-user.c
index 613a5c3..56c7de4 100644
--- a/src/logind-user.c
+++ b/src/logind-user.c
@@ -409,7 +409,7 @@ static int user_remove_runtime_path(User *u) {
         if (!u->runtime_path)
                 return 0;
 
-        r = rm_rf(u->runtime_path, false, true);
+        r = rm_rf(u->runtime_path, false, true, false);
         if (r < 0)
                 log_error("Failed to remove runtime directory %s: %s", u->runtime_path, strerror(-r));
 
diff --git a/src/manager.c b/src/manager.c
index a189479..163f69c 100644
--- a/src/manager.c
+++ b/src/manager.c
@@ -3073,7 +3073,7 @@ void manager_undo_generators(Manager *m) {
                 return;
 
         strv_remove(m->lookup_paths.unit_path, m->generator_unit_path);
-        rm_rf(m->generator_unit_path, false, true);
+        rm_rf(m->generator_unit_path, false, true, false);
 
         free(m->generator_unit_path);
         m->generator_unit_path = NULL;
diff --git a/src/tmpfiles.c b/src/tmpfiles.c
index 421a915..a6b8f85 100644
--- a/src/tmpfiles.c
+++ b/src/tmpfiles.c
@@ -586,7 +586,7 @@ static int remove_item(Item *i, const char *instance) {
 
         case TRUNCATE_DIRECTORY:
         case RECURSIVE_REMOVE_PATH:
-                if ((r = rm_rf(instance, false, i->type == RECURSIVE_REMOVE_PATH)) < 0 &&
+                if ((r = rm_rf(instance, false, i->type == RECURSIVE_REMOVE_PATH, false)) < 0 &&
                     r != -ENOENT) {
                         log_error("rm_rf(%s): %s", instance, strerror(-r));
                         return r;
diff --git a/src/util.c b/src/util.c
index 017b995..ecfe450 100644
--- a/src/util.c
+++ b/src/util.c
@@ -3354,7 +3354,7 @@ int get_ctty(pid_t pid, dev_t *_devnr, char **r) {
         return 0;
 }
 
-static int rm_rf_children(int fd, bool only_dirs) {
+static int rm_rf_children(int fd, bool only_dirs, bool honour_sticky) {
         DIR *d;
         int ret = 0;
 
@@ -3371,7 +3371,7 @@ static int rm_rf_children(int fd, bool only_dirs) {
 
         for (;;) {
                 struct dirent buf, *de;
-                bool is_dir;
+                bool is_dir, keep_around = false;
                 int r;
 
                 if ((r = readdir_r(d, &buf, &de)) != 0) {
@@ -3395,9 +3395,26 @@ static int rm_rf_children(int fd, bool only_dirs) {
                                 continue;
                         }
 
+                        if (honour_sticky)
+                                keep_around = st.st_uid == 0 && (st.st_mode & S_ISVTX);
+
                         is_dir = S_ISDIR(st.st_mode);
-                } else
+
+                } else {
+                        if (honour_sticky) {
+                                struct stat st;
+
+                                if (fstatat(fd, de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+                                        if (ret == 0 && errno != ENOENT)
+                                                ret = -errno;
+                                        continue;
+                                }
+
+                                keep_around = st.st_uid == 0 && (st.st_mode & S_ISVTX);
+                        }
+
                         is_dir = de->d_type == DT_DIR;
+                }
 
                 if (is_dir) {
                         int subdir_fd;
@@ -3408,16 +3425,18 @@ static int rm_rf_children(int fd, bool only_dirs) {
                                 continue;
                         }
 
-                        if ((r = rm_rf_children(subdir_fd, only_dirs)) < 0) {
+                        if ((r = rm_rf_children(subdir_fd, only_dirs, honour_sticky)) < 0) {
                                 if (ret == 0)
                                         ret = r;
                         }
 
-                        if (unlinkat(fd, de->d_name, AT_REMOVEDIR) < 0) {
-                                if (ret == 0 && errno != ENOENT)
-                                        ret = -errno;
-                        }
-                } else  if (!only_dirs) {
+                        if (!keep_around)
+                                if (unlinkat(fd, de->d_name, AT_REMOVEDIR) < 0) {
+                                        if (ret == 0 && errno != ENOENT)
+                                                ret = -errno;
+                                }
+
+                } else if (!only_dirs && !keep_around) {
 
                         if (unlinkat(fd, de->d_name, 0) < 0) {
                                 if (ret == 0 && errno != ENOENT)
@@ -3431,7 +3450,7 @@ static int rm_rf_children(int fd, bool only_dirs) {
         return ret;
 }
 
-int rm_rf(const char *path, bool only_dirs, bool delete_root) {
+int rm_rf(const char *path, bool only_dirs, bool delete_root, bool honour_sticky) {
         int fd;
         int r;
 
@@ -3449,13 +3468,18 @@ int rm_rf(const char *path, bool only_dirs, bool delete_root) {
                 return 0;
         }
 
-        r = rm_rf_children(fd, only_dirs);
+        r = rm_rf_children(fd, only_dirs, honour_sticky);
+
+        if (delete_root) {
+
+                if (honour_sticky && file_is_sticky(path) > 0)
+                        return r;
 
-        if (delete_root)
                 if (rmdir(path) < 0) {
                         if (r == 0)
                                 r = -errno;
                 }
+        }
 
         return r;
 }
@@ -5674,6 +5698,18 @@ int block_get_whole_disk(dev_t d, dev_t *ret) {
         return -ENOENT;
 }
 
+/* Returns 1 if p is owned by root and has the sticky bit set, 0 if not, and
+ * -errno if lstat() fails. Symlinks are examined themselves, not followed. */
+int file_is_sticky(const char *p) {
+        struct stat info;
+
+        assert(p);
+
+        if (lstat(p, &info) < 0)
+                return -errno;
+
+        return (info.st_mode & S_ISVTX) && info.st_uid == 0;
+}
 
 static const char *const ioprio_class_table[] = {
         [IOPRIO_CLASS_NONE] = "none",
diff --git a/src/util.h b/src/util.h
index e23f309..b81edc8 100644
--- a/src/util.h
+++ b/src/util.h
@@ -362,7 +362,7 @@ int get_ctty(pid_t, dev_t *_devnr, char **r);
 
 int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid);
 
-int rm_rf(const char *path, bool only_dirs, bool delete_root);
+int rm_rf(const char *path, bool only_dirs, bool delete_root, bool honour_sticky);
 
 int pipe_eof(int fd);
 
@@ -467,6 +467,8 @@ bool in_charset(const char *s, const char* charset);
 
 int block_get_whole_disk(dev_t d, dev_t *ret);
 
+int file_is_sticky(const char *p);
+
 #define NULSTR_FOREACH(i, l)                                    \
         for ((i) = (l); (i) && *(i); (i) = strchr((i), 0)+1)
 

commit 94959f0fa0c19ae1db0e63d9a5dfc94c660825ba
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sun Aug 21 00:28:30 2011 +0200

    exec: allow passing arbitrary path names to blkio cgroup attributes
    
    If a device node is specified, then adjust the bandwidth/weight of it,
    otherwise find the backing block device of the file system the path
    refers to and adjust its bandwidth/weight.

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 6bc8bf3..ce6833b 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -859,10 +859,15 @@
                                 processes. Takes either a single
                                 weight value (between 10 and 1000) to
                                 set the default block IO weight, or a
-                                space separated pair of a device node
-                                path and a weight value to specify the
+                                space separated pair of a file path
+                                and a weight value to specify the
                                 device specific weight value (Example:
-                                "/dev/sda 500"). This controls the
+                                "/dev/sda 500"). The file path may be
+                                specified as path to a block device
+                                node or as any other file in which
+                                case the backing block device of the
+                                file system of the file is
+                                determined. This controls the
                                 <literal>blkio.weight</literal> and
                                 <literal>blkio.weight_device</literal>
                                 control group attributes, which
@@ -879,17 +884,22 @@
                                 <term><varname>BlockIOWriteBandwidth=</varname></term>
 
                                 <listitem><para>Set the per-device
-                                overall block IO bandwith limit for the
-                                executed processes. Takes a space
-                                separated pair of a device node path
-                                and a bandwith value (in bytes per
-                                second) to specify the device specific
-                                bandwidth. If the bandwith is suffixed
-                                with K, M, G, or T the specified
-                                bandwith is parsed as Kilobytes,
-                                Megabytes, Gigabytes, resp. Terabytes
-                                (Example: "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This
-                                controls the
+                                overall block IO bandwidth limit for
+                                the executed processes. Takes a space
+                                separated pair of a file path and a
+                                bandwidth value (in bytes per second)
+                                to specify the device specific
+                                bandwidth. The file path may be
+                                specified as path to a block device
+                                node or as any other file in which
+                                case the backing block device of the
+                                file system of the file is determined.
+                                If the bandwidth is suffixed with K, M,
+                                G, or T the specified bandwidth is
+                                parsed as Kilobytes, Megabytes,
+                                Gigabytes, resp. Terabytes (Example:
+                                "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0
+                                5M"). This controls the
                                 <literal>blkio.read_bps_device</literal>
                                 and
                                 <literal>blkio.write_bps_device</literal>
diff --git a/src/load-fragment.c b/src/load-fragment.c
index b122ea4..c8b4b5a 100644
--- a/src/load-fragment.c
+++ b/src/load-fragment.c
@@ -1843,6 +1843,7 @@ int config_parse_unit_device_allow(const char *filename, unsigned line, const ch
 static int blkio_map(const char *controller, const char *name, const char *value, char **ret) {
         struct stat st;
         char **l;
+        dev_t d;
 
         assert(controller);
         assert(name);
@@ -1861,13 +1862,23 @@ static int blkio_map(const char *controller, const char *name, const char *value
                 return -errno;
         }
 
-        if (!S_ISBLK(st.st_mode)) {
-                log_warning("%s is not a block device.", l[0]);
+        if (S_ISBLK(st.st_mode))
+                d = st.st_rdev;
+        else if (major(st.st_dev) != 0) {
+                /* If this is not a device node then find the block
+                 * device this file is stored on */
+                d = st.st_dev;
+
+                /* If this is a partition, try to get the originating
+                 * block device */
+                block_get_whole_disk(d, &d);
+        } else {
+                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", l[0]);
                 strv_free(l);
                 return -ENODEV;
         }
 
-        if (asprintf(ret, "%u:%u %s", major(st.st_rdev), minor(st.st_rdev), l[1]) < 0) {
+        if (asprintf(ret, "%u:%u %s", major(d), minor(d), l[1]) < 0) {
                 strv_free(l);
                 return -ENOMEM;
         }
@@ -1907,7 +1918,7 @@ int config_parse_unit_blkio_weight(const char *filename, unsigned line, const ch
                 weight = l[1];
         }
 
-        if (device && !path_startswith(device, "/dev/")) {
+        if (device && !path_is_absolute(device)) {
                 log_error("[%s:%u] Failed to parse block device node value, ignoring: %s", filename, line, rvalue);
                 strv_free(l);
                 return 0;
@@ -1965,7 +1976,7 @@ int config_parse_unit_blkio_bandwidth(const char *filename, unsigned line, const
                 return 0;
         }
 
-        if (!path_startswith(l[0], "/dev/")) {
+        if (!path_is_absolute(l[0])) {
                 log_error("[%s:%u] Failed to parse block device node value, ignoring: %s", filename, line, rvalue);
                 strv_free(l);
                 return 0;
diff --git a/src/util.c b/src/util.c
index c24c749..017b995 100644
--- a/src/util.c
+++ b/src/util.c
@@ -5614,6 +5614,67 @@ bool is_main_thread(void) {
         return cached > 0;
 }
 
+/* 1 if access() finds the sysfs queue directory of m:n, 0 otherwise, -ENOMEM on OOM. */
+static int block_has_queue(unsigned m, unsigned n) {
+        char *path;
+        int k;
+
+        if (asprintf(&path, "/sys/dev/block/%u:%u/queue", m, n) < 0)
+                return -ENOMEM;
+
+        k = access(path, F_OK);
+        free(path);
+        return k >= 0;
+}
+
+/* Stores in *ret the device owning the request queue for d: d itself, or the
+ * parent of partition d. Returns 0 on success, else -ENOENT, -EINVAL, -ENOMEM
+ * or the read_one_line_file() error; *ret is left untouched on failure. */
+int block_get_whole_disk(dev_t d, dev_t *ret) {
+        char *path, *devno;
+        unsigned pmaj, pmin;
+        int k;
+
+        assert(ret);
+
+        /* If it has a queue this is good enough for us */
+        k = block_has_queue(major(d), minor(d));
+        if (k < 0)
+                return k;
+        if (k > 0) {
+                *ret = d;
+                return 0;
+        }
+
+        /* If it is a partition find the originating device */
+        if (asprintf(&path, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0)
+                return -ENOMEM;
+        k = access(path, F_OK);
+        free(path);
+        if (k < 0)
+                return -ENOENT;
+
+        /* Get parent dev_t */
+        if (asprintf(&path, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0)
+                return -ENOMEM;
+        k = read_one_line_file(path, &devno);
+        free(path);
+        if (k < 0)
+                return k;
+        k = sscanf(devno, "%u:%u", &pmaj, &pmin);
+        free(devno);
+        if (k != 2)
+                return -EINVAL;
+
+        /* Only return this if it is really good enough for us. */
+        k = block_has_queue(pmaj, pmin);
+        if (k <= 0)
+                return k < 0 ? k : -ENOENT;
+        *ret = makedev(pmaj, pmin);
+        return 0;
+}
+
+
 static const char *const ioprio_class_table[] = {
         [IOPRIO_CLASS_NONE] = "none",
         [IOPRIO_CLASS_RT] = "realtime",
diff --git a/src/util.h b/src/util.h
index 54873e6..e23f309 100644
--- a/src/util.h
+++ b/src/util.h
@@ -465,6 +465,8 @@ bool is_main_thread(void);
 
 bool in_charset(const char *s, const char* charset);
 
+int block_get_whole_disk(dev_t d, dev_t *ret);
+
 #define NULSTR_FOREACH(i, l)                                    \
         for ((i) = (l); (i) && *(i); (i) = strchr((i), 0)+1)
 

commit 9e37286844f67ca7c59e923dd27ad193dfdda7eb
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sat Aug 20 01:38:10 2011 +0200

    exec: add high-level controls for blkio cgroup attributes

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 89e3369..6bc8bf3 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -797,11 +797,13 @@
                                 <term><varname>CPUShares=</varname></term>
 
                                 <listitem><para>Assign the specified
-                                overall CPU time shares to the processes executed. Takes
-                                an integer value. This controls the
+                                overall CPU time shares to the
+                                processes executed. Takes an integer
+                                value. This controls the
                                 <literal>cpu.shares</literal> control
-                                group attribute. For details about
-                                this control group attribute see <ulink
+                                group attribute, which defaults to
+                                1024. For details about this control
+                                group attribute see <ulink
                                 url="http://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para></listitem>
                         </varlistentry>
 
@@ -814,7 +816,7 @@
                                 size. Takes a memory size in bytes. If
                                 the value is suffixed with K, M, G or
                                 T the specified memory size is parsed
-                                as Kilobytes, Megabytes, Gigabytes
+                                as Kilobytes, Megabytes, Gigabytes,
                                 resp. Terabytes (to the base
                                 1024). This controls the
                                 <literal>memory.limit_in_bytes</literal>
@@ -849,6 +851,57 @@
                         </varlistentry>
 
                         <varlistentry>
+                                <term><varname>BlockIOWeight=</varname></term>
+
+                                <listitem><para>Set the default or
+                                per-device overall block IO weight
+                                value for the executed
+                                processes. Takes either a single
+                                weight value (between 10 and 1000) to
+                                set the default block IO weight, or a
+                                space separated pair of a device node
+                                path and a weight value to specify the
+                                device specific weight value (Example:
+                                "/dev/sda 500"). This controls the
+                                <literal>blkio.weight</literal> and
+                                <literal>blkio.weight_device</literal>
+                                control group attributes, which
+                                default to 1000. Use this option
+                                multiple times to set weights for
+                                multiple devices. For details about
+                                these control group attributes see
+                                <ulink
+                                url="http://www.kernel.org/doc/Documentation/cgroups/blkio-controller.txt">blkio-controller.txt</ulink>.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
+                                <term><varname>BlockIOReadBandwidth=</varname></term>
+                                <term><varname>BlockIOWriteBandwidth=</varname></term>
+
+                                <listitem><para>Set the per-device
+                                overall block IO bandwith limit for the
+                                executed processes. Takes a space
+                                separated pair of a device node path
+                                and a bandwith value (in bytes per
+                                second) to specify the device specific
+                                bandwidth. If the bandwith is suffixed
+                                with K, M, G, or T the specified
+                                bandwith is parsed as Kilobytes,
+                                Megabytes, Gigabytes, resp. Terabytes
+                                (Example: "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This
+                                controls the
+                                <literal>blkio.read_bps_device</literal>
+                                and
+                                <literal>blkio.write_bps_device</literal>
+                                control group attributes. Use this
+                                option multiple times to set bandwith
+                                limits for multiple devices. For
+                                details about these control group
+                                attributes see <ulink
+                                url="http://www.kernel.org/doc/Documentation/cgroups/blkio-controller.txt">blkio-controller.txt</ulink>.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><varname>ReadWriteDirectories=</varname></term>
                                 <term><varname>ReadOnlyDirectories=</varname></term>
                                 <term><varname>InaccessibleDirectories=</varname></term>
diff --git a/src/load-fragment-gperf.gperf.m4 b/src/load-fragment-gperf.gperf.m4
index bfa1e3d..08223c5 100644
--- a/src/load-fragment-gperf.gperf.m4
+++ b/src/load-fragment-gperf.gperf.m4
@@ -71,6 +71,9 @@ $1.MemoryLimit,                  config_parse_unit_memory_limit,     0,
 $1.MemorySoftLimit,              config_parse_unit_memory_limit,     0,                             0
 $1.DeviceAllow,                  config_parse_unit_device_allow,     0,                             0
 $1.DeviceDeny,                   config_parse_unit_device_allow,     0,                             0
+$1.BlockIOWeight,                config_parse_unit_blkio_weight,     0,                             0
+$1.BlockIOReadBandwidth,         config_parse_unit_blkio_bandwidth,  0,                             0
+$1.BlockIOWriteBandwidth,        config_parse_unit_blkio_bandwidth,  0,                             0
 $1.ReadWriteDirectories,         config_parse_path_strv,             0,                             offsetof($1, exec_context.read_write_dirs)
 $1.ReadOnlyDirectories,          config_parse_path_strv,             0,                             offsetof($1, exec_context.read_only_dirs)
 $1.InaccessibleDirectories,      config_parse_path_strv,             0,                             offsetof($1, exec_context.inaccessible_dirs)
diff --git a/src/load-fragment.c b/src/load-fragment.c
index 28439d9..b122ea4 100644
--- a/src/load-fragment.c
+++ b/src/load-fragment.c
@@ -1743,9 +1743,13 @@ int config_parse_unit_memory_limit(const char *filename, unsigned line, const ch
 }
 
 static int device_map(const char *controller, const char *name, const char *value, char **ret) {
-        struct stat st;
         char **l;
 
+        assert(controller);
+        assert(name);
+        assert(value);
+        assert(ret);
+
         l = strv_split_quoted(value);
         if (!l)
                 return -ENOMEM;
@@ -1761,7 +1765,9 @@ static int device_map(const char *controller, const char *name, const char *valu
                 }
 
         } else {
-                if (lstat(l[0], &st) < 0) {
+                struct stat st;
+
+                if (stat(l[0], &st) < 0) {
                         log_warning("Couldn't stat device %s", l[0]);
                         strv_free(l);
                         return -errno;
@@ -1834,6 +1840,163 @@ int config_parse_unit_device_allow(const char *filename, unsigned line, const ch
         return 0;
 }
 
+static int blkio_map(const char *controller, const char *name, const char *value, char **ret) {
+        struct stat st;
+        char **l;
+
+        assert(controller);
+        assert(name);
+        assert(value);
+        assert(ret);
+
+        l = strv_split_quoted(value);
+        if (!l)
+                return -ENOMEM;
+
+        assert(strv_length(l) == 2);
+
+        if (stat(l[0], &st) < 0) {
+                log_warning("Couldn't stat device %s", l[0]);
+                strv_free(l);
+                return -errno;
+        }
+
+        if (!S_ISBLK(st.st_mode)) {
+                log_warning("%s is not a block device.", l[0]);
+                strv_free(l);
+                return -ENODEV;
+        }
+
+        if (asprintf(ret, "%u:%u %s", major(st.st_rdev), minor(st.st_rdev), l[1]) < 0) {
+                strv_free(l);
+                return -ENOMEM;
+        }
+
+        strv_free(l);
+        return 0;
+}
+
+int config_parse_unit_blkio_weight(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata) {
+        Unit *u = data;
+        int r;
+        unsigned long ul;
+        const char *device = NULL, *weight;
+        unsigned k;
+        char *t, **l;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        l = strv_split_quoted(rvalue);
+        if (!l)
+                return -ENOMEM;
+
+        k = strv_length(l);
+        if (k < 1 || k > 2) {
+                log_error("[%s:%u] Failed to parse weight value, ignoring: %s", filename, line, rvalue);
+                strv_free(l);
+                return 0;
+        }
+
+        if (k == 1)
+                weight = l[0];
+        else {
+                device = l[0];
+                weight = l[1];
+        }
+
+        if (device && !path_startswith(device, "/dev/")) {
+                log_error("[%s:%u] Failed to parse block device node value, ignoring: %s", filename, line, rvalue);
+                strv_free(l);
+                return 0;
+        }
+
+        if (safe_atolu(weight, &ul) < 0 || ul < 10 || ul > 1000) {
+                log_error("[%s:%u] Failed to parse block IO weight value, ignoring: %s", filename, line, rvalue);
+                strv_free(l);
+                return 0;
+        }
+
+        if (device)
+                r = asprintf(&t, "%s %lu", device, ul);
+        else
+                r = asprintf(&t, "%lu", ul);
+        strv_free(l);
+
+        if (r < 0)
+                return -ENOMEM;
+
+        if (device)
+                r = unit_add_cgroup_attribute(u, "blkio", "blkio.weight_device", t, blkio_map);
+        else
+                r = unit_add_cgroup_attribute(u, "blkio", "blkio.weight", t, NULL);
+        free(t);
+
+        if (r < 0) {
+                log_error("[%s:%u] Failed to add cgroup attribute value, ignoring: %s", filename, line, rvalue);
+                return 0;
+        }
+
+        return 0;
+}
+
+int config_parse_unit_blkio_bandwidth(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata) {
+        Unit *u = data;
+        int r;
+        off_t bytes;
+        unsigned k;
+        char *t, **l;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        l = strv_split_quoted(rvalue);
+        if (!l)
+                return -ENOMEM;
+
+        k = strv_length(l);
+        if (k != 2) {
+                log_error("[%s:%u] Failed to parse bandwidth value, ignoring: %s", filename, line, rvalue);
+                strv_free(l);
+                return 0;
+        }
+
+        if (!path_startswith(l[0], "/dev/")) {
+                log_error("[%s:%u] Failed to parse block device node value, ignoring: %s", filename, line, rvalue);
+                strv_free(l);
+                return 0;
+        }
+
+        if (parse_bytes(l[1], &bytes) < 0 || bytes <= 0) {
+                log_error("[%s:%u] Failed to parse block IO bandwith value, ignoring: %s", filename, line, rvalue);
+                strv_free(l);
+                return 0;
+        }
+
+        r = asprintf(&t, "%s %llu", l[0], (unsigned long long) bytes);
+        strv_free(l);
+
+        if (r < 0)
+                return -ENOMEM;
+
+        r = unit_add_cgroup_attribute(u, "blkio",
+                                      streq(lvalue, "BlockIOReadBandwidth") ? "blkio.read_bps_device" : "blkio.write_bps_device",
+                                      t, blkio_map);
+        free(t);
+
+        if (r < 0) {
+                log_error("[%s:%u] Failed to add cgroup attribute value, ignoring: %s", filename, line, rvalue);
+                return 0;
+        }
+
+        return 0;
+}
+
+
 #define FOLLOW_MAX 8
 
 static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
diff --git a/src/load-fragment.h b/src/load-fragment.h
index 8521ca0..fbb31f9 100644
--- a/src/load-fragment.h
+++ b/src/load-fragment.h
@@ -80,6 +80,8 @@ int config_parse_unit_cgroup_attr(const char *filename, unsigned line, const cha
 int config_parse_unit_cpu_shares(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_unit_memory_limit(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_unit_device_allow(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_unit_blkio_weight(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_unit_blkio_bandwidth(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 
 /* gperf prototypes */
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);

commit d8bbda9141f03c3a2877639e7d43b5f35767840f
Author: Lennart Poettering <lennart at poettering.net>
Date:   Sat Aug 20 00:42:50 2011 +0200

    cgroup: expose cgroup attributes as unit properties on the bus

diff --git a/src/dbus-unit.c b/src/dbus-unit.c
index b110e33..611a120 100644
--- a/src/dbus-unit.c
+++ b/src/dbus-unit.c
@@ -330,6 +330,40 @@ int bus_unit_append_cgroups(DBusMessageIter *i, const char *property, void *data
         return 0;
 }
 
+int bus_unit_append_cgroup_attrs(DBusMessageIter *i, const char *property, void *data) {
+        Unit *u = data;
+        CGroupAttribute *a;
+        DBusMessageIter sub, sub2;
+
+        if (!dbus_message_iter_open_container(i, DBUS_TYPE_ARRAY, "(sss)", &sub))
+                return -ENOMEM;
+
+        LIST_FOREACH(by_unit, a, u->meta.cgroup_attributes) {
+                char *v = NULL;
+                bool success;
+
+                if (a->map_callback)
+                        a->map_callback(a->controller, a->name, a->value, &v);
+
+                success =
+                        dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) &&
+                        dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &a->controller) &&
+                        dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &a->name) &&
+                        dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, v ? &v : &a->value) &&
+                        dbus_message_iter_close_container(&sub, &sub2);
+
+                free(v);
+
+                if (!success)
+                        return -ENOMEM;
+        }
+
+        if (!dbus_message_iter_close_container(i, &sub))
+                return -ENOMEM;
+
+        return 0;
+}
+
 int bus_unit_append_need_daemon_reload(DBusMessageIter *i, const char *property, void *data) {
         Unit *u = data;
         dbus_bool_t b;
diff --git a/src/dbus-unit.h b/src/dbus-unit.h
index b5c3010..9fed6d7 100644
--- a/src/dbus-unit.h
+++ b/src/dbus-unit.h
@@ -109,6 +109,7 @@
         "  <property name=\"IgnoreOnSnapshot\" type=\"b\" access=\"read\"/>\n" \
         "  <property name=\"DefaultControlGroup\" type=\"s\" access=\"read\"/>\n" \
         "  <property name=\"ControlGroup\" type=\"as\" access=\"read\"/>\n" \
+        "  <property name=\"ControlGroupAttributes\" type=\"a(sss)\" access=\"read\"/>\n" \
         "  <property name=\"NeedDaemonReload\" type=\"b\" access=\"read\"/>\n" \
         "  <property name=\"JobTimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"ConditionTimestamp\" type=\"t\" access=\"read\"/>\n" \
@@ -169,6 +170,7 @@
         { "org.freedesktop.systemd1.Unit", "IgnoreOnSnapshot",     bus_property_append_bool,       "b",    &u->meta.ignore_on_snapshot       }, \
         { "org.freedesktop.systemd1.Unit", "DefaultControlGroup",  bus_unit_append_default_cgroup, "s",    u                                 }, \
         { "org.freedesktop.systemd1.Unit", "ControlGroup",         bus_unit_append_cgroups,        "as",   u                                 }, \
+        { "org.freedesktop.systemd1.Unit", "ControlGroupAttributes", bus_unit_append_cgroup_attrs, "a(sss)", u                               }, \
         { "org.freedesktop.systemd1.Unit", "NeedDaemonReload",     bus_unit_append_need_daemon_reload, "b", u                                }, \
         { "org.freedesktop.systemd1.Unit", "JobTimeoutUSec",       bus_property_append_usec,       "t",    &u->meta.job_timeout              }, \
         { "org.freedesktop.systemd1.Unit", "ConditionTimestamp",   bus_property_append_usec,       "t",    &u->meta.condition_timestamp.realtime }, \
@@ -191,6 +193,7 @@ int bus_unit_append_can_isolate(DBusMessageIter *i, const char *property, void *
 int bus_unit_append_job(DBusMessageIter *i, const char *property, void *data);
 int bus_unit_append_default_cgroup(DBusMessageIter *i, const char *property, void *data);
 int bus_unit_append_cgroups(DBusMessageIter *i, const char *property, void *data);
+int bus_unit_append_cgroup_attrs(DBusMessageIter *i, const char *property, void *data);
 int bus_unit_append_need_daemon_reload(DBusMessageIter *i, const char *property, void *data);
 int bus_unit_append_load_error(DBusMessageIter *i, const char *property, void *data);
 
diff --git a/src/systemctl.c b/src/systemctl.c
index bb998d3..604400d 100644
--- a/src/systemctl.c
+++ b/src/systemctl.c
@@ -2546,6 +2546,30 @@ static int print_property(const char *name, DBusMessageIter *iter) {
 
                         return 0;
 
+                } else if (dbus_message_iter_get_element_type(iter) == DBUS_TYPE_STRUCT && streq(name, "ControlGroupAttributes")) {
+                        DBusMessageIter sub, sub2;
+
+                        dbus_message_iter_recurse(iter, &sub);
+                        while (dbus_message_iter_get_arg_type(&sub) == DBUS_TYPE_STRUCT) {
+                                const char *controller, *attr, *value;
+
+                                dbus_message_iter_recurse(&sub, &sub2);
+
+                                if (bus_iter_get_basic_and_next(&sub2, DBUS_TYPE_STRING, &controller, true) >= 0 &&
+                                    bus_iter_get_basic_and_next(&sub2, DBUS_TYPE_STRING, &attr, true) >= 0 &&
+                                    bus_iter_get_basic_and_next(&sub2, DBUS_TYPE_STRING, &value, false) >= 0) {
+
+                                        printf("ControlGroupAttribute={ controller=%s ; attribute=%s ; value=\"%s\" }\n",
+                                               controller,
+                                               attr,
+                                               value);
+                                }
+
+                                dbus_message_iter_next(&sub);
+                        }
+
+                        return 0;
+
                 } else if (dbus_message_iter_get_element_type(iter) == DBUS_TYPE_STRUCT && startswith(name, "Exec")) {
                         DBusMessageIter sub;
 
diff --git a/src/unit.c b/src/unit.c
index 5006742..e0f4a1b 100644
--- a/src/unit.c
+++ b/src/unit.c
@@ -686,9 +686,17 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
                         fprintf(f, "%s\tControlGroup: %s:%s\n",
                                 prefix, b->controller, b->path);
 
-                LIST_FOREACH(by_unit, a, u->meta.cgroup_attributes)
+                LIST_FOREACH(by_unit, a, u->meta.cgroup_attributes) {
+                        char *v = NULL;
+
+                        if (a->map_callback)
+                                a->map_callback(a->controller, a->name, a->value, &v);
+
                         fprintf(f, "%s\tControlGroupAttribute: %s %s \"%s\"\n",
-                                prefix, a->controller, a->name, a->value);
+                                prefix, a->controller, a->name, v ? v : a->value);
+
+                        free(v);
+                }
 
                 if (UNIT_VTABLE(u)->dump)
                         UNIT_VTABLE(u)->dump(u, f, prefix2);



More information about the systemd-commits mailing list