[systemd-devel] [PATCH V1] Add L3 cache allocation settings in systemd
Zhangyanfei (YF)
yanfei.zhang at huawei.com
Sat Jun 30 01:44:37 UTC 2018
Hello,
I apologize that I can only send this patch by email; security
restrictions and the limits of my work environment leave me no other option.
================
>From 962eeb1869fb033d04074df0f992a6588e97164e Mon Sep 17 00:00:00 2001
From: Yanfei Zhang <yanfei.zhang at huawei.com>
Date: Fri, 8 Jun 2018 03:00:53 -0400
Subject: [PATCH] Add L3 cache allocation settings in systemd
This patch adds L3 cache allocation settings to systemd.
L3 cache allocation control is supported by newer Intel CPUs and
is exposed through a new filesystem named resctrl. For detailed information,
please refer to https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt.
The patch adds the following things:
1. Mount resctrl when systemd starts.
2. Add two config items for L3 cache allocation control
- L3CacheAllocationSize=XX
L3CacheAllocationSize specifies the maximum amount of L3 cache
the group may use. Note that the value is rounded
up to a multiple of the minimum granularity. For example, if the L3 cache size
is 30M and the cache is divided into 10 ways, each way is 3M, so
L3CacheAllocationSize=10M gives the group 4 ways
(4 * 3M > 10M) of the cache.
- L3CacheAllocationIds=XX
L3CacheAllocationIds selects the cache ids this group applies to.
A system with two CPU sockets has two L3 caches;
this setting indicates which socket's cache we
want to control.
3. Create a new directory in resctrl and initialize its schemata
when systemd detects either of the above two settings in a unit. The directory
name is the same as the unit name.
4. The settings can be applied to all units, just like the cgroup settings.
Signed-off-by: Yanfei Zhang <yanfei.zhang at huawei.com>
---
src/basic/exit-status.h | 1 +
src/basic/parse-util.c | 22 +
src/basic/parse-util.h | 6 +
src/core/dbus-resctrl.c | 70 +++
src/core/dbus-resctrl.h | 25 +
src/core/dbus-slice.c | 6 +
src/core/execute.c | 11 +
src/core/load-fragment-gperf.gperf.m4 | 10 +
src/core/load-fragment.c | 43 ++
src/core/load-fragment.h | 1 +
src/core/main.c | 1 +
src/core/meson.build | 4 +
src/core/mount-setup.c | 2 +
src/core/mount.c | 2 +
src/core/mount.h | 1 +
src/core/resctrl.c | 743 ++++++++++++++++++++++++++
src/core/resctrl.h | 90 ++++
src/core/scope.c | 2 +
src/core/scope.h | 1 +
src/core/service.c | 6 +-
src/core/service.h | 1 +
src/core/slice.c | 3 +
src/core/slice.h | 1 +
src/core/socket.c | 2 +
src/core/socket.h | 1 +
src/core/swap.c | 2 +
src/core/swap.h | 1 +
src/core/unit.c | 25 +
src/core/unit.h | 6 +
29 files changed, 1088 insertions(+), 1 deletion(-)
create mode 100644 src/core/dbus-resctrl.c
create mode 100644 src/core/dbus-resctrl.h
create mode 100644 src/core/resctrl.c
create mode 100644 src/core/resctrl.h
diff --git a/src/basic/exit-status.h b/src/basic/exit-status.h
index c41e8b8..fbd3fee 100644
--- a/src/basic/exit-status.h
+++ b/src/basic/exit-status.h
@@ -69,6 +69,7 @@ enum {
EXIT_CACHE_DIRECTORY,
EXIT_LOGS_DIRECTORY, /* 240 */
EXIT_CONFIGURATION_DIRECTORY,
+ EXIT_RESCTRL_WRITE_PID,
};
typedef enum ExitStatusLevel {
diff --git a/src/basic/parse-util.c b/src/basic/parse-util.c
index 6becf85..dd23ca3 100644
--- a/src/basic/parse-util.c
+++ b/src/basic/parse-util.c
@@ -453,6 +453,28 @@ int safe_atollu(const char *s, long long unsigned *ret_llu) {
return 0;
}
+/* Parses a hexadecimal string (leading whitespace permitted) into an unsigned long long.
+ * Returns 0 on success, -errno if strtoull() reports an error, -EINVAL for empty input
+ * or trailing characters, and -ERANGE if the number carries a minus sign. */
+int safe_atollx(const char *s, long long unsigned *ret_llx) {
+        unsigned long long value;
+        char *end = NULL;
+        const char *p;
+
+        assert(s);
+        assert(ret_llx);
+
+        p = s + strspn(s, WHITESPACE);
+
+        errno = 0;
+        value = strtoull(p, &end, 16);
+        if (errno > 0)
+                return -errno;
+        if (!end || end == p || *end != '\0')
+                return -EINVAL;
+        /* strtoull() accepts a leading '-' and negates the result; refuse that. */
+        if (p[0] == '-')
+                return -ERANGE;
+
+        *ret_llx = value;
+        return 0;
+}
+
int safe_atolli(const char *s, long long int *ret_lli) {
char *x = NULL;
long long l;
diff --git a/src/basic/parse-util.h b/src/basic/parse-util.h
index f3267f4..8ebed40 100644
--- a/src/basic/parse-util.h
+++ b/src/basic/parse-util.h
@@ -34,6 +34,7 @@ static inline int safe_atou(const char *s, unsigned *ret_u) {
int safe_atoi(const char *s, int *ret_i);
int safe_atollu(const char *s, unsigned long long *ret_u);
+int safe_atollx(const char *s, unsigned long long *ret_x);
int safe_atolli(const char *s, long long int *ret_i);
int safe_atou8(const char *s, uint8_t *ret);
@@ -65,6 +66,11 @@ static inline int safe_atou64(const char *s, uint64_t *ret_u) {
return safe_atollu(s, (unsigned long long*) ret_u);
}
+/* uint64_t front-end for safe_atollx(); the static assertion guarantees that the
+ * pointer cast below does not change the width of the object written to. */
+static inline int safe_atox64(const char *s, uint64_t *ret_x) {
+        unsigned long long *out = (unsigned long long*) ret_x;
+
+        assert_cc(sizeof(uint64_t) == sizeof(unsigned long long));
+        return safe_atollx(s, out);
+}
+
static inline int safe_atoi64(const char *s, int64_t *ret_i) {
assert_cc(sizeof(int64_t) == sizeof(long long int));
return safe_atolli(s, (long long int*) ret_i);
diff --git a/src/core/dbus-resctrl.c b/src/core/dbus-resctrl.c
new file mode 100644
index 0000000..d7eb842
--- /dev/null
+++ b/src/core/dbus-resctrl.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ This file is part of systemd.
+
+ Copyright 2013 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "dbus-resctrl.h"
+#include "string-util.h"
+#include "parse-util.h"
+
+/* Handles D-Bus writes of the resctrl properties "L3CacheAllocationSize" and
+ * "L3CacheAllocationIds" (both transported as a string).
+ *
+ * Returns 0 if 'name' is not one of these properties (the message is left untouched),
+ * 1 if the property was handled, and a negative errno-style value on failure. The value
+ * is validated even when 'flags' request a no-op, so that check-only calls reject bad
+ * input; the context and the unit's drop-in are only modified for real writes. */
+int bus_resctrl_set_property(
+                Unit *u,
+                ResctrlContext *r,
+                const char *name,
+                sd_bus_message *message,
+                UnitWriteFlags flags,
+                sd_bus_error *error) {
+
+        bool is_size, is_ids;
+        uint64_t l3_size = 0;
+        const char *str;
+        char *copy;
+        int ret;
+
+        assert(u);
+        assert(r);
+        assert(name);
+        assert(message);
+
+        is_size = streq(name, "L3CacheAllocationSize");
+        is_ids = streq(name, "L3CacheAllocationIds");
+        if (!is_size && !is_ids)
+                return 0;
+
+        flags |= UNIT_PRIVATE;
+
+        ret = sd_bus_message_read(message, "s", &str);
+        if (ret < 0)
+                return ret;
+
+        if (is_size) {
+                ret = parse_size(str, 1024, &l3_size);
+                if (ret < 0)
+                        return ret;
+        }
+
+        if (UNIT_WRITE_FLAGS_NOOP(flags))
+                return 1;
+
+        if (is_size) {
+                r->l3_size = l3_size;
+                /* uint64_t is not 'unsigned long' on every ABI, hence the explicit cast. */
+                unit_write_settingf(u, flags, name, "%s=%llu", name, (unsigned long long) r->l3_size);
+                return 1;
+        }
+
+        copy = strdup(str);
+        if (!copy)
+                return -ENOMEM;
+        free(r->l3_ids);
+        r->l3_ids = copy;
+        unit_write_settingf(u, flags, name, "%s=%s", name, r->l3_ids);
+
+        return 1;
+}
diff --git a/src/core/dbus-resctrl.h b/src/core/dbus-resctrl.h
new file mode 100644
index 0000000..5435677
--- /dev/null
+++ b/src/core/dbus-resctrl.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2013 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "resctrl.h"
+
+int bus_resctrl_set_property(Unit *u, ResctrlContext *r, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-slice.c b/src/core/dbus-slice.c
index 722a568..2f537e1 100644
--- a/src/core/dbus-slice.c
+++ b/src/core/dbus-slice.c
@@ -1,9 +1,11 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include "dbus-cgroup.h"
+#include "dbus-resctrl.h"
#include "dbus-slice.h"
#include "slice.h"
#include "unit.h"
+#include "string-util.h"
const sd_bus_vtable bus_slice_vtable[] = {
SD_BUS_VTABLE_START(0),
@@ -22,6 +24,9 @@ int bus_slice_set_property(
assert(name);
assert(u);
+ if (streq(name, "L3CacheAllocationSize") || streq(name, "L3CacheAllocationIds"))
+ return bus_resctrl_set_property(u, &s->resctrl_context, name, message, flags, error);
+
return bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
}
@@ -30,6 +35,7 @@ int bus_slice_commit_properties(Unit *u) {
unit_update_cgroup_members_masks(u);
unit_realize_cgroup(u);
+ unit_realize_resctrl(u);
return 0;
}
diff --git a/src/core/execute.c b/src/core/execute.c
index 8ac69d1..bc79070 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -93,6 +93,7 @@
#include "user-util.h"
#include "util.h"
#include "utmp-wtmp.h"
+#include "resctrl.h"
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
@@ -2751,6 +2752,7 @@ static int exec_child(
size_t n_fds;
ExecDirectoryType dt;
int secure_bits;
+ ResctrlContext *resctrl_context;
assert(unit);
assert(command);
@@ -2947,6 +2949,15 @@ static int exec_child(
}
}
+ resctrl_context = unit_get_resctrl_context(unit);
+ if (resctrl_context && resctrl_context->group) {
+ r = resctrl_alloc_task_write(resctrl_context->group, getpid());
+ if (r < 0) {
+ *exit_status = EXIT_RESCTRL_WRITE_PID;
+ return -errno;
+ }
+ }
+
if (context->nice_set)
if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
*exit_status = EXIT_NICE;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 15fb478..f2cb4c7 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -191,6 +191,10 @@ $1.IPAddressAllow, config_parse_ip_address_access, 0,
$1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny)
$1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0'
)m4_dnl
+m4_define(`RESCTRL_CONTEXT_CONFIG_ITEMS',
+`$1.L3CacheAllocationSize, config_parse_resctrl, 0, offsetof($1, resctrl_context)
+$1.L3CacheAllocationIds, config_parse_resctrl, 0, offsetof($1, resctrl_context)'
+)m4_dnl
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation)
Unit.SourcePath, config_parse_unit_path_printf, 0, offsetof(Unit, source_path)
@@ -321,6 +325,7 @@ Service.USBFunctionDescriptors, config_parse_unit_path_printf, 0,
Service.USBFunctionStrings, config_parse_unit_path_printf, 0, offsetof(Service, usb_function_strings)
EXEC_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
CGROUP_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
+RESCTRL_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
KILL_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
m4_dnl
Socket.ListenStream, config_parse_socket_listen, SOCKET_SOCKET, 0
@@ -388,6 +393,7 @@ m4_ifdef(`HAVE_SELINUX',
`Socket.SELinuxContextFromNet, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
EXEC_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
CGROUP_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
+RESCTRL_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
KILL_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
m4_dnl
Mount.What, config_parse_unit_string_printf, 0, offsetof(Mount, parameters_fragment.what)
@@ -401,6 +407,7 @@ Mount.LazyUnmount, config_parse_bool, 0,
Mount.ForceUnmount, config_parse_bool, 0, offsetof(Mount, force_unmount)
EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
+RESCTRL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
m4_dnl
Automount.Where, config_parse_unit_path_printf, 0, offsetof(Automount, where)
@@ -413,6 +420,7 @@ Swap.Options, config_parse_unit_string_printf, 0,
Swap.TimeoutSec, config_parse_sec_fix_0, 0, offsetof(Swap, timeout_usec)
EXEC_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
CGROUP_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
+RESCTRL_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
KILL_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
m4_dnl
Timer.OnCalendar, config_parse_timer, 0, 0
@@ -438,8 +446,10 @@ Path.MakeDirectory, config_parse_bool, 0,
Path.DirectoryMode, config_parse_mode, 0, offsetof(Path, directory_mode)
m4_dnl
CGROUP_CONTEXT_CONFIG_ITEMS(Slice)m4_dnl
+RESCTRL_CONTEXT_CONFIG_ITEMS(Slice)m4_dnl
m4_dnl
CGROUP_CONTEXT_CONFIG_ITEMS(Scope)m4_dnl
+RESCTRL_CONTEXT_CONFIG_ITEMS(Scope)m4_dnl
KILL_CONTEXT_CONFIG_ITEMS(Scope)m4_dnl
Scope.TimeoutStopSec, config_parse_sec, 0, offsetof(Scope, timeout_stop_usec)
m4_dnl The [Install] section is ignored here.
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 94c7d33..ba2b064 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -57,6 +57,49 @@
#include "user-util.h"
#include "web-util.h"
+/* Config parser for L3CacheAllocationSize= and L3CacheAllocationIds=.
+ *
+ * 'data' points to the unit's ResctrlContext and 'userdata' to the Unit. An unparsable
+ * size is logged and ignored (returns 0). Returns -EINVAL for any other lvalue and the
+ * result of log_oom() on allocation failure. After a successful assignment the unit's
+ * resctrl group is (re)realized; a failure there is deliberately ignored. */
+int config_parse_resctrl(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        ResctrlContext *c = data;
+        Unit *u = userdata;
+        int r;
+
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        log_debug("%s=%s\n", lvalue, rvalue);
+        if (streq(lvalue, "L3CacheAllocationSize")) {
+                uint64_t v;
+
+                r = parse_size(rvalue, 1024, &v);
+                /* A value that parses but does not fit into size_t must still be
+                 * reported with a real error code, not with parse_size()'s success. */
+                if (r >= 0 && (uint64_t) (size_t) v != v)
+                        r = -ERANGE;
+                if (r < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse size value, ignoring: %s", rvalue);
+                        return 0;
+                }
+                c->l3_size = v;
+        } else if (streq(lvalue, "L3CacheAllocationIds")) {
+                char *copy;
+
+                copy = strdup(rvalue);
+                if (!copy)
+                        return log_oom();
+                free(c->l3_ids);
+                c->l3_ids = copy;
+        } else
+                return -EINVAL;
+
+        /* ignore error here if failed, let unit restart do another try */
+        (void) unit_realize_resctrl(u);
+
+        return 0;
+}
+
static int supported_socket_protocol_from_string(const char *s) {
int r;
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index dad281e..9dff798 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -10,6 +10,7 @@ int unit_load_fragment(Unit *u);
void unit_dump_config_items(FILE *f);
+CONFIG_PARSER_PROTOTYPE(config_parse_resctrl);
CONFIG_PARSER_PROTOTYPE(config_parse_unit_deps);
CONFIG_PARSER_PROTOTYPE(config_parse_obsolete_unit_deps);
CONFIG_PARSER_PROTOTYPE(config_parse_unit_string_printf);
diff --git a/src/core/main.c b/src/core/main.c
index 44dd834..4abb4da 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1821,6 +1821,7 @@ static int initialize_runtime(
*ret_error_message = "Failed to mount cgroup hierarchies";
return r;
}
+ (void)resctrl_get_info();
status_welcome();
hostname_setup();
diff --git a/src/core/meson.build b/src/core/meson.build
index 3852c5e..dec17ae 100644
--- a/src/core/meson.build
+++ b/src/core/meson.build
@@ -9,12 +9,16 @@ libcore_la_sources = '''
bpf-firewall.h
cgroup.c
cgroup.h
+ resctrl.c
+ resctrl.h
chown-recursive.c
chown-recursive.h
dbus-automount.c
dbus-automount.h
dbus-cgroup.c
dbus-cgroup.h
+ dbus-resctrl.c
+ dbus-resctrl.h
dbus-device.c
dbus-device.h
dbus-execute.c
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index 16880e6..2f93f15 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -103,6 +103,8 @@ static const MountPoint mount_table[] = {
#endif
{ "bpf", "/sys/fs/bpf", "bpf", "mode=700", MS_NOSUID|MS_NOEXEC|MS_NODEV,
NULL, MNT_NONE, },
+ { "resctrl", "/sys/fs/resctrl", "resctrl", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE, },
};
/* These are API file systems that might be mounted by other software,
diff --git a/src/core/mount.c b/src/core/mount.c
index 21437da..4ad3639 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -742,6 +742,7 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
exec_context_dump(&m->exec_context, f, prefix);
kill_context_dump(&m->kill_context, f, prefix);
cgroup_context_dump(&m->cgroup_context, f, prefix);
+ resctrl_context_dump(&m->resctrl_context, f, prefix);
}
static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
@@ -1946,6 +1947,7 @@ const UnitVTable mount_vtable = {
.object_size = sizeof(Mount),
.exec_context_offset = offsetof(Mount, exec_context),
.cgroup_context_offset = offsetof(Mount, cgroup_context),
+ .resctrl_context_offset = offsetof(Mount, resctrl_context),
.kill_context_offset = offsetof(Mount, kill_context),
.exec_runtime_offset = offsetof(Mount, exec_runtime),
.dynamic_creds_offset = offsetof(Mount, dynamic_creds),
diff --git a/src/core/mount.h b/src/core/mount.h
index 67ab8ec..f61dc96 100644
--- a/src/core/mount.h
+++ b/src/core/mount.h
@@ -68,6 +68,7 @@ struct Mount {
ExecContext exec_context;
KillContext kill_context;
CGroupContext cgroup_context;
+ ResctrlContext resctrl_context;
ExecRuntime *exec_runtime;
DynamicCreds dynamic_creds;
diff --git a/src/core/resctrl.c b/src/core/resctrl.c
new file mode 100644
index 0000000..61bd483
--- /dev/null
+++ b/src/core/resctrl.c
@@ -0,0 +1,743 @@
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <limits.h>
+#include "resctrl.h"
+#include "string-util.h"
+#include "fileio.h"
+#include "dirent-util.h"
+#include "parse-util.h"
+#include "fd-util.h"
+#include "extract-word.h"
+#include "strv.h"
+
+/* Takes an exclusive flock() on the resctrl mount point.
+ * Returns the locked directory descriptor, or -1 after logging a warning. */
+static int resctrl_lock(void) {
+        int fd = open(RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC);
+
+        if (fd < 0) {
+                log_warning("failed to open %s: %m", RESCTRL_PATH);
+                return -1;
+        }
+
+        if (flock(fd, LOCK_EX) >= 0)
+                return fd;
+
+        log_warning("failed to lock %s: %m", RESCTRL_PATH);
+        close(fd);
+        return -1;
+}
+
+/* Releases the lock taken by resctrl_lock() and closes the descriptor.
+ *
+ * A negative 'fd' (no lock held) is a no-op. The lock is dropped explicitly while the
+ * descriptor is still valid: once close() has been called the descriptor must not be
+ * used again, even if close() reported an error, because the number may already refer
+ * to a different file. Returns 0 on success, -1 if unlocking or closing failed. */
+static int resctrl_unlock(int fd) {
+        int ret = 0;
+
+        if (fd < 0)
+                return 0;
+
+        if (flock(fd, LOCK_UN) < 0) {
+                log_warning("failed to unlock %s: %m", RESCTRL_PATH);
+                ret = -1;
+        }
+
+        /* Closing would also drop the lock; it is still done last so the fd is never touched afterwards. */
+        if (close(fd) < 0) {
+                log_warning("failed to close %s: %m", RESCTRL_PATH);
+                ret = -1;
+        }
+
+        return ret;
+}
+
+/* Parses the value part of an "L3:" schemata line, i.e. ';'-separated "<id>=<hex mask>"
+ * entries.
+ *
+ * If 'used' is NULL the entries are only counted and the count is stored in *max_id.
+ * Otherwise every mask is OR-ed into used->l3_a.bits_mask[<id>] and 'max_id' is not
+ * touched. Returns 0 on success and a negative value on malformed input, an id outside
+ * of the bits_mask array, or allocation failure. */
+static int resctrl_parse_l3_schemata(const char *group_name,
+                                     ResctrlAllocSchemata *used,
+                                     char *schema, unsigned int *max_id) {
+        _cleanup_strv_free_ char **word = NULL;
+        ResctrlL3AllocInfo *l3_a = used ? &used->l3_a : NULL;
+        unsigned int count = 0;
+        char **s;
+
+        assert(used || max_id);
+
+        word = strv_split(schema, ";");
+        if (!word)
+                return -ENOMEM;
+
+        STRV_FOREACH(s, word) {
+                _cleanup_free_ char *id = NULL, *mask = NULL;
+                /* extract_many_words() advances the pointer it is handed. Work on a copy so
+                 * that the strv entries stay intact for strv_free(). */
+                const char *p = *s;
+                unsigned int id_val;
+                uint64_t mask_val;
+                int ret;
+
+                log_debug("%s: L3 schemata: %s", group_name, *s);
+                if (!l3_a) {
+                        count++;
+                        continue;
+                }
+
+                ret = extract_many_words(&p, "=", 0, &id, &mask, NULL);
+                if (ret < 0)
+                        return ret;
+                if (ret != 2)
+                        return -EINVAL;
+
+                ret = safe_atou(id, &id_val);
+                if (ret < 0)
+                        return ret;
+                ret = safe_atox64(mask, &mask_val);
+                if (ret < 0)
+                        return ret;
+
+                /* bits_mask[] has max_ids entries; never write beyond it. */
+                if (id_val >= l3_a->max_ids) {
+                        log_warning("%s: L3 cache id %u out of range (max %u)",
+                                    group_name, id_val, l3_a->max_ids);
+                        return -ERANGE;
+                }
+
+                l3_a->bits_mask[id_val] |= mask_val;
+        }
+
+        if (!l3_a)
+                *max_id = count;
+        return 0;
+}
+
+/* Reads RESCTRL_PATH/<group_name>/schemata and hands the value of every "L3:" line to
+ * resctrl_parse_l3_schemata() together with 'used' and 'max_id'. Lines that do not
+ * split into "<type>:<schema>" are skipped.
+ * Returns 0 on success, a negative value on open, read, parse or allocation failure. */
+static int resctrl_alloc_schemata_read(const char *group_name,
+                                       ResctrlAllocSchemata *used,
+                                       unsigned int *max_id) {
+        _cleanup_free_ char *file = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        char line[LINE_MAX];
+        int ret;
+
+        file = strjoin(RESCTRL_PATH, "/", group_name, "/schemata");
+        if (!file)
+                return -ENOMEM;
+
+        f = fopen(file, "re");
+        if (!f)
+                return log_warning_errno(errno, "Cannot open %s: %m", file);
+
+        FOREACH_LINE(line, f, goto read_fail) {
+                /* Both words are allocated by extract_many_words(); release them per line. */
+                _cleanup_free_ char *type = NULL, *schema = NULL;
+                const char *c;
+
+                c = truncate_nl(skip_leading_chars(line, NULL));
+                if (extract_many_words(&c, ":", 0, &type, &schema, NULL) != 2)
+                        continue;
+
+                if (!streq(type, "L3"))
+                        continue;
+
+                log_debug("%s: L3 schemata: %s", group_name, schema);
+                ret = resctrl_parse_l3_schemata(group_name, used, schema, max_id);
+                if (ret < 0) {
+                        log_warning("Failed to parse %s schemata", group_name);
+                        return ret;
+                }
+        }
+        return 0;
+
+read_fail:
+        return log_warning_errno(errno, "Failed to read %s: %m", file);
+}
+
+/* Removes every bit that is set in 'used' from 'sfree', per cache id.
+ *
+ * Bits are cleared with AND-NOT rather than XOR: XOR would turn a bit that is set in
+ * 'used' but already clear in 'sfree' into a "free" bit. */
+static void resctrl_alloc_schemata_sub(ResctrlAllocSchemata *sfree,
+                                       ResctrlAllocSchemata *used) {
+        ResctrlL3AllocInfo *l3_as = &sfree->l3_a;
+        ResctrlL3AllocInfo *l3_au = &used->l3_a;
+        unsigned int i;
+
+        for (i = 0; i < l3_as->max_ids; i++)
+                l3_as->bits_mask[i] &= ~l3_au->bits_mask[i];
+}
+
+/* Allocates a schemata object whose L3 mask array is zero-filled and has one entry per
+ * cache id known to the global 'rinfo'. Returns NULL if either allocation fails. */
+static ResctrlAllocSchemata *resctrl_get_alloc_schemata(void) {
+        ResctrlAllocSchemata *schemata;
+
+        schemata = new0(ResctrlAllocSchemata, 1);
+        if (!schemata)
+                return NULL;
+
+        schemata->l3_a.max_ids = rinfo->l3_info.max_ids;
+        schemata->l3_a.bits_mask = new0(uint64_t, schemata->l3_a.max_ids);
+        if (schemata->l3_a.bits_mask)
+                return schemata;
+
+        return mfree(schemata);
+}
+
+/* Frees a schemata object obtained from resctrl_get_alloc_schemata(); NULL is accepted. */
+static void resctrl_put_alloc_schemata(ResctrlAllocSchemata *alloc) {
+        if (alloc) {
+                free(alloc->l3_a.bits_mask);
+                free(alloc);
+        }
+}
+
+/* Removes from 'sfree' all L3 mask bits that are claimed by resctrl groups other than
+ * 'group_name'.
+ *
+ * Every directory below RESCTRL_PATH except "info", "mon_data", "mon_groups" and the
+ * group itself is read and its masks are accumulated before being subtracted.
+ * Returns 0 on success (also when RESCTRL_PATH does not exist), negative on error. */
+static int resctrl_alloc_get_unused(const char *group_name,
+                                    ResctrlAllocSchemata *sfree) {
+        _cleanup_closedir_ DIR *d = NULL;
+        ResctrlAllocSchemata *used;
+        struct dirent *de;
+        unsigned int i;
+        int ret = 0;
+
+        d = opendir(RESCTRL_PATH);
+        if (!d)
+                return errno == ENOENT ? 0 : -errno;
+
+        used = resctrl_get_alloc_schemata();
+        if (!used)
+                return -ENOMEM;
+
+        /* All exits below go through 'out' so that 'used' is always released. */
+        FOREACH_DIRENT(de, d, { ret = -errno; goto out; }) {
+                if (dirent_is_file(de))
+                        continue;
+                if (streq(de->d_name, "info") || streq(de->d_name, "mon_data") ||
+                    streq(de->d_name, "mon_groups") || streq(de->d_name, group_name))
+                        continue;
+                ret = resctrl_alloc_schemata_read(de->d_name, used, NULL);
+                if (ret < 0)
+                        goto out;
+        }
+
+        resctrl_alloc_schemata_sub(sfree, used);
+
+        for (i = 0; i < sfree->l3_a.max_ids; i++)
+                log_debug("resctrl group %s: L3 free schemata: id=%u, schema=%llx",
+                          group_name, i, (unsigned long long) sfree->l3_a.bits_mask[i]);
+        ret = 0;
+
+out:
+        resctrl_put_alloc_schemata(used);
+        return ret;
+}
+
+/* Converts a requested cache size in bytes into the number of mask bits needed, rounding
+ * up to whole multiples of the L3 granularity.
+ * Returns 0 and stores the count in *bits, or -1 if the request is zero, larger than the
+ * cache, or the granularity is unknown (zero). */
+static int resctrl_alloc_l3_count_need_bits(uint64_t request_size,
+                                            unsigned int *bits) {
+        if (request_size == 0 || request_size > rinfo->l3_info.cache_bytes)
+                return -1;
+        /* Would divide by zero below. */
+        if (rinfo->l3_info.granularity == 0)
+                return -1;
+
+        *bits = DIV_ROUND_UP(request_size, rinfo->l3_info.granularity);
+        return 0;
+}
+
+/* For every selected cache id, picks the lowest run of 'bits' contiguous free bits from
+ * 'alloc_free' and stores it in 'alloc_to'.
+ *
+ * 'ids_v', if non-NULL, has one entry per cache id; ids whose entry is zero are skipped.
+ * Returns 0 on success and -1 if 'bits' is zero or wider than the capacity mask, or if
+ * some selected id has no sufficiently long free run. */
+static int resctrl_alloc_l3_set_mask(ResctrlAllocSchemata *alloc_free,
+                                     ResctrlAllocSchemata *alloc_to,
+                                     unsigned *ids_v, unsigned bits) {
+        ResctrlL3AllocInfo *l3_af = &alloc_free->l3_a;
+        ResctrlL3AllocInfo *l3_at = &alloc_to->l3_a;
+        unsigned int max_bits = rinfo->l3_info.cbm_bits;
+        unsigned int i;
+
+        if (bits == 0 || bits > max_bits)
+                return -1;
+
+        for (i = 0; i < l3_af->max_ids; i++) {
+                uint64_t mask = l3_af->bits_mask[i];
+                unsigned int found = 0, pos;
+
+                if (ids_v && !ids_v[i])
+                        continue;
+
+                for (pos = 0; pos < max_bits; pos++, mask >>= 1) {
+                        if (mask & 1) {
+                                found++;
+                                /* 64-bit constant: 'unsigned long' may be only 32 bits wide. */
+                                l3_at->bits_mask[i] |= UINT64_C(1) << pos;
+                                if (found == bits)
+                                        break;
+                        } else {
+                                /* Run interrupted, start over. */
+                                found = 0;
+                                l3_at->bits_mask[i] = 0;
+                        }
+                }
+
+                if (found != bits)
+                        return -1;
+        }
+        return 0;
+}
+
+/* Marks the ids from 's' up to and including 'e' in ids_v[]. Passing the same pointer for
+ * 's' and 'e' selects a single id. Both ids must be below 'max_id'.
+ * Returns 0 on success, -1 (after a warning) for an unparsable or too large id. */
+static inline int resctrl_set_ids(unsigned int *ids_v, char *s, char *e, unsigned max_id) {
+        unsigned int first, last, id;
+
+        if (safe_atou(s, &first) < 0 || first >= max_id) {
+                log_warning("wrong id %s\n", s);
+                return -1;
+        }
+
+        last = first;
+        if (s != e && (safe_atou(e, &last) < 0 || last >= max_id)) {
+                log_warning("wrong id %s\n", e);
+                return -1;
+        }
+
+        for (id = first; id <= last; id++) {
+                log_debug("id %u set!!!\n", id);
+                ids_v[id] = 1;
+        }
+        return 0;
+}
+
+/* Parses a comma-separated list of cache ids and id ranges ("<id>" or "<first>-<last>")
+ * and sets the matching entries of ids_v[], which must hold 'max_id' elements.
+ * Returns 0 on success, negative on malformed input or allocation failure. */
+static int resctrl_parse_ids(const char *ids, unsigned int *ids_v,
+                             unsigned int max_id) {
+        _cleanup_strv_free_ char **word = NULL;
+        char **s;
+
+        word = strv_split(ids, ",");
+        if (!word)
+                return -ENOMEM;
+
+        STRV_FOREACH(s, word) {
+                _cleanup_free_ char *start = NULL, *end = NULL;
+                /* extract_many_words() advances the pointer it is handed. Work on a copy so
+                 * that the strv entries stay intact for strv_free(). */
+                const char *p = *s;
+                int ret;
+
+                log_debug("id: %s", *s);
+                ret = extract_many_words(&p, "-", 0, &start, &end, NULL);
+                if (ret == 2)
+                        ret = resctrl_set_ids(ids_v, start, end, max_id);
+                else if (ret == 1)
+                        ret = resctrl_set_ids(ids_v, start, start, max_id);
+                else
+                        ret = -1;
+                if (ret < 0)
+                        return -1;
+        }
+        return 0;
+}
+
+/* Reserves 'request_size' bytes of L3 cache for a group.
+ *
+ * The size is converted into a number of mask bits, and a free run of that length is
+ * taken from 'alloc_free' and stored in 'alloc_to' for every cache id selected by 'ids'
+ * (all ids if 'ids' is NULL). Returns 0 on success, negative on error. */
+static int resctrl_alloc_l3_schemata_reserve(const char *group_name,
+                                             ResctrlAllocSchemata *alloc_free,
+                                             ResctrlAllocSchemata *alloc_to,
+                                             const char *ids, uint64_t request_size) {
+        _cleanup_free_ unsigned int *ids_v = NULL;
+        ResctrlL3AllocInfo *l3_a = &alloc_free->l3_a;
+        unsigned int bits, i;
+        int ret;
+
+        if (ids) {
+                ids_v = new0(unsigned int, l3_a->max_ids);
+                if (!ids_v)
+                        return -ENOMEM;
+                ret = resctrl_parse_ids(ids, ids_v, l3_a->max_ids);
+                if (ret < 0)
+                        return ret;
+        }
+
+        ret = resctrl_alloc_l3_count_need_bits(request_size, &bits);
+        if (ret < 0)
+                return ret;
+        log_debug("resctrl group %s: l3 request bits %u\n", group_name, bits);
+
+        ret = resctrl_alloc_l3_set_mask(alloc_free, alloc_to, ids_v, bits);
+        if (ret < 0) {
+                log_warning("resctrl group %s: No enough free schemata\n",
+                            group_name);
+                return ret;
+        }
+
+        /* uint64_t is not 'unsigned long' on every ABI, hence the explicit cast. */
+        for (i = 0; i < l3_a->max_ids; i++)
+                log_debug("resctrl group %s: L3 new schemata: id=%u, schema=%llx",
+                          group_name, i, (unsigned long long) alloc_to->l3_a.bits_mask[i]);
+
+        return 0;
+}
+
+/* Writes the L3 masks of 'alloc_to' to RESCTRL_PATH/<group_name>/schemata as one
+ * "L3:<id>=<hex mask>;..." line. A zero mask is replaced by the full capacity mask.
+ *
+ * The stream is flushed explicitly before returning: the data only reaches the kernel
+ * on flush, so this is where a rejected schemata becomes visible.
+ * Returns 0 on success, a negative errno-style value on failure. */
+static int resctrl_alloc_schemata_write(const char *group_name,
+                                        ResctrlAllocSchemata *alloc_to) {
+        _cleanup_free_ char *path = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        ResctrlL3AllocInfo *l3_a = &alloc_to->l3_a;
+        char schema[LINE_MAX] = "L3:";
+        size_t used = strlen(schema);
+        unsigned int i;
+
+        path = strjoin(RESCTRL_PATH, "/", group_name, "/schemata");
+        if (!path)
+                return -ENOMEM;
+
+        for (i = 0; i < l3_a->max_ids; i++) {
+                uint64_t mask = l3_a->bits_mask[i];
+                int n;
+
+                if (mask == 0)
+                        mask = rinfo->l3_info.cbm_mask;
+
+                /* Bounded formatting; entries are joined by ';' without a trailing one. */
+                n = snprintf(schema + used, sizeof(schema) - used, "%s%u=%llx",
+                             i > 0 ? ";" : "", i, (unsigned long long) mask);
+                if (n < 0 || (size_t) n >= sizeof(schema) - used)
+                        return -ENOBUFS;
+                used += (size_t) n;
+        }
+        log_debug("resctrl group %s: L3 schemata=%s\n", group_name, schema);
+
+        f = fopen(path, "we");
+        if (!f)
+                return -errno;
+
+        /* Write schemata to file */
+        if (fprintf(f, "%s\n", schema) < 0 || fflush(f) != 0)
+                return log_warning_errno(errno, "resctrl group %s: failed to "
+                                         "write schemata %m", group_name);
+        return 0;
+}
+
+/* Computes and writes the schemata of a group.
+ *
+ * Starts from the full capacity mask on every cache id, subtracts what other groups
+ * use, reserves the run requested in 'r' (only if r->l3_size is non-zero) and writes
+ * the result to the group's schemata file. Returns 0 on success, negative on error. */
+static int resctrl_alloc_schemata_set(const char *group_name,
+                                      ResctrlContext *r) {
+        ResctrlAllocSchemata *avail, *wanted = NULL;
+        unsigned id;
+        int ret;
+
+        avail = resctrl_get_alloc_schemata();
+        if (!avail)
+                return -ENOMEM;
+
+        for (id = 0; id < avail->l3_a.max_ids; id++)
+                avail->l3_a.bits_mask[id] = rinfo->l3_info.cbm_mask;
+
+        ret = resctrl_alloc_get_unused(group_name, avail);
+        if (ret < 0) {
+                log_error("resctrl group %s: Failed to get free schemata\n",
+                          group_name);
+                goto finish;
+        }
+
+        wanted = resctrl_get_alloc_schemata();
+        if (!wanted) {
+                ret = -ENOMEM;
+                goto finish;
+        }
+
+        /* reserve schemata mask for L3 */
+        if (r->l3_size != 0) {
+                ret = resctrl_alloc_l3_schemata_reserve(group_name, avail, wanted,
+                                                        r->l3_ids, r->l3_size);
+                if (ret < 0) {
+                        log_error("resctrl group %s: Failed to reserve L3 schemata\n",
+                                  group_name);
+                        goto finish;
+                }
+        }
+
+        /* write mask into schemata file */
+        ret = resctrl_alloc_schemata_write(group_name, wanted);
+        if (ret < 0)
+                log_error("resctrl group %s: Failed to write schemata\n",
+                          group_name);
+
+finish:
+        resctrl_put_alloc_schemata(wanted);
+        resctrl_put_alloc_schemata(avail);
+        return ret;
+}
+
+/* Reads the size of cpu0's index3 cache from sysfs and stores it, in bytes, in
+ * l3_info->cache_bytes. Returns the result of parse_size(), or a negative errno-style
+ * value if the file cannot be read. */
+static int resctrl_get_l3_cache_size(ResctrlInfoL3 *l3_info) {
+        _cleanup_free_ char *contents = NULL;
+        const char *cache_file = "/sys/devices/system/cpu/cpu0/cache/index3/size";
+        int ret;
+
+        ret = read_one_line_file(cache_file, &contents);
+        if (ret < 0)
+                /* The error is carried in the return value; errno may be stale here. */
+                return log_warning_errno(ret, "Cannot open %s: %m", cache_file);
+
+        return parse_size(contents, 1024, &l3_info->cache_bytes);
+}
+
+/* Counts the L3 entries in the schemata file of the root group (".") and stores the
+ * count in l3_info->max_ids. Returns 0 on success, negative on error. */
+static int resctrl_get_l3_cache_max_id(ResctrlInfoL3 *l3_info) {
+        unsigned int n_ids = 0;
+        int ret = resctrl_alloc_schemata_read(".", NULL, &n_ids);
+
+        if (ret >= 0) {
+                l3_info->max_ids = n_ids;
+                ret = 0;
+        }
+        return ret;
+}
+
+/* Stores in l3_info->cbm_bits how many right shifts it takes to clear cbm_mask, i.e.
+ * the position of its highest set bit plus one (0 for an empty mask). */
+static void resctrl_cal_mask_bits(ResctrlInfoL3 *l3_info) {
+        unsigned int width = 0;
+        uint64_t rest;
+
+        for (rest = l3_info->cbm_mask; rest != 0; rest >>= 1)
+                width++;
+
+        l3_info->cbm_bits = width;
+}
+
+/* Reads the single-line file RESCTRL_PATH_INFO/<type>/<name> into a newly allocated
+ * string stored in *ret. Returns a negative errno-style value on failure. */
+static int resctrl_read_info_file(const char *type, const char *name, char **ret) {
+        _cleanup_free_ char *path = NULL;
+
+        path = strjoin(RESCTRL_PATH_INFO, "/", type, "/", name);
+        if (!path)
+                return -ENOMEM;
+
+        return read_one_line_file(path, ret);
+}
+
+/* Fills 'l3_info' from the resctrl info directory of resource 'type' (num_closids,
+ * min_cbm_bits, cbm_mask), the sysfs L3 cache size and the root group's schemata, and
+ * derives cbm_bits and granularity from them.
+ * Returns 0 on success; negative if a file cannot be read or parsed, or if the
+ * capacity mask is empty (the granularity would require a division by zero). */
+static int resctrl_get_l3_info(ResctrlInfoL3 *l3_info, const char *type) {
+        _cleanup_free_ char *closids = NULL, *min_bits = NULL, *cbm = NULL;
+        int ret;
+
+        ret = resctrl_read_info_file(type, "num_closids", &closids);
+        if (ret < 0)
+                return ret;
+        ret = safe_atou(closids, &l3_info->num_closids);
+        if (ret < 0)
+                return ret;
+
+        ret = resctrl_read_info_file(type, "min_cbm_bits", &min_bits);
+        if (ret < 0)
+                return ret;
+        ret = safe_atou(min_bits, &l3_info->min_cbm_bits);
+        if (ret < 0)
+                return ret;
+
+        ret = resctrl_read_info_file(type, "cbm_mask", &cbm);
+        if (ret < 0)
+                return ret;
+        ret = safe_atox64(cbm, &l3_info->cbm_mask);
+        if (ret < 0)
+                return ret;
+
+        ret = resctrl_get_l3_cache_size(l3_info);
+        if (ret < 0)
+                return ret;
+
+        resctrl_cal_mask_bits(l3_info);
+        if (l3_info->cbm_bits == 0) {
+                log_warning("resctrl %s: empty capacity bit mask", type);
+                return -EINVAL;
+        }
+
+        /* get gran */
+        l3_info->granularity = l3_info->cache_bytes / l3_info->cbm_bits;
+
+        ret = resctrl_get_l3_cache_max_id(l3_info);
+        if (ret < 0)
+                return ret;
+
+        /* uint64_t is not 'unsigned long' on every ABI, hence the explicit casts. */
+        log_info("resctrl l3 cache info: num_closids=%u, "
+                 "min_cbm_bits=%u, cbm_mask=%llx, cache_bytes=%llu, "
+                 "cbm_bits=%u, granularity=%llu, max_ids=%u\n",
+                 l3_info->num_closids, l3_info->min_cbm_bits,
+                 (unsigned long long) l3_info->cbm_mask,
+                 (unsigned long long) l3_info->cache_bytes,
+                 l3_info->cbm_bits,
+                 (unsigned long long) l3_info->granularity,
+                 l3_info->max_ids);
+
+        return 0;
+}
+
+ResctrlInfo *rinfo;
+
+/* Probes RESCTRL_PATH_INFO and, if an "L3" resource is present and can be read
+ * completely, publishes the result in the global 'rinfo'.
+ *
+ * 'rinfo' is only assigned once the information is complete and stays NULL otherwise.
+ * A second call after a successful probe is a no-op.
+ * Returns 0 on success or when resctrl / L3 is unavailable, negative on error. */
+int resctrl_get_info(void) {
+        _cleanup_closedir_ DIR *d = NULL;
+        _cleanup_free_ ResctrlInfo *info = NULL;
+        struct dirent *de;
+        int ret;
+
+        log_debug("get resctrl info %s\n", RESCTRL_PATH_INFO);
+
+        if (rinfo)
+                return 0;
+
+        d = opendir(RESCTRL_PATH_INFO);
+        if (!d)
+                return errno == ENOENT ? 0 : -errno;
+
+        info = new0(ResctrlInfo, 1);
+        if (!info)
+                return -ENOMEM;
+
+        FOREACH_DIRENT(de, d, return -errno) {
+                log_debug("Parsing info file '%s'", de->d_name);
+                if (!streq(de->d_name, "L3"))
+                        continue;
+
+                ret = resctrl_get_l3_info(&info->l3_info, de->d_name);
+                if (ret < 0)
+                        return ret;
+
+                /* Hand ownership to the global; keep the cleanup handler from freeing it. */
+                rinfo = info;
+                info = NULL;
+                return 0;
+        }
+        return 0;
+}
+
+/* Writes the resctrl settings of 'r' to 'f', one "<prefix>Key=value" line each.
+ * Nothing is written if no group is assigned. An unset id list is printed as an empty
+ * value rather than passing NULL to a %s conversion. */
+void resctrl_context_dump(ResctrlContext *r, FILE* f, const char *prefix) {
+        assert(r);
+        assert(f);
+
+        if (!r->group)
+                return;
+
+        prefix = strempty(prefix);
+        fprintf(f,
+                "%sL3CacheAllocationGroup=%s\n"
+                "%sL3CacheAllocationIds=%s\n"
+                "%sL3CacheAllocationSize=%llu\n",
+                prefix, r->group,
+                prefix, strempty(r->l3_ids),
+                prefix, (unsigned long long) r->l3_size);
+}
+
+/* Used for units without resctrl settings of their own: drops any group named after the
+ * unit, clears the context and, if the unit's slice has a group assigned, records that
+ * group's name in the context.
+ * Returns 0 on success (also when there is nothing to inherit), -ENOMEM on allocation
+ * failure. */
+static int resctrl_try_inhert(Unit *u, ResctrlContext *r) {
+        ResctrlContext *slice_r;
+
+        /* Remove possible remaining slice group; the result is deliberately ignored */
+        (void) resctrl_alloc_group_remove(u->id);
+        r->group = mfree(r->group);
+        r->l3_ids = mfree(r->l3_ids);
+
+        /* See if we can inherit from our parent slice */
+        if (!UNIT_ISSET(u->slice))
+                return 0;
+
+        slice_r = unit_get_resctrl_context(UNIT_DEREF(u->slice));
+        /* unit_get_resctrl_context() can return NULL (unit_realize_resctrl() checks for it too). */
+        if (!slice_r || !slice_r->group)
+                return 0;
+
+        log_unit_debug(u, "inhert parent's cache configuration");
+        r->group = strdup(slice_r->group);
+        if (!r->group)
+                return -ENOMEM;
+
+        return 0;
+}
+
+/* Creates (if needed) the resctrl group named after the unit and applies the unit's L3
+ * settings to it, under the resctrl lock.
+ *
+ * Does nothing if the unit has no resctrl context or resctrl information is not
+ * available. Units without settings of their own only try to inherit their slice's
+ * group. If applying the schemata fails, the group is removed again and r->group is
+ * reset to NULL, so no dangling pointer is left behind for resctrl_context_done().
+ * Returns 0 on success, negative on error. */
+int unit_realize_resctrl(Unit *u) {
+        _cleanup_free_ char *tmp_group = NULL;
+        ResctrlContext *r;
+        int lock_fd, ret;
+
+        assert(u);
+
+        r = unit_get_resctrl_context(u);
+        if (!r)
+                return 0;
+
+        if (!rinfo)
+                return 0;
+
+        /* no cache config for this unit */
+        if (r->l3_size == 0 && !r->l3_ids)
+                return resctrl_try_inhert(u, r);
+
+        /* A group name that differs from the unit id was inherited; replace it. */
+        if (r->group && !streq(r->group, u->id))
+                r->group = mfree(r->group);
+
+        if (!r->group) {
+                tmp_group = strdup(u->id);
+                if (!tmp_group)
+                        return -ENOMEM;
+        }
+
+        lock_fd = resctrl_lock();
+        if (lock_fd < 0)
+                return -1;
+
+        if (!r->group) {
+                log_debug("resctrl: creating group %s\n", tmp_group);
+                ret = resctrl_alloc_group_create(tmp_group);
+                if (ret < 0)
+                        goto unlock;
+
+                r->group = tmp_group;
+                tmp_group = NULL;
+        }
+
+        log_debug("resctrl: group %s, l3_size=%llu, l3_ids=%s\n", r->group,
+                  (unsigned long long) r->l3_size, strempty(r->l3_ids));
+        ret = resctrl_alloc_schemata_set(r->group, r);
+        if (ret < 0) {
+                (void) resctrl_alloc_group_remove(r->group);
+                r->group = mfree(r->group);
+                goto unlock;
+        }
+        ret = 0;
+
+unlock:
+        resctrl_unlock(lock_fd);
+        return ret;
+}
+
+/* Creates the directory RESCTRL_PATH/<group_name> with mode 0755. An already existing
+ * directory counts as success. Returns 0 on success, negative on error. */
+int resctrl_alloc_group_create(const char *group_name) {
+        _cleanup_free_ char *dir = NULL;
+
+        dir = strjoin(RESCTRL_PATH, "/", group_name);
+        if (!dir)
+                return -ENOMEM;
+
+        if (mkdir(dir, 0755) < 0 && errno != EEXIST)
+                return log_warning_errno(errno, "Failed to create resctrl group %s: %m", group_name);
+
+        log_debug("resctrl group %s created\n", group_name);
+        return 0;
+}
+
+/* Removes the directory RESCTRL_PATH/<group_name>, unless its "tasks" file is non-empty.
+ *
+ * A group that does not exist counts as success, both when its "tasks" file is missing
+ * and when rmdir() reports ENOENT. Returns 0 on success, -EBUSY if the tasks file is
+ * not empty, another negative errno-style value on failure. */
+int resctrl_alloc_group_remove(const char *group_name) {
+        _cleanup_free_ char *path = NULL, *task_file = NULL, *buf = NULL;
+        size_t l = 0;
+        int r;
+
+        task_file = strjoin(RESCTRL_PATH, "/", group_name, "/tasks");
+        if (!task_file)
+                return -ENOMEM;
+
+        r = read_full_file(task_file, &buf, &l);
+        if (r == -ENOENT)
+                return 0;
+        if (r < 0)
+                return r;
+
+        if (l > 0) {
+                log_warning("There are tasks running in group %s, not remove\n", group_name);
+                return -EBUSY;
+        }
+
+        path = strjoin(RESCTRL_PATH, "/", group_name);
+        if (!path)
+                return -ENOMEM;
+
+        if (rmdir(path) < 0) {
+                if (errno == ENOENT)
+                        return 0;
+                return log_warning_errno(errno, "Failed to remove resctrl group %s: %m", group_name);
+        }
+
+        log_debug("resctrl group %s removed\n", group_name);
+        return 0;
+}
+
+/* Writes the id 'task' to RESCTRL_PATH/<group_name>/tasks.
+ *
+ * The stream is flushed explicitly before returning: the data only reaches the kernel
+ * on flush, so this is where a rejected id becomes visible.
+ * Returns 0 on success, a negative errno-style value on failure. */
+int resctrl_alloc_task_write(const char *group_name, const pid_t task) {
+        _cleanup_free_ char *path = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+
+        path = strjoin(RESCTRL_PATH, "/", group_name, "/tasks");
+        if (!path)
+                return -ENOMEM;
+
+        f = fopen(path, "we");
+        if (!f)
+                return -errno;
+
+        /* Write task ID to file */
+        if (fprintf(f, "%d\n", (int) task) < 0 || fflush(f) != 0)
+                return log_warning_errno(errno, "resctrl group %s: failed to "
+                                         "write task %m", group_name);
+        return 0;
+}
+
+/* Resets a context to "nothing configured": no group, no id list, size 0. */
+void resctrl_context_init(ResctrlContext *r) {
+        assert(r);
+
+        r->group = NULL;
+        r->l3_ids = NULL;
+        r->l3_size = 0;
+}
+
+/* Releases the strings owned by the context and resets the pointers to NULL. */
+void resctrl_context_done(ResctrlContext *r) {
+        assert(r);
+
+        /* mfree() accepts NULL and always returns NULL. */
+        r->group = mfree(r->group);
+        r->l3_ids = mfree(r->l3_ids);
+}
+
+/* Removes the unit's resctrl group, but only if the context's group is named after the
+ * unit itself. The group name is forgotten only when the removal succeeded. */
+void unit_prune_resctrl(Unit *u) {
+        ResctrlContext *r;
+
+        assert(u);
+
+        r = unit_get_resctrl_context(u);
+        if (r && r->group && streq(r->group, u->id) &&
+            resctrl_alloc_group_remove(r->group) >= 0)
+                r->group = mfree(r->group);
+}
diff --git a/src/core/resctrl.h b/src/core/resctrl.h
new file mode 100644
index 0000000..dcf0486
--- /dev/null
+++ b/src/core/resctrl.h
@@ -0,0 +1,90 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2013 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/* Forward declarations, so that the structures below can refer to each other
+ * by their typedef names. */
+typedef struct ResctrlContext ResctrlContext;
+typedef struct ResctrlInfoL3 ResctrlInfoL3;
+typedef struct ResctrlInfo ResctrlInfo;
+typedef struct ResctrlL3AllocInfo ResctrlL3AllocInfo;
+typedef struct ResctrlAllocSchemata ResctrlAllocSchemata;
+
+/* Per-unit resctrl state, embedded in the unit types that support it. */
+struct ResctrlContext {
+ char *group; /* name of the unit's group directory below RESCTRL_PATH, or NULL if none */
+
+ char *l3_ids; /* L3 cache ids; presumably the value of L3CacheAllocationIds= (TODO confirm) */
+ uint64_t l3_size; /* L3 cache size; presumably the value of L3CacheAllocationSize= (TODO confirm) */
+};
+
+/* Description of the L3 cache allocation capabilities. */
+struct ResctrlInfoL3 {
+ uint64_t cache_bytes; /* presumably the size of one L3 cache in bytes (TODO confirm) */
+
+ /* Values taken from resctrlfs files (presumably those below RESCTRL_PATH_INFO) */
+ unsigned int min_cbm_bits;
+ unsigned int num_closids;
+ uint64_t cbm_mask;
+
+ /* Smallest possible increase of the allocation size, in bytes */
+ uint64_t granularity;
+ /* Maximum number of bits of the cbm_mask */
+ unsigned int cbm_bits;
+ /* Minimal allocatable size, in bytes (if different from granularity) */
+ uint64_t min;
+
+ unsigned int max_ids; /* socket ids; presumably their count (TODO confirm) */
+};
+
+/* L3 allocation expressed as bit masks, one per socket id. */
+struct ResctrlL3AllocInfo{
+ uint64_t *bits_mask; /* array with one bit mask per socket id */
+ unsigned int max_ids; /* socket ids; presumably the length of bits_mask (TODO confirm) */
+};
+
+/* Allocation data for one group; currently it only carries the L3 part. */
+struct ResctrlAllocSchemata {
+ ResctrlL3AllocInfo l3_a; /* L3 cache allocation */
+};
+
+/* Resctrl capability information; currently it only carries the L3 part. */
+struct ResctrlInfo {
+ ResctrlInfoL3 l3_info; /* L3 cache allocation capabilities */
+};
+
+#include "unit.h"
+#include <sys/types.h>
+
+/* Global resctrl information, defined outside of this header; presumably
+ * filled in by resctrl_get_info() (TODO confirm). */
+extern ResctrlInfo *rinfo;
+/* Writes a description of the context to f, using the given line prefix. */
+void resctrl_context_dump(ResctrlContext *r, FILE* f, const char *prefix);
+/* Presumably creates and configures the resctrl group of a unit (TODO confirm). */
+int unit_realize_resctrl(Unit *u);
+/* Resets all fields of the context to NULL/0. */
+void resctrl_context_init(ResctrlContext *r);
+/* Frees the strings owned by the context. */
+void resctrl_context_done(ResctrlContext *r);
+/* Removes the unit's resctrl group if it is named after the unit. */
+void unit_prune_resctrl(Unit *u);
+
+/* Mount point of the resctrl file system; may be overridden at build time. */
+#ifndef RESCTRL_PATH
+#define RESCTRL_PATH "/sys/fs/resctrl"
+#endif
+/* The "info" directory below the resctrl mount point. */
+#define RESCTRL_PATH_INFO RESCTRL_PATH"/info"
+
+/* Presumably creates the directory RESCTRL_PATH/<group_name> (TODO confirm). */
+int resctrl_alloc_group_create(const char *group_name);
+/* Removes the directory RESCTRL_PATH/<group_name>, unless its "tasks" file is
+ * non-empty. Returns 0 on success, a negative value on failure. */
+int resctrl_alloc_group_remove(const char *group_name);
+/* Writes the task id to RESCTRL_PATH/<group_name>/tasks. Returns 0 on
+ * success, a negative value on failure. */
+int resctrl_alloc_task_write(const char *group_name, const pid_t task);
+
+/* Presumably reads the resctrl capability information (TODO confirm). */
+int resctrl_get_info(void);
diff --git a/src/core/scope.c b/src/core/scope.c
index 751556f..f421b5e 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -231,6 +231,7 @@ static void scope_dump(Unit *u, FILE *f, const char *prefix) {
cgroup_context_dump(&s->cgroup_context, f, prefix);
kill_context_dump(&s->kill_context, f, prefix);
+ resctrl_context_dump(&s->resctrl_context, f, prefix);
}
static void scope_enter_dead(Scope *s, ScopeResult f) {
@@ -574,6 +575,7 @@ DEFINE_STRING_TABLE_LOOKUP(scope_result, ScopeResult);
const UnitVTable scope_vtable = {
.object_size = sizeof(Scope),
.cgroup_context_offset = offsetof(Scope, cgroup_context),
+ .resctrl_context_offset = offsetof(Scope, resctrl_context),
.kill_context_offset = offsetof(Scope, kill_context),
.sections =
diff --git a/src/core/scope.h b/src/core/scope.h
index c38afb5..6252f9b 100644
--- a/src/core/scope.h
+++ b/src/core/scope.h
@@ -19,6 +19,7 @@ struct Scope {
Unit meta;
CGroupContext cgroup_context;
+ ResctrlContext resctrl_context;
KillContext kill_context;
ScopeState state, deserialized_state;
diff --git a/src/core/service.c b/src/core/service.c
index db1356c..a706eba 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -865,6 +865,7 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
prefix, s->n_fd_store);
cgroup_context_dump(&s->cgroup_context, f, prefix);
+ resctrl_context_dump(&s->resctrl_context, f, prefix);
}
static int service_is_suitable_main_pid(Service *s, pid_t pid, int prio) {
@@ -1071,8 +1072,10 @@ static void service_set_state(Service *s, ServiceState state) {
/* For the inactive states unit_notify() will trim the cgroup,
* but for exit we have to do that ourselves... */
- if (state == SERVICE_EXITED && !MANAGER_IS_RELOADING(UNIT(s)->manager))
+ if (state == SERVICE_EXITED && !MANAGER_IS_RELOADING(UNIT(s)->manager)) {
unit_prune_cgroup(UNIT(s));
+ unit_prune_resctrl(UNIT(s));
+ }
if (old_state != state)
log_unit_debug(UNIT(s), "Changed %s -> %s", service_state_to_string(old_state), service_state_to_string(state));
@@ -3886,6 +3889,7 @@ const UnitVTable service_vtable = {
.object_size = sizeof(Service),
.exec_context_offset = offsetof(Service, exec_context),
.cgroup_context_offset = offsetof(Service, cgroup_context),
+ .resctrl_context_offset = offsetof(Service, resctrl_context),
.kill_context_offset = offsetof(Service, kill_context),
.exec_runtime_offset = offsetof(Service, exec_runtime),
.dynamic_creds_offset = offsetof(Service, dynamic_creds),
diff --git a/src/core/service.h b/src/core/service.h
index 9c06e91..efaa070 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -108,6 +108,7 @@ struct Service {
ExecContext exec_context;
KillContext kill_context;
CGroupContext cgroup_context;
+ ResctrlContext resctrl_context;
ServiceState state, deserialized_state;
diff --git a/src/core/slice.c b/src/core/slice.c
index 58f18a4..adcca82 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -211,6 +211,7 @@ static void slice_dump(Unit *u, FILE *f, const char *prefix) {
prefix, slice_state_to_string(t->state));
cgroup_context_dump(&t->cgroup_context, f, prefix);
+ resctrl_context_dump(&t->resctrl_context, f, prefix);
}
static int slice_start(Unit *u) {
@@ -225,6 +226,7 @@ static int slice_start(Unit *u) {
return r;
(void) unit_realize_cgroup(u);
+ (void) unit_realize_resctrl(u);
(void) unit_reset_cpu_accounting(u);
(void) unit_reset_ip_accounting(u);
@@ -347,6 +349,7 @@ static void slice_enumerate_perpetual(Manager *m) {
const UnitVTable slice_vtable = {
.object_size = sizeof(Slice),
.cgroup_context_offset = offsetof(Slice, cgroup_context),
+ .resctrl_context_offset = offsetof(Slice, resctrl_context),
.sections =
"Unit\0"
diff --git a/src/core/slice.h b/src/core/slice.h
index 4678c08..2073d1c 100644
--- a/src/core/slice.h
+++ b/src/core/slice.h
@@ -11,6 +11,7 @@ struct Slice {
SliceState state, deserialized_state;
CGroupContext cgroup_context;
+ ResctrlContext resctrl_context;
};
extern const UnitVTable slice_vtable;
diff --git a/src/core/socket.c b/src/core/socket.c
index 56d3222..4b501fc 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -865,6 +865,7 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
}
cgroup_context_dump(&s->cgroup_context, f, prefix);
+ resctrl_context_dump(&s->resctrl_context, f, prefix);
}
static int instance_from_socket(int fd, unsigned nr, char **instance) {
@@ -3274,6 +3275,7 @@ const UnitVTable socket_vtable = {
.object_size = sizeof(Socket),
.exec_context_offset = offsetof(Socket, exec_context),
.cgroup_context_offset = offsetof(Socket, cgroup_context),
+ .resctrl_context_offset = offsetof(Socket, resctrl_context),
.kill_context_offset = offsetof(Socket, kill_context),
.exec_runtime_offset = offsetof(Socket, exec_runtime),
.dynamic_creds_offset = offsetof(Socket, dynamic_creds),
diff --git a/src/core/socket.h b/src/core/socket.h
index c4e25db..239fa2f 100644
--- a/src/core/socket.h
+++ b/src/core/socket.h
@@ -82,6 +82,7 @@ struct Socket {
ExecContext exec_context;
KillContext kill_context;
CGroupContext cgroup_context;
+ ResctrlContext resctrl_context;
ExecRuntime *exec_runtime;
DynamicCreds dynamic_creds;
diff --git a/src/core/swap.c b/src/core/swap.c
index b78b1aa..89b629a 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -597,6 +597,7 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) {
exec_context_dump(&s->exec_context, f, prefix);
kill_context_dump(&s->kill_context, f, prefix);
cgroup_context_dump(&s->cgroup_context, f, prefix);
+ resctrl_context_dump(&s->resctrl_context, f, prefix);
}
static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
@@ -1461,6 +1462,7 @@ const UnitVTable swap_vtable = {
.object_size = sizeof(Swap),
.exec_context_offset = offsetof(Swap, exec_context),
.cgroup_context_offset = offsetof(Swap, cgroup_context),
+ .resctrl_context_offset = offsetof(Swap, resctrl_context),
.kill_context_offset = offsetof(Swap, kill_context),
.exec_runtime_offset = offsetof(Swap, exec_runtime),
.dynamic_creds_offset = offsetof(Swap, dynamic_creds),
diff --git a/src/core/swap.h b/src/core/swap.h
index 1c0c7fc..21869b1 100644
--- a/src/core/swap.h
+++ b/src/core/swap.h
@@ -64,6 +64,7 @@ struct Swap {
ExecContext exec_context;
KillContext kill_context;
CGroupContext cgroup_context;
+ ResctrlContext resctrl_context;
ExecRuntime *exec_runtime;
DynamicCreds dynamic_creds;
diff --git a/src/core/unit.c b/src/core/unit.c
index 113205b..b2d04ea 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -138,6 +138,7 @@ static void unit_init(Unit *u) {
CGroupContext *cc;
ExecContext *ec;
KillContext *kc;
+ ResctrlContext *rc;
assert(u);
assert(u->manager);
@@ -175,6 +176,10 @@ static void unit_init(Unit *u) {
if (kc)
kill_context_init(kc);
+ rc = unit_get_resctrl_context(u);
+ if (rc)
+ resctrl_context_init(rc);
+
if (UNIT_VTABLE(u)->init)
UNIT_VTABLE(u)->init(u);
}
@@ -527,6 +532,7 @@ static void unit_free_requires_mounts_for(Unit *u) {
static void unit_done(Unit *u) {
ExecContext *ec;
CGroupContext *cc;
+ ResctrlContext *rc;
assert(u);
@@ -543,6 +549,10 @@ static void unit_done(Unit *u) {
cc = unit_get_cgroup_context(u);
if (cc)
cgroup_context_done(cc);
+
+ rc = unit_get_resctrl_context(u);
+ if (rc)
+ resctrl_context_done(rc);
}
void unit_free(Unit *u) {
@@ -2334,6 +2344,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
/* Make sure the cgroup and state files are always removed when we become inactive */
if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
unit_prune_cgroup(u);
+ unit_prune_resctrl(u);
unit_unlink_state_files(u);
}
@@ -4195,6 +4206,19 @@ CGroupContext *unit_get_cgroup_context(Unit *u) {
return (CGroupContext*) ((uint8_t*) u + offset);
}
+/* Returns the ResctrlContext embedded in the unit object, or NULL if the unit
+ * has no valid type or its type does not carry a ResctrlContext. */
+ResctrlContext *unit_get_resctrl_context(Unit *u) {
+        size_t offset;
+
+        if (u->type < 0)
+                return NULL;
+
+        /* A zero offset in the vtable means "this unit type has no such context" */
+        offset = UNIT_VTABLE(u)->resctrl_context_offset;
+
+        return offset > 0 ? (ResctrlContext*) ((uint8_t*) u + offset) : NULL;
+}
+
ExecRuntime *unit_get_exec_runtime(Unit *u) {
size_t offset;
@@ -5321,6 +5345,7 @@ int unit_prepare_exec(Unit *u) {
/* Prepares everything so that we can fork of a process for this unit */
(void) unit_realize_cgroup(u);
+ (void) unit_realize_resctrl(u);
if (u->reset_accounting) {
(void) unit_reset_cpu_accounting(u);
diff --git a/src/core/unit.h b/src/core/unit.h
index a9de983..f5a12aa 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -12,6 +12,7 @@
#include "list.h"
#include "unit-name.h"
#include "cgroup.h"
+#include "resctrl.h"
typedef struct UnitRef UnitRef;
@@ -405,6 +406,10 @@ typedef struct UnitVTable {
* that */
size_t exec_runtime_offset;
+ /* If greater than 0, the offset into the object where
+ * ResctrlContext is found, if the unit type has that */
+ size_t resctrl_context_offset;
+
/* If greater than 0, the offset into the object where the pointer to DynamicCreds is found, if the unit type
* has that. */
size_t dynamic_creds_offset;
@@ -727,6 +732,7 @@ int unit_patch_contexts(Unit *u);
ExecContext *unit_get_exec_context(Unit *u) _pure_;
KillContext *unit_get_kill_context(Unit *u) _pure_;
CGroupContext *unit_get_cgroup_context(Unit *u) _pure_;
+ResctrlContext *unit_get_resctrl_context(Unit *u) _pure_;
ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_;
--
2.17.1
More information about the systemd-devel
mailing list