[Intel-gfx] [PATCH i-g-t 19/21] gem_wsim: Per context SSEU control
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed May 8 12:10:56 UTC 2019
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
A new workload command ('S') is added which allows per context slice
(re-)configuration.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
benchmarks/gem_wsim.c | 69 +++++++++++++++++++++++++++++++++++-------
benchmarks/wsim/README | 23 +++++++++++++-
2 files changed, 80 insertions(+), 12 deletions(-)
diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index 64dd251a25eb..ed5acee02e20 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -87,6 +87,7 @@ enum w_type
LOAD_BALANCE,
BOND,
TERMINATE,
+ SSEU
};
struct deps
@@ -136,6 +137,7 @@ struct w_step
uint64_t bond_mask;
enum intel_engine_id bond_master;
};
+ int sseu;
};
/* Implementation details */
@@ -171,6 +173,7 @@ struct ctx {
bool targets_instance;
bool wants_balance;
unsigned int static_vcs;
+ uint64_t sseu;
};
struct workload
@@ -241,6 +244,7 @@ static unsigned int context_vcs_rr;
static int verbose = 1;
static int fd;
+static struct drm_i915_gem_context_param_sseu device_sseu;
#define SWAPVCS (1<<0)
#define SEQNO (1<<1)
@@ -456,6 +460,27 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
int_field(SYNC, target,
tmp >= 0 || ((int)nr_steps + tmp) < 0,
"Invalid sync target at step %u!\n");
+ } else if (!strcmp(field, "S")) {
+ unsigned int nr = 0;
+ while ((field = strtok_r(fstart, ".", &fctx))) {
+ tmp = atoi(field);
+ check_arg(tmp <= 0 && nr == 0,
+ "Invalid context at step %u!\n",
+ nr_steps);
+ check_arg(nr > 1,
+ "Invalid SSEU format at step %u!\n",
+ nr_steps);
+
+ if (nr == 0)
+ step.context = tmp;
+ else if (nr == 1)
+ step.sseu = tmp;
+
+ nr++;
+ }
+
+ step.type = SSEU;
+ goto add_step;
} else if (!strcmp(field, "t")) {
int_field(THROTTLE, throttle,
tmp < 0,
@@ -1071,24 +1096,24 @@ static void __ctx_set_prio(uint32_t ctx_id, unsigned int prio)
gem_context_set_param(fd, &param);
}
-static void
-set_ctx_sseu(uint32_t ctx)
+static uint64_t
+set_ctx_sseu(uint32_t ctx, uint64_t slice_mask)
{
- struct drm_i915_gem_context_param_sseu sseu = { };
+ struct drm_i915_gem_context_param_sseu sseu = device_sseu;
struct drm_i915_gem_context_param param = { };
- sseu.class = I915_ENGINE_CLASS_RENDER;
- sseu.instance = 0;
+ if (slice_mask == -1)
+ slice_mask = device_sseu.slice_mask;
+
+ sseu.slice_mask = slice_mask;
param.ctx_id = ctx;
param.param = I915_CONTEXT_PARAM_SSEU;
param.value = (uintptr_t)&sseu;
- gem_context_get_param(fd, &param);
-
- sseu.slice_mask = 1;
-
gem_context_set_param(fd, &param);
+
+ return slice_mask;
}
static int
@@ -1287,6 +1312,7 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
igt_assert(ctx_id);
ctx->id = ctx_id;
+ ctx->sseu = device_sseu.slice_mask;
if (flags & GLOBAL_BALANCE) {
ctx->static_vcs = context_vcs_rr;
@@ -1439,8 +1465,10 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
gem_context_set_param(fd, &param);
}
- if (wrk->sseu)
- set_ctx_sseu(arg.ctx_id);
+ if (wrk->sseu) {
+ /* Set to slice 0 only, one slice. */
+ ctx->sseu = set_ctx_sseu(ctx_id, 1);
+ }
}
/* Record default preemption. */
@@ -2409,6 +2437,13 @@ static void *run_workload(void *data)
w->type == LOAD_BALANCE ||
w->type == BOND) {
continue;
+ } else if (w->type == SSEU) {
+ if (w->sseu != wrk->ctx_list[w->context].sseu) {
+ wrk->ctx_list[w->context].sseu =
+ set_ctx_sseu(wrk->ctx_list[w->context].id,
+ w->sseu);
+ }
+ continue;
}
if (do_sleep || w->type == PERIOD) {
@@ -2725,6 +2760,16 @@ static void init_clocks(void)
rcs_end - rcs_start, 1e6*t, 1024e6 * t / (rcs_end - rcs_start));
}
+static void get_device_sseu(void)
+{
+ struct drm_i915_gem_context_param param = { };
+
+ param.param = I915_CONTEXT_PARAM_SSEU;
+ param.value = (uintptr_t)&device_sseu;
+
+ gem_context_get_param(fd, &param);
+}
+
int main(int argc, char **argv)
{
unsigned int repeat = 1;
@@ -2753,6 +2798,8 @@ int main(int argc, char **argv)
fd = __drm_open_driver(DRIVER_INTEL);
igt_require(fd);
+ get_device_sseu();
+
init_clocks();
while ((c = getopt(argc, argv,
diff --git a/benchmarks/wsim/README b/benchmarks/wsim/README
index c94d01018419..d7c255b9527c 100644
--- a/benchmarks/wsim/README
+++ b/benchmarks/wsim/README
@@ -5,7 +5,7 @@ ctx.engine.duration_us.dependency.wait,...
<uint>.<str>.<uint>[-<uint>]|*.<int <= 0>[/<int <= 0>][...].<0|1>,...
B.<uint>
M.<uint>.<str>[|<str>]...
-P|X.<uint>.<int>
+P|S|X.<uint>.<int>
d|p|s|t|q|a|T.<int>,...
b.<uint>.<uint>.<str>
f
@@ -30,6 +30,7 @@ Additional workload steps are also supported:
'b' - Set up engine bonds.
'M' - Set up engine map.
'P' - Context priority.
+ 'S' - Context SSEU configuration.
'T' - Terminate an infinite batch.
'X' - Context preemption control.
@@ -249,3 +250,23 @@ then look like:
1.DEFAULT.1000.f-1.0
2.DEFAULT.1000.s-1.0
a.-3
+
+Context SSEU configuration
+--------------------------
+
+ S.1.1
+ 1.RCS.1000.0.0
+ S.2.-1
+ 2.RCS.1000.0.0
+
+Context 1 is configured to run with one enabled slice (slice mask 1) and a batch
+is submitted against it. Context 2 is configured to run with all slices (this is
+the default so the command could also be omitted) and a batch is submitted
+against it.
+
+This shows the dynamic SSEU reconfiguration cost between two contexts competing
+for the render engine.
+
+Slice mask of -1 has a special meaning of "all slices". Otherwise any integer
+can be specified as the slice mask, but beware that any value other than 1 and
+-1 can make the workload not portable between different GPUs.
--
2.19.1
More information about the Intel-gfx
mailing list