[Intel-gfx] [igt-dev] [PATCH i-g-t 12/25] gem_wsim: Engine map support
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon May 20 10:49:13 UTC 2019
On 17/05/2019 20:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-17 12:25:13)
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> Support new i915 uAPI for configuring contexts with engine maps.
>>
>> Please refer to the README file for more detailed explanation.
>>
>> v2:
>> * Allow defining engine maps by class.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> ---
>> benchmarks/gem_wsim.c | 211 +++++++++++++++++++++++++++++++++++------
>> benchmarks/wsim/README | 25 ++++-
>> 2 files changed, 204 insertions(+), 32 deletions(-)
>>
>> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
>> index 60b7d32e22d4..e5b12e37490e 100644
>> --- a/benchmarks/gem_wsim.c
>> +++ b/benchmarks/gem_wsim.c
>> @@ -57,6 +57,7 @@
>> #include "ewma.h"
>>
>> enum intel_engine_id {
>> + DEFAULT,
>> RCS,
>> BCS,
>> VCS,
>> @@ -81,7 +82,8 @@ enum w_type
>> SW_FENCE,
>> SW_FENCE_SIGNAL,
>> CTX_PRIORITY,
>> - PREEMPTION
>> + PREEMPTION,
>> + ENGINE_MAP
>> };
>>
>> struct deps
>> @@ -115,6 +117,10 @@ struct w_step
>> int throttle;
>> int fence_signal;
>> int priority;
>> + struct {
>> + unsigned int engine_map_count;
>> + enum intel_engine_id *engine_map;
>> + };
>> };
>>
>> /* Implementation details */
>> @@ -142,6 +148,8 @@ DECLARE_EWMA(uint64_t, rt, 4, 2)
>> struct ctx {
>> uint32_t id;
>> int priority;
>> + unsigned int engine_map_count;
>> + enum intel_engine_id *engine_map;
>> bool targets_instance;
>> bool wants_balance;
>> unsigned int static_vcs;
>> @@ -200,10 +208,10 @@ struct workload
>> int fd;
>> bool first;
>> unsigned int num_engines;
>> - unsigned int engine_map[5];
>> + unsigned int engine_map[NUM_ENGINES];
>> uint64_t t_prev;
>> - uint64_t prev[5];
>> - double busy[5];
>> + uint64_t prev[NUM_ENGINES];
>> + double busy[NUM_ENGINES];
>> } busy_balancer;
>> };
>>
>> @@ -234,6 +242,7 @@ static int fd;
>> #define REG(x) (volatile uint32_t *)((volatile char *)igt_global_mmio + x)
>>
>> static const char *ring_str_map[NUM_ENGINES] = {
>> + [DEFAULT] = "DEFAULT",
>> [RCS] = "RCS",
>> [BCS] = "BCS",
>> [VCS] = "VCS",
>> @@ -330,6 +339,43 @@ static int str_to_engine(const char *str)
>> return -1;
>> }
>>
>> +static int parse_engine_map(struct w_step *step, const char *_str)
>> +{
>> + char *token, *tctx = NULL, *tstart = (char *)_str;
>> +
>> + while ((token = strtok_r(tstart, "|", &tctx))) {
>> + enum intel_engine_id engine;
>> + unsigned int add;
>> +
>> + tstart = NULL;
>> +
>> + if (!strcmp(token, "DEFAULT"))
>> + return -1;
>> +
>> + engine = str_to_engine(token);
>> + if ((int)engine < 0)
>> + return -1;
>> +
>> + if (engine != VCS && engine != VCS1 && engine != VCS2)
>> + return -1; /* TODO */
>
> Still a little concerned that the map is VCS only. It just doesn't fit
> my expectations of what the map will be.
I think I could update this now that load_balance takes a list.
>> +
>> + add = engine == VCS ? 2 : 1;
>
> Will we not every ask what happens if we had millions of engines at our
> disposal. But that's a tommorrow problem, ok.
This is improved in a later patch. It felt easier to generalize at the
end in this instance instead of trying to rebase the whole series.
>
>> + step->engine_map_count += add;
>> + step->engine_map = realloc(step->engine_map,
>> + step->engine_map_count *
>> + sizeof(step->engine_map[0]));
>> +
>> + if (engine != VCS) {
>> + step->engine_map[step->engine_map_count - 1] = engine;
>> + } else {
>> + step->engine_map[step->engine_map_count - 2] = VCS1;
>> + step->engine_map[step->engine_map_count - 1] = VCS2;
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static struct workload *
>> parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
>> {
>> @@ -448,6 +494,33 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
>> } else if (!strcmp(field, "f")) {
>> step.type = SW_FENCE;
>> goto add_step;
>> + } else if (!strcmp(field, "M")) {
>> + unsigned int nr = 0;
>> + while ((field = strtok_r(fstart, ".", &fctx)) !=
>> + NULL) {
>> + tmp = atoi(field);
>> + check_arg(nr == 0 && tmp <= 0,
>> + "Invalid context at step %u!\n",
>> + nr_steps);
>> + check_arg(nr > 1,
>> + "Invalid engine map format at step %u!\n",
>> + nr_steps);
>> +
>> + if (nr == 0) {
>> + step.context = tmp;
>> + } else {
>> + tmp = parse_engine_map(&step,
>> + field);
>> + check_arg(tmp < 0,
>> + "Invalid engine map list at step %u!\n",
>> + nr_steps);
>> + }
>> +
>> + nr++;
>> + }
>> +
>> + step.type = ENGINE_MAP;
>> + goto add_step;
>> } else if (!strcmp(field, "X")) {
>> unsigned int nr = 0;
>> while ((field = strtok_r(fstart, ".", &fctx)) !=
>> @@ -774,6 +847,7 @@ terminate_bb(struct w_step *w, unsigned int flags)
>> }
>>
>> static const unsigned int eb_engine_map[NUM_ENGINES] = {
>> + [DEFAULT] = I915_EXEC_DEFAULT,
>> [RCS] = I915_EXEC_RENDER,
>> [BCS] = I915_EXEC_BLT,
>> [VCS] = I915_EXEC_BSD,
>> @@ -796,11 +870,36 @@ eb_set_engine(struct drm_i915_gem_execbuffer2 *eb,
>> eb->flags = eb_engine_map[engine];
>> }
>>
>> +static unsigned int
>> +find_engine_in_map(struct ctx *ctx, enum intel_engine_id engine)
>> +{
>> + unsigned int i;
>> +
>> + for (i = 0; i < ctx->engine_map_count; i++) {
>> + if (ctx->engine_map[i] == engine)
>> + return i + 1;
>> + }
>> +
>> + igt_assert(0);
>> + return 0;
>
> No balancer in the map at this point?
Correct, only in one of the later patches.
>
>> +}
>> +
>> +static struct ctx *
>> +__get_ctx(struct workload *wrk, struct w_step *w)
>> +{
>> + return &wrk->ctx_list[w->context * 2];
>> +}
>> +
>> static void
>> -eb_update_flags(struct w_step *w, enum intel_engine_id engine,
>> - unsigned int flags)
>> +eb_update_flags(struct workload *wrk, struct w_step *w,
>> + enum intel_engine_id engine, unsigned int flags)
>> {
>> - eb_set_engine(&w->eb, engine, flags);
>> + struct ctx *ctx = __get_ctx(wrk, w);
>> +
>> + if (ctx->engine_map)
>> + w->eb.flags = find_engine_in_map(ctx, engine);
>> + else
>> + eb_set_engine(&w->eb, engine, flags);
>>
>> w->eb.flags |= I915_EXEC_HANDLE_LUT;
>> w->eb.flags |= I915_EXEC_NO_RELOC;
>> @@ -819,12 +918,6 @@ get_status_objects(struct workload *wrk)
>> return wrk->status_object;
>> }
>>
>> -static struct ctx *
>> -__get_ctx(struct workload *wrk, struct w_step *w)
>> -{
>> - return &wrk->ctx_list[w->context * 2];
>> -}
>> -
>> static uint32_t
>> get_ctxid(struct workload *wrk, struct w_step *w)
>> {
>> @@ -894,7 +987,7 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
>> engine = VCS2;
>> else if (flags & SWAPVCS && engine == VCS2)
>> engine = VCS1;
>> - eb_update_flags(w, engine, flags);
>> + eb_update_flags(wrk, w, engine, flags);
>> #ifdef DEBUG
>> printf("%u: %u:|", w->idx, w->eb.buffer_count);
>> for (i = 0; i <= j; i++)
>> @@ -936,7 +1029,7 @@ static void vm_destroy(int i915, uint32_t vm_id)
>> igt_assert_eq(__vm_destroy(i915, vm_id), 0);
>> }
>>
>> -static void
>> +static int
>> prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>> {
>> unsigned int ctx_vcs;
>> @@ -999,30 +1092,53 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>> /*
>> * Identify if contexts target specific engine instances and if they
>> * want to be balanced.
>> + *
>> + * Transfer over engine map configuration from the workload step.
>> */
>> for (j = 0; j < wrk->nr_ctxs; j += 2) {
>> bool targets = false;
>> bool balance = false;
>>
>> for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
>> - if (w->type != BATCH)
>> - continue;
>> -
>> if (w->context != (j / 2))
>> continue;
>>
>> - if (w->engine == VCS)
>> - balance = true;
>> - else
>> - targets = true;
>> + if (w->type == BATCH) {
>> + if (w->engine == VCS)
>> + balance = true;
>> + else
>> + targets = true;
>> + } else if (w->type == ENGINE_MAP) {
>> + wrk->ctx_list[j].engine_map = w->engine_map;
>> + wrk->ctx_list[j].engine_map_count =
>> + w->engine_map_count;
>> + }
>> }
>>
>> - if (flags & I915) {
>> - wrk->ctx_list[j].targets_instance = targets;
>> + wrk->ctx_list[j].targets_instance = targets;
>> + if (flags & I915)
>> wrk->ctx_list[j].wants_balance = balance;
>> + }
>> +
>> + /*
>> + * Ensure VCS is not allowed with engine map contexts.
>> + */
>> + for (j = 0; j < wrk->nr_ctxs; j += 2) {
>> + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
>> + if (w->context != (j / 2))
>> + continue;
>> +
>> + if (w->type != BATCH)
>> + continue;
>> +
>> + if (wrk->ctx_list[j].engine_map && w->engine == VCS) {
>
> But wouldn't VCS still be meaning use the balancer and not a specific
> engine???
>
> I'm not understanding how you are using maps in the .wsim :(
Batch sent to VCS means any VCS if not a context with a map, but VCS
mentioned in the map now auto-expands to all present VCS instances.
VCS as engine specifier at execbuf time could be allowed if code would
then check if there is a load balancer built of vcs engines in this context.
But what use case you think is not covered?
We got legacy wsim files which implicitly create a map by doing:
1.VCS.1000.0.0 -> submit a batch to any vcs
And then after this series you can also do:
M.1.VCS
B.1
1.DEFAULT.1000.0.0
Which would have the same effect.
You would seem want:
M.1.VCS
B.1
1.VCS.1000.0.0
?
But I don't see what it gains?
Regards,
Tvrtko
More information about the Intel-gfx
mailing list