[Intel-gfx] [igt-dev] [PATCH i-g-t 12/25] gem_wsim: Engine map support

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon May 20 10:49:13 UTC 2019


On 17/05/2019 20:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-17 12:25:13)
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> Support new i915 uAPI for configuring contexts with engine maps.
>>
>> Please refer to the README file for more detailed explanation.
>>
>> v2:
>>   * Allow defining engine maps by class.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> ---
>>   benchmarks/gem_wsim.c  | 211 +++++++++++++++++++++++++++++++++++------
>>   benchmarks/wsim/README |  25 ++++-
>>   2 files changed, 204 insertions(+), 32 deletions(-)
>>
>> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
>> index 60b7d32e22d4..e5b12e37490e 100644
>> --- a/benchmarks/gem_wsim.c
>> +++ b/benchmarks/gem_wsim.c
>> @@ -57,6 +57,7 @@
>>   #include "ewma.h"
>>   
>>   enum intel_engine_id {
>> +       DEFAULT,
>>          RCS,
>>          BCS,
>>          VCS,
>> @@ -81,7 +82,8 @@ enum w_type
>>          SW_FENCE,
>>          SW_FENCE_SIGNAL,
>>          CTX_PRIORITY,
>> -       PREEMPTION
>> +       PREEMPTION,
>> +       ENGINE_MAP
>>   };
>>   
>>   struct deps
>> @@ -115,6 +117,10 @@ struct w_step
>>                  int throttle;
>>                  int fence_signal;
>>                  int priority;
>> +               struct {
>> +                       unsigned int engine_map_count;
>> +                       enum intel_engine_id *engine_map;
>> +               };
>>          };
>>   
>>          /* Implementation details */
>> @@ -142,6 +148,8 @@ DECLARE_EWMA(uint64_t, rt, 4, 2)
>>   struct ctx {
>>          uint32_t id;
>>          int priority;
>> +       unsigned int engine_map_count;
>> +       enum intel_engine_id *engine_map;
>>          bool targets_instance;
>>          bool wants_balance;
>>          unsigned int static_vcs;
>> @@ -200,10 +208,10 @@ struct workload
>>                  int fd;
>>                  bool first;
>>                  unsigned int num_engines;
>> -               unsigned int engine_map[5];
>> +               unsigned int engine_map[NUM_ENGINES];
>>                  uint64_t t_prev;
>> -               uint64_t prev[5];
>> -               double busy[5];
>> +               uint64_t prev[NUM_ENGINES];
>> +               double busy[NUM_ENGINES];
>>          } busy_balancer;
>>   };
>>   
>> @@ -234,6 +242,7 @@ static int fd;
>>   #define REG(x) (volatile uint32_t *)((volatile char *)igt_global_mmio + x)
>>   
>>   static const char *ring_str_map[NUM_ENGINES] = {
>> +       [DEFAULT] = "DEFAULT",
>>          [RCS] = "RCS",
>>          [BCS] = "BCS",
>>          [VCS] = "VCS",
>> @@ -330,6 +339,43 @@ static int str_to_engine(const char *str)
>>          return -1;
>>   }
>>   
>> +static int parse_engine_map(struct w_step *step, const char *_str)
>> +{
>> +       char *token, *tctx = NULL, *tstart = (char *)_str;
>> +
>> +       while ((token = strtok_r(tstart, "|", &tctx))) {
>> +               enum intel_engine_id engine;
>> +               unsigned int add;
>> +
>> +               tstart = NULL;
>> +
>> +               if (!strcmp(token, "DEFAULT"))
>> +                       return -1;
>> +
>> +               engine = str_to_engine(token);
>> +               if ((int)engine < 0)
>> +                       return -1;
>> +
>> +               if (engine != VCS && engine != VCS1 && engine != VCS2)
>> +                       return -1; /* TODO */
> 
> Still a little concerned that the map is VCS only. It just doesn't fit
> my expectations of what the map will be.

I think I could update this now that load_balance takes a list.

>> +
>> +               add = engine == VCS ? 2 : 1;
> 
> Will we not every ask what happens if we had millions of engines at our
> disposal. But that's a tommorrow problem, ok.

This is improved in a later patch. It felt easier to generalize at the 
end in this instance instead of trying to rebase the whole series.

> 
>> +               step->engine_map_count += add;
>> +               step->engine_map = realloc(step->engine_map,
>> +                                          step->engine_map_count *
>> +                                          sizeof(step->engine_map[0]));
>> +
>> +               if (engine != VCS) {
>> +                       step->engine_map[step->engine_map_count - 1] = engine;
>> +               } else {
>> +                       step->engine_map[step->engine_map_count - 2] = VCS1;
>> +                       step->engine_map[step->engine_map_count - 1] = VCS2;
>> +               }
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>>   static struct workload *
>>   parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
>>   {
>> @@ -448,6 +494,33 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
>>                          } else if (!strcmp(field, "f")) {
>>                                  step.type = SW_FENCE;
>>                                  goto add_step;
>> +                       } else if (!strcmp(field, "M")) {
>> +                               unsigned int nr = 0;
>> +                               while ((field = strtok_r(fstart, ".", &fctx)) !=
>> +                                   NULL) {
>> +                                       tmp = atoi(field);
>> +                                       check_arg(nr == 0 && tmp <= 0,
>> +                                                 "Invalid context at step %u!\n",
>> +                                                 nr_steps);
>> +                                       check_arg(nr > 1,
>> +                                                 "Invalid engine map format at step %u!\n",
>> +                                                 nr_steps);
>> +
>> +                                       if (nr == 0) {
>> +                                               step.context = tmp;
>> +                                       } else {
>> +                                               tmp = parse_engine_map(&step,
>> +                                                                      field);
>> +                                               check_arg(tmp < 0,
>> +                                                         "Invalid engine map list at step %u!\n",
>> +                                                         nr_steps);
>> +                                       }
>> +
>> +                                       nr++;
>> +                               }
>> +
>> +                               step.type = ENGINE_MAP;
>> +                               goto add_step;
>>                          } else if (!strcmp(field, "X")) {
>>                                  unsigned int nr = 0;
>>                                  while ((field = strtok_r(fstart, ".", &fctx)) !=
>> @@ -774,6 +847,7 @@ terminate_bb(struct w_step *w, unsigned int flags)
>>   }
>>   
>>   static const unsigned int eb_engine_map[NUM_ENGINES] = {
>> +       [DEFAULT] = I915_EXEC_DEFAULT,
>>          [RCS] = I915_EXEC_RENDER,
>>          [BCS] = I915_EXEC_BLT,
>>          [VCS] = I915_EXEC_BSD,
>> @@ -796,11 +870,36 @@ eb_set_engine(struct drm_i915_gem_execbuffer2 *eb,
>>                  eb->flags = eb_engine_map[engine];
>>   }
>>   
>> +static unsigned int
>> +find_engine_in_map(struct ctx *ctx, enum intel_engine_id engine)
>> +{
>> +       unsigned int i;
>> +
>> +       for (i = 0; i < ctx->engine_map_count; i++) {
>> +               if (ctx->engine_map[i] == engine)
>> +                       return i + 1;
>> +       }
>> +
>> +       igt_assert(0);
>> +       return 0;
> 
> No balancer in the map at this point?

Correct, only in one of the later patches.

> 
>> +}
>> +
>> +static struct ctx *
>> +__get_ctx(struct workload *wrk, struct w_step *w)
>> +{
>> +       return &wrk->ctx_list[w->context * 2];
>> +}
>> +
>>   static void
>> -eb_update_flags(struct w_step *w, enum intel_engine_id engine,
>> -               unsigned int flags)
>> +eb_update_flags(struct workload *wrk, struct w_step *w,
>> +               enum intel_engine_id engine, unsigned int flags)
>>   {
>> -       eb_set_engine(&w->eb, engine, flags);
>> +       struct ctx *ctx = __get_ctx(wrk, w);
>> +
>> +       if (ctx->engine_map)
>> +               w->eb.flags = find_engine_in_map(ctx, engine);
>> +       else
>> +               eb_set_engine(&w->eb, engine, flags);
>>   
>>          w->eb.flags |= I915_EXEC_HANDLE_LUT;
>>          w->eb.flags |= I915_EXEC_NO_RELOC;
>> @@ -819,12 +918,6 @@ get_status_objects(struct workload *wrk)
>>                  return wrk->status_object;
>>   }
>>   
>> -static struct ctx *
>> -__get_ctx(struct workload *wrk, struct w_step *w)
>> -{
>> -       return &wrk->ctx_list[w->context * 2];
>> -}
>> -
>>   static uint32_t
>>   get_ctxid(struct workload *wrk, struct w_step *w)
>>   {
>> @@ -894,7 +987,7 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
>>                  engine = VCS2;
>>          else if (flags & SWAPVCS && engine == VCS2)
>>                  engine = VCS1;
>> -       eb_update_flags(w, engine, flags);
>> +       eb_update_flags(wrk, w, engine, flags);
>>   #ifdef DEBUG
>>          printf("%u: %u:|", w->idx, w->eb.buffer_count);
>>          for (i = 0; i <= j; i++)
>> @@ -936,7 +1029,7 @@ static void vm_destroy(int i915, uint32_t vm_id)
>>          igt_assert_eq(__vm_destroy(i915, vm_id), 0);
>>   }
>>   
>> -static void
>> +static int
>>   prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>>   {
>>          unsigned int ctx_vcs;
>> @@ -999,30 +1092,53 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>>          /*
>>           * Identify if contexts target specific engine instances and if they
>>           * want to be balanced.
>> +        *
>> +        * Transfer over engine map configuration from the workload step.
>>           */
>>          for (j = 0; j < wrk->nr_ctxs; j += 2) {
>>                  bool targets = false;
>>                  bool balance = false;
>>   
>>                  for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
>> -                       if (w->type != BATCH)
>> -                               continue;
>> -
>>                          if (w->context != (j / 2))
>>                                  continue;
>>   
>> -                       if (w->engine == VCS)
>> -                               balance = true;
>> -                       else
>> -                               targets = true;
>> +                       if (w->type == BATCH) {
>> +                               if (w->engine == VCS)
>> +                                       balance = true;
>> +                               else
>> +                                       targets = true;
>> +                       } else if (w->type == ENGINE_MAP) {
>> +                               wrk->ctx_list[j].engine_map = w->engine_map;
>> +                               wrk->ctx_list[j].engine_map_count =
>> +                                       w->engine_map_count;
>> +                       }
>>                  }
>>   
>> -               if (flags & I915) {
>> -                       wrk->ctx_list[j].targets_instance = targets;
>> +               wrk->ctx_list[j].targets_instance = targets;
>> +               if (flags & I915)
>>                          wrk->ctx_list[j].wants_balance = balance;
>> +       }
>> +
>> +       /*
>> +        * Ensure VCS is not allowed with engine map contexts.
>> +        */
>> +       for (j = 0; j < wrk->nr_ctxs; j += 2) {
>> +               for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
>> +                       if (w->context != (j / 2))
>> +                               continue;
>> +
>> +                       if (w->type != BATCH)
>> +                               continue;
>> +
>> +                       if (wrk->ctx_list[j].engine_map && w->engine == VCS) {
> 
> But wouldn't VCS still be meaning use the balancer and not a specific
> engine???
> 
> I'm not understanding how you are using maps in the .wsim :(

Batch sent to VCS means any VCS if not a context with a map, but VCS 
mentioned in the map now auto-expands to all present VCS instances.

VCS as engine specifier at execbuf time could be allowed if code would 
then check if there is a load balancer built of vcs engines in this context.

But what use case you think is not covered?

We got legacy wsim files which implicitly create a map by doing:

1.VCS.1000.0.0 -> submit a batch to any vcs

And then after this series you can also do:

M.1.VCS
B.1
1.DEFAULT.1000.0.0

Which would have the same effect.

You would seem want:

M.1.VCS
B.1
1.VCS.1000.0.0

?

But I don't see what it gains?

Regards,

Tvrtko


More information about the Intel-gfx mailing list