[Intel-gfx] [PATCH i-g-t] tests/i915/gem_ctx_switch: Update with engine discovery

Thu Jun 27 10:55:56 UTC 2019

On 27/06/2019 11:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-06-27 11:20:19)
>> diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
>> index 647911d4c42e..de39748344b8 100644
>> --- a/tests/i915/gem_ctx_switch.c
>> +++ b/tests/i915/gem_ctx_switch.c
>> @@ -55,7 +55,7 @@ static double elapsed(const struct timespec *start, const struct timespec *end)
>>   
>>   static int measure_qlen(int fd,
>>                          struct drm_i915_gem_execbuffer2 *execbuf,
>> -                       unsigned int *engine, unsigned int nengine,
>> +                       const struct intel_engine_data *engines,
>>                          int timeout)
>>   {
>>          const struct drm_i915_gem_exec_object2 * const obj =
>> @@ -63,15 +63,17 @@ static int measure_qlen(int fd,
>>          uint32_t ctx[64];
>>          int min = INT_MAX, max = 0;
>>   
>> -       for (int i = 0; i < ARRAY_SIZE(ctx); i++)
>> +       for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
>>                  ctx[i] = gem_context_create(fd);
>> +               gem_context_set_all_engines(fd, ctx[i]);
>> +       }
>>   
>> -       for (unsigned int n = 0; n < nengine; n++) {
>> +       for (unsigned int n = 0; n < engines->nengines; n++) {
>>                  uint64_t saved = execbuf->flags;
>>                  struct timespec tv = {};
>>                  int q;
>>   
>> -               execbuf->flags |= engine[n];
>> +               execbuf->flags |= engines->engines[n].flags;
>>   
>>                  for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
>>                          execbuf->rsvd1 = ctx[i];
>> @@ -90,7 +92,8 @@ static int measure_qlen(int fd,
>>                   * Be conservative and aim not to overshoot timeout, so scale
>>                   * down by 8 for hopefully a max of 12.5% error.
>>                   */
>> -               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
>> +               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) /
>> +                   8 + 1;
>>                  if (q < min)
>>                          min = q;
>>                  if (q > max)
>> @@ -107,7 +110,7 @@ static int measure_qlen(int fd,
>>   }
>>   
>>   static void single(int fd, uint32_t handle,
>> -                  const struct intel_execution_engine *e,
>> +                  const struct intel_execution_engine2 *e2,
>>                     unsigned flags,
>>                     const int ncpus,
>>                     int timeout)
>> @@ -125,13 +128,14 @@ static void single(int fd, uint32_t handle,
>>          shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
>>          igt_assert(shared != MAP_FAILED);
>>   
>> -       gem_require_ring(fd, e->exec_id | e->flags);
>> -
>>          for (n = 0; n < 64; n++) {
>>                  if (flags & QUEUE)
>>                          contexts[n] = gem_queue_create(fd);
>>                  else
>>                          contexts[n] = gem_context_create(fd);
>> +
>> +               if (gem_context_has_engine_map(fd, 0))
>> +                       gem_context_set_all_engines(fd, contexts[n]);
> 
> That was a moment of doubt on how well setting engines on a "queue"
> worked :)

Why wouldn't it work?

> 
>>          }
>>   
>>          memset(&obj, 0, sizeof(obj));
>> @@ -152,12 +156,12 @@ static void single(int fd, uint32_t handle,
>>          execbuf.buffers_ptr = to_user_pointer(&obj);
>>          execbuf.buffer_count = 1;
>>          execbuf.rsvd1 = contexts[0];
>> -       execbuf.flags = e->exec_id | e->flags;
>> +       execbuf.flags = e2->flags;
>>          execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
>>          execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
>>          igt_require(__gem_execbuf(fd, &execbuf) == 0);
>>          if (__gem_execbuf(fd, &execbuf)) {
>> -               execbuf.flags = e->exec_id | e->flags;
>> +               execbuf.flags = e2->flags;
>>                  reloc.target_handle = obj.handle;
>>                  gem_execbuf(fd, &execbuf);
>>          }
>> @@ -190,7 +194,8 @@ static void single(int fd, uint32_t handle,
>>                  clock_gettime(CLOCK_MONOTONIC, &now);
>>   
>>                  igt_info("[%d] %s: %'u cycles: %.3fus%s\n",
>> -                        child, e->name, count, elapsed(&start, &now)*1e6 / count,
>> +                        child, e2->name, count,
>> +                        elapsed(&start, &now) * 1e6 / count,
>>                           flags & INTERRUPTIBLE ? " (interruptible)" : "");
>>   
>>                  shared[child].elapsed = elapsed(&start, &now);
>> @@ -209,7 +214,7 @@ static void single(int fd, uint32_t handle,
>>                  }
>>   
>>                  igt_info("Total %s: %'lu cycles: %.3fus%s\n",
>> -                        e->name, total, max*1e6 / total,
>> +                        e2->name, total, max*1e6 / total,
>>                           flags & INTERRUPTIBLE ? " (interruptible)" : "");
>>          }
>>   
>> @@ -223,25 +228,20 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>   {
>>          struct drm_i915_gem_execbuffer2 execbuf;
>>          struct drm_i915_gem_exec_object2 obj[2];
>> -       unsigned int engine[16], e;
>> -       const char *name[16];
>> +       struct intel_engine_data engines = { };
>>          uint32_t contexts[65];
>> -       unsigned int nengine;
>>          int n, qlen;
>>   
>> -       nengine = 0;
>> -       for_each_physical_engine(fd, e) {
>> -               engine[nengine] = e;
>> -               name[nengine] = e__->name;
>> -               nengine++;
>> -       }
>> -       igt_require(nengine);
>> +       engines = intel_init_engine_list(fd, 0);
>> +       igt_require(engines.nengines);
>>   
>>          for (n = 0; n < ARRAY_SIZE(contexts); n++) {
>>                  if (flags & QUEUE)
>>                          contexts[n] = gem_queue_create(fd);
>>                  else
>>                          contexts[n] = gem_context_create(fd);
>> +
>> +               gem_context_set_all_engines(fd, contexts[n]);
>>          }
>>   
>>          memset(obj, 0, sizeof(obj));
>> @@ -256,7 +256,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>          igt_require(__gem_execbuf(fd, &execbuf) == 0);
>>          gem_sync(fd, handle);
>>   
>> -       qlen = measure_qlen(fd, &execbuf, engine, nengine, timeout);
>> +       qlen = measure_qlen(fd, &execbuf, &engines, timeout);
>>          igt_info("Using timing depth of %d batches\n", qlen);
>>   
>>          execbuf.buffers_ptr = to_user_pointer(obj);
>> @@ -264,13 +264,15 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>   
>>          for (int pot = 2; pot <= 64; pot *= 2) {
>>                  for (int nctx = pot - 1; nctx <= pot + 1; nctx++) {
>> -                       igt_fork(child, nengine) {
>> +                       igt_fork(child, engines.nengines) {
>>                                  struct timespec start, now;
>>                                  unsigned int count = 0;
>>   
>>                                  obj[0].handle = gem_create(fd, 4096);
>> -                               execbuf.flags |= engine[child];
>> -                               for (int loop = 0; loop < ARRAY_SIZE(contexts); loop++) {
>> +                               execbuf.flags |= engines.engines[child].flags;
>> +                               for (int loop = 0;
>> +                                    loop < ARRAY_SIZE(contexts);
>> +                                    loop++) {
>>                                          execbuf.rsvd1 = contexts[loop];
>>                                          gem_execbuf(fd, &execbuf);
>>                                  }
>> @@ -279,7 +281,8 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>                                  clock_gettime(CLOCK_MONOTONIC, &start);
>>                                  do {
>>                                          for (int loop = 0; loop < qlen; loop++) {
>> -                                               execbuf.rsvd1 = contexts[loop % nctx];
>> +                                               execbuf.rsvd1 =
>> +                                                       contexts[loop % nctx];
>>                                                  gem_execbuf(fd, &execbuf);
>>                                          }
>>                                          count += qlen;
>> @@ -291,8 +294,11 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>                                  gem_close(fd, obj[0].handle);
>>   
>>                                  igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
>> -                                        nctx, child, name[child], count, elapsed(&start, &now)*1e6 / count,
>> -                                        flags & INTERRUPTIBLE ? " (interruptible)" : "",
>> +                                        nctx, child,
>> +                                        engines.engines[child].name, count,
>> +                                        elapsed(&start, &now) * 1e6 / count,
>> +                                        flags & INTERRUPTIBLE ?
>> +                                        " (interruptible)" : "",
>>                                           elapsed(&start, &now));
>>                          }
>>                          igt_waitchildren();
>> @@ -306,6 +312,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>   igt_main
>>   {
>>          const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>> +       const struct intel_execution_engine2 *e2;
>>          const struct intel_execution_engine *e;
>>          static const struct {
>>                  const char *name;
>> @@ -338,7 +345,49 @@ igt_main
>>                  igt_fork_hang_detector(fd);
>>          }
>>   
>> +       /* Legacy testing must be first. */
>>          for (e = intel_execution_engines; e->name; e++) {
>> +               e2 = gem_eb_flags_to_engine(e->exec_id | e->flags);
>> +               if (!e2)
>> +                       continue; /* I915_EXEC_BSD with no ring selectors */
>> +
>> +               for (typeof(*phases) *p = phases; p->name; p++) {
>> +                       igt_subtest_group {
>> +                               igt_fixture {
>> +                                       gem_require_ring(fd, e2->flags);
>> +                                       if (p->require)
>> +                                               igt_require(p->require(fd));
>> +                               }
>> +
>> +                               igt_subtest_f("legacy-%s%s%s",
>> +                                             e->name,
>> +                                             *p->name ? "-" : "",
>> +                                             p->name)
>> +                                       single(fd, light, e2, p->flags, 1, 5);
>> +
>> +                               igt_skip_on_simulation();
>> +
>> +                               igt_subtest_f("legacy-%s%s-heavy%s",
>> +                                             e->exec_id == 0 ? "basic-" : "",
>> +                                             e->name,
>> +                                             p->name)
>> +                                       single(fd, heavy, e2, p->flags, 1, 5);
>> +                               igt_subtest_f("legacy-forked-%s%s",
>> +                                             e->name,
>> +                                             p->name)
>> +                                       single(fd, light, e2, p->flags, ncpus,
>> +                                              150);
>> +                               igt_subtest_f("legacy-forked-%s-heavy%s",
>> +                                             e->name,
>> +                                             p->name)
>> +                                       single(fd, heavy, e2, p->flags, ncpus,
>> +                                              150);
>> +                       }
>> +               }
>> +       }
>> +
>> +       /* Must come after legacy subtests. */
>> +       __for_each_physical_engine(fd, e2) {
>>                  for (typeof(*phases) *p = phases; p->name; p++) {
>>                          igt_subtest_group {
>>                                  igt_fixture {
>> @@ -346,33 +395,40 @@ igt_main
>>                                                  igt_require(p->require(fd));
>>                                  }
>>   
>> -                               igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
>> -                                       single(fd, light, e, p->flags, 1, 5);
>> +                               igt_subtest_f("%s%s%s",
>> +                                             e2->name,
>> +                                             *p->name ? "-" : "",
>> +                                             p->name)
>> +                                       single(fd, light, e2, p->flags, 1, 5);
>>   
>>                                  igt_skip_on_simulation();
>>   
>> -                               igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
>> -                                       single(fd, heavy, e, p->flags, 1, 5);
>> -                               igt_subtest_f("forked-%s%s", e->name, p->name)
>> -                                       single(fd, light, e, p->flags, ncpus, 150);
>> -                               igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
>> -                                       single(fd, heavy, e, p->flags, ncpus, 150);
>> +                               igt_subtest_f("%s-heavy%s", e2->name, p->name)
>> +                                       single(fd, heavy, e2, p->flags, 1, 5);
>> +                               igt_subtest_f("%s-forked%s", e2->name, p->name)
>> +                                       single(fd, light, e2, p->flags, ncpus,
>> +                                              150);
>> +                               igt_subtest_f("%s-forked-heavy%s",
>> +                                             e2->name,
>> +                                             p->name)
>> +                                       single(fd, heavy, e2, p->flags, ncpus,
>> +                                              150);
>>                          }
>>                  }
>>          }
> 
> Ok, that looks like what I would expect. For crux tests, we iterate over
> both the legacy ring selector, and along the engine[] indices.
> 
>> -       igt_subtest("basic-all-light")
>> +       igt_subtest("all-light")
>>                  all(fd, light, 0, 5);
>> -       igt_subtest("basic-all-heavy")
>> +       igt_subtest("all-heavy")
>>                  all(fd, heavy, 0, 5);
> 
> And for "all" tests where we are just trying to utilise all engines at
> once, we only care about the underlying HW utilisation and so the new
> query interface works best.

Cool, so no complaints?

Regards,

Tvrtko