[Intel-gfx] [PATCH i-g-t] tests/i915/gem_ctx_switch: Update with engine discovery

Chris Wilson chris at chris-wilson.co.uk
Thu Jun 27 10:35:27 UTC 2019


Quoting Tvrtko Ursulin (2019-06-27 11:20:19)
> diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
> index 647911d4c42e..de39748344b8 100644
> --- a/tests/i915/gem_ctx_switch.c
> +++ b/tests/i915/gem_ctx_switch.c
> @@ -55,7 +55,7 @@ static double elapsed(const struct timespec *start, const struct timespec *end)
>  
>  static int measure_qlen(int fd,
>                         struct drm_i915_gem_execbuffer2 *execbuf,
> -                       unsigned int *engine, unsigned int nengine,
> +                       const struct intel_engine_data *engines,
>                         int timeout)
>  {
>         const struct drm_i915_gem_exec_object2 * const obj =
> @@ -63,15 +63,17 @@ static int measure_qlen(int fd,
>         uint32_t ctx[64];
>         int min = INT_MAX, max = 0;
>  
> -       for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> +       for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
>                 ctx[i] = gem_context_create(fd);
> +               gem_context_set_all_engines(fd, ctx[i]);
> +       }
>  
> -       for (unsigned int n = 0; n < nengine; n++) {
> +       for (unsigned int n = 0; n < engines->nengines; n++) {
>                 uint64_t saved = execbuf->flags;
>                 struct timespec tv = {};
>                 int q;
>  
> -               execbuf->flags |= engine[n];
> +               execbuf->flags |= engines->engines[n].flags;
>  
>                 for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
>                         execbuf->rsvd1 = ctx[i];
> @@ -90,7 +92,8 @@ static int measure_qlen(int fd,
>                  * Be conservative and aim not to overshoot timeout, so scale
>                  * down by 8 for hopefully a max of 12.5% error.
>                  */
> -               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
> +               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) /
> +                   8 + 1;
>                 if (q < min)
>                         min = q;
>                 if (q > max)
> @@ -107,7 +110,7 @@ static int measure_qlen(int fd,
>  }
>  
>  static void single(int fd, uint32_t handle,
> -                  const struct intel_execution_engine *e,
> +                  const struct intel_execution_engine2 *e2,
>                    unsigned flags,
>                    const int ncpus,
>                    int timeout)
> @@ -125,13 +128,14 @@ static void single(int fd, uint32_t handle,
>         shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
>         igt_assert(shared != MAP_FAILED);
>  
> -       gem_require_ring(fd, e->exec_id | e->flags);
> -
>         for (n = 0; n < 64; n++) {
>                 if (flags & QUEUE)
>                         contexts[n] = gem_queue_create(fd);
>                 else
>                         contexts[n] = gem_context_create(fd);
> +
> +               if (gem_context_has_engine_map(fd, 0))
> +                       gem_context_set_all_engines(fd, contexts[n]);

That was a moment of doubt on how well setting engines on a "queue"
worked :)

>         }
>  
>         memset(&obj, 0, sizeof(obj));
> @@ -152,12 +156,12 @@ static void single(int fd, uint32_t handle,
>         execbuf.buffers_ptr = to_user_pointer(&obj);
>         execbuf.buffer_count = 1;
>         execbuf.rsvd1 = contexts[0];
> -       execbuf.flags = e->exec_id | e->flags;
> +       execbuf.flags = e2->flags;
>         execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
>         execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
>         igt_require(__gem_execbuf(fd, &execbuf) == 0);
>         if (__gem_execbuf(fd, &execbuf)) {
> -               execbuf.flags = e->exec_id | e->flags;
> +               execbuf.flags = e2->flags;
>                 reloc.target_handle = obj.handle;
>                 gem_execbuf(fd, &execbuf);
>         }
> @@ -190,7 +194,8 @@ static void single(int fd, uint32_t handle,
>                 clock_gettime(CLOCK_MONOTONIC, &now);
>  
>                 igt_info("[%d] %s: %'u cycles: %.3fus%s\n",
> -                        child, e->name, count, elapsed(&start, &now)*1e6 / count,
> +                        child, e2->name, count,
> +                        elapsed(&start, &now) * 1e6 / count,
>                          flags & INTERRUPTIBLE ? " (interruptible)" : "");
>  
>                 shared[child].elapsed = elapsed(&start, &now);
> @@ -209,7 +214,7 @@ static void single(int fd, uint32_t handle,
>                 }
>  
>                 igt_info("Total %s: %'lu cycles: %.3fus%s\n",
> -                        e->name, total, max*1e6 / total,
> +                        e2->name, total, max*1e6 / total,
>                          flags & INTERRUPTIBLE ? " (interruptible)" : "");
>         }
>  
> @@ -223,25 +228,20 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>  {
>         struct drm_i915_gem_execbuffer2 execbuf;
>         struct drm_i915_gem_exec_object2 obj[2];
> -       unsigned int engine[16], e;
> -       const char *name[16];
> +       struct intel_engine_data engines = { };
>         uint32_t contexts[65];
> -       unsigned int nengine;
>         int n, qlen;
>  
> -       nengine = 0;
> -       for_each_physical_engine(fd, e) {
> -               engine[nengine] = e;
> -               name[nengine] = e__->name;
> -               nengine++;
> -       }
> -       igt_require(nengine);
> +       engines = intel_init_engine_list(fd, 0);
> +       igt_require(engines.nengines);
>  
>         for (n = 0; n < ARRAY_SIZE(contexts); n++) {
>                 if (flags & QUEUE)
>                         contexts[n] = gem_queue_create(fd);
>                 else
>                         contexts[n] = gem_context_create(fd);
> +
> +               gem_context_set_all_engines(fd, contexts[n]);
>         }
>  
>         memset(obj, 0, sizeof(obj));
> @@ -256,7 +256,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>         igt_require(__gem_execbuf(fd, &execbuf) == 0);
>         gem_sync(fd, handle);
>  
> -       qlen = measure_qlen(fd, &execbuf, engine, nengine, timeout);
> +       qlen = measure_qlen(fd, &execbuf, &engines, timeout);
>         igt_info("Using timing depth of %d batches\n", qlen);
>  
>         execbuf.buffers_ptr = to_user_pointer(obj);
> @@ -264,13 +264,15 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>  
>         for (int pot = 2; pot <= 64; pot *= 2) {
>                 for (int nctx = pot - 1; nctx <= pot + 1; nctx++) {
> -                       igt_fork(child, nengine) {
> +                       igt_fork(child, engines.nengines) {
>                                 struct timespec start, now;
>                                 unsigned int count = 0;
>  
>                                 obj[0].handle = gem_create(fd, 4096);
> -                               execbuf.flags |= engine[child];
> -                               for (int loop = 0; loop < ARRAY_SIZE(contexts); loop++) {
> +                               execbuf.flags |= engines.engines[child].flags;
> +                               for (int loop = 0;
> +                                    loop < ARRAY_SIZE(contexts);
> +                                    loop++) {
>                                         execbuf.rsvd1 = contexts[loop];
>                                         gem_execbuf(fd, &execbuf);
>                                 }
> @@ -279,7 +281,8 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>                                 clock_gettime(CLOCK_MONOTONIC, &start);
>                                 do {
>                                         for (int loop = 0; loop < qlen; loop++) {
> -                                               execbuf.rsvd1 = contexts[loop % nctx];
> +                                               execbuf.rsvd1 =
> +                                                       contexts[loop % nctx];
>                                                 gem_execbuf(fd, &execbuf);
>                                         }
>                                         count += qlen;
> @@ -291,8 +294,11 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>                                 gem_close(fd, obj[0].handle);
>  
>                                 igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
> -                                        nctx, child, name[child], count, elapsed(&start, &now)*1e6 / count,
> -                                        flags & INTERRUPTIBLE ? " (interruptible)" : "",
> +                                        nctx, child,
> +                                        engines.engines[child].name, count,
> +                                        elapsed(&start, &now) * 1e6 / count,
> +                                        flags & INTERRUPTIBLE ?
> +                                        " (interruptible)" : "",
>                                          elapsed(&start, &now));
>                         }
>                         igt_waitchildren();
> @@ -306,6 +312,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>  igt_main
>  {
>         const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +       const struct intel_execution_engine2 *e2;
>         const struct intel_execution_engine *e;
>         static const struct {
>                 const char *name;
> @@ -338,7 +345,49 @@ igt_main
>                 igt_fork_hang_detector(fd);
>         }
>  
> +       /* Legacy testing must be first. */
>         for (e = intel_execution_engines; e->name; e++) {
> +               e2 = gem_eb_flags_to_engine(e->exec_id | e->flags);
> +               if (!e2)
> +                       continue; /* I915_EXEC_BSD with no ring selectors */
> +
> +               for (typeof(*phases) *p = phases; p->name; p++) {
> +                       igt_subtest_group {
> +                               igt_fixture {
> +                                       gem_require_ring(fd, e2->flags);
> +                                       if (p->require)
> +                                               igt_require(p->require(fd));
> +                               }
> +
> +                               igt_subtest_f("legacy-%s%s%s",
> +                                             e->name,
> +                                             *p->name ? "-" : "",
> +                                             p->name)
> +                                       single(fd, light, e2, p->flags, 1, 5);
> +
> +                               igt_skip_on_simulation();
> +
> +                               igt_subtest_f("legacy-%s%s-heavy%s",
> +                                             e->exec_id == 0 ? "basic-" : "",
> +                                             e->name,
> +                                             p->name)
> +                                       single(fd, heavy, e2, p->flags, 1, 5);
> +                               igt_subtest_f("legacy-forked-%s%s",
> +                                             e->name,
> +                                             p->name)
> +                                       single(fd, light, e2, p->flags, ncpus,
> +                                              150);
> +                               igt_subtest_f("legacy-forked-%s-heavy%s",
> +                                             e->name,
> +                                             p->name)
> +                                       single(fd, heavy, e2, p->flags, ncpus,
> +                                              150);
> +                       }
> +               }
> +       }
> +
> +       /* Must come after legacy subtests. */
> +       __for_each_physical_engine(fd, e2) {
>                 for (typeof(*phases) *p = phases; p->name; p++) {
>                         igt_subtest_group {
>                                 igt_fixture {
> @@ -346,33 +395,40 @@ igt_main
>                                                 igt_require(p->require(fd));
>                                 }
>  
> -                               igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
> -                                       single(fd, light, e, p->flags, 1, 5);
> +                               igt_subtest_f("%s%s%s",
> +                                             e2->name,
> +                                             *p->name ? "-" : "",
> +                                             p->name)
> +                                       single(fd, light, e2, p->flags, 1, 5);
>  
>                                 igt_skip_on_simulation();
>  
> -                               igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
> -                                       single(fd, heavy, e, p->flags, 1, 5);
> -                               igt_subtest_f("forked-%s%s", e->name, p->name)
> -                                       single(fd, light, e, p->flags, ncpus, 150);
> -                               igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
> -                                       single(fd, heavy, e, p->flags, ncpus, 150);
> +                               igt_subtest_f("%s-heavy%s", e2->name, p->name)
> +                                       single(fd, heavy, e2, p->flags, 1, 5);
> +                               igt_subtest_f("%s-forked%s", e2->name, p->name)
> +                                       single(fd, light, e2, p->flags, ncpus,
> +                                              150);
> +                               igt_subtest_f("%s-forked-heavy%s",
> +                                             e2->name,
> +                                             p->name)
> +                                       single(fd, heavy, e2, p->flags, ncpus,
> +                                              150);
>                         }
>                 }
>         }

Ok, that looks like what I would expect. For crux tests, we iterate over
both the legacy ring selector, and along the engine[] indices.

> -       igt_subtest("basic-all-light")
> +       igt_subtest("all-light")
>                 all(fd, light, 0, 5);
> -       igt_subtest("basic-all-heavy")
> +       igt_subtest("all-heavy")
>                 all(fd, heavy, 0, 5);

And for "all" tests where we are just trying to utilise all engines at
once, we only care about the underlying HW utilisation and so the new
query interface works best.
-Chris


More information about the Intel-gfx mailing list