[Intel-gfx] [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT

Wed May 15 19:33:55 UTC 2019

Quoting Tvrtko Ursulin (2019-05-15 07:37:18)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > v2: Test each shared context is its own timeline and allows request
> > reordering between shared contexts.
> > 
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> > ---
> >   lib/i915/gem_context.c        |  68 +++
> >   lib/i915/gem_context.h        |  13 +
> >   tests/Makefile.sources        |   1 +
> >   tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
> >   tests/i915/gem_exec_whisper.c |  32 +-
> >   tests/meson.build             |   1 +
> >   6 files changed, 962 insertions(+), 9 deletions(-)
> >   create mode 100644 tests/i915/gem_ctx_shared.c
> > 
> > diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
> > index f94d89cb4..8fb8984d1 100644
> > --- a/lib/i915/gem_context.c
> > +++ b/lib/i915/gem_context.c
> > @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
> >       igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
> >   }
> >   
> > +int
> > +__gem_context_clone(int i915,
> > +                 uint32_t src, unsigned int share,
> > +                 unsigned int flags,
> > +                 uint32_t *out)
> > +{
> > +     struct drm_i915_gem_context_create_ext_clone clone = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .clone_id = src,
> > +             .flags = share,
> > +     };
> > +     struct drm_i915_gem_context_create_ext arg = {
> > +             .flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&clone),
> > +     };
> > +     int err = 0;
> > +
> > +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
> > +             err = -errno;
> > +
> > +     *out = arg.ctx_id;
> > +
> > +     errno = 0;
> > +     return err;
> > +}
> > +
> > +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
> > +{
> > +     uint32_t ctx;
> > +
> > +     __gem_context_clone(i915, 0, share, flags, &ctx);
> > +     if (ctx)
> > +             gem_context_destroy(i915, ctx);
> > +
> > +     errno = 0;
> > +     return ctx;
> > +}
> > +
> > +bool gem_contexts_has_shared_gtt(int i915)
> > +{
> > +     return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
> > +}
> > +
> > +bool gem_has_queues(int i915)
> > +{
> > +     return __gem_context_has(i915,
> > +                              I915_CONTEXT_CLONE_VM,
> > +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +}
> > +
> > +uint32_t gem_context_clone(int i915,
> > +                        uint32_t src, unsigned int share,
> > +                        unsigned int flags)
> > +{
> > +     uint32_t ctx;
> > +
> > +     igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
> > +
> > +     return ctx;
> > +}
> > +
> > +uint32_t gem_queue_create(int i915)
> > +{
> > +     return gem_context_clone(i915, 0,
> > +                              I915_CONTEXT_CLONE_VM,
> > +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +}
> > +
> >   bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
> >   {
> >       struct drm_i915_gem_exec_object2 exec = {};
> > diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
> > index a052714d4..8043c3401 100644
> > --- a/lib/i915/gem_context.h
> > +++ b/lib/i915/gem_context.h
> > @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
> >   void gem_context_destroy(int fd, uint32_t ctx_id);
> >   int __gem_context_destroy(int fd, uint32_t ctx_id);
> >   
> > +int __gem_context_clone(int i915,
> > +                     uint32_t src, unsigned int share,
> > +                     unsigned int flags,
> > +                     uint32_t *out);
> > +uint32_t gem_context_clone(int i915,
> > +                        uint32_t src, unsigned int share,
> > +                        unsigned int flags);
> > +
> > +uint32_t gem_queue_create(int i915);
> > +
> > +bool gem_contexts_has_shared_gtt(int i915);
> > +bool gem_has_queues(int i915);
> > +
> >   bool gem_has_contexts(int fd);
> >   void gem_require_contexts(int fd);
> >   void gem_context_require_bannable(int fd);
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index e1b7feeb2..3552e895b 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -22,6 +22,7 @@ TESTS_progs = \
> >       drm_mm \
> >       drm_read \
> >       i915/gem_ctx_clone \
> > +     i915/gem_ctx_shared \
> >       i915/gem_vm_create \
> >       kms_3d \
> >       kms_addfb_basic \
> > diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
> > new file mode 100644
> > index 000000000..0076f5e9d
> > --- /dev/null
> > +++ b/tests/i915/gem_ctx_shared.c
> > @@ -0,0 +1,856 @@
> > +/*
> > + * Copyright © 2017 Intel Corporation
> 
> 2019

Nah, that would imply I put any thought into touching it since.

> > +static void exhaust_shared_gtt(int i915, unsigned int flags)
> > +#define EXHAUST_LRC 0x1
> > +{
> > +     i915 = gem_reopen_driver(i915);
> > +
> > +     igt_fork(pid, 1) {
> > +             const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +             struct drm_i915_gem_exec_object2 obj = {
> > +                     .handle = gem_create(i915, 4096)
> > +             };
> > +             struct drm_i915_gem_execbuffer2 execbuf = {
> > +                     .buffers_ptr = to_user_pointer(&obj),
> > +                     .buffer_count = 1,
> > +             };
> > +             uint32_t parent, child;
> > +             unsigned long count = 0;
> > +             int err;
> > +
> > +             gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +
> > +             child = 0;
> > +             for (;;) {
> > +                     parent = child;
> > +                     err = __gem_context_clone(i915,
> > +                                               parent, I915_CONTEXT_CLONE_VM,
> > +                                               0, &child);
> > +                     if (err)
> > +                             break;
> > +
> > +                     if (flags & EXHAUST_LRC) {
> > +                             execbuf.rsvd1 = child;
> > +                             err = __gem_execbuf(i915, &execbuf);
> > +                             if (err)
> > +                                     break;
> > +                     }
> 
> What are the stop conditions in this test, with and without the 
> EXHAUST_LRC flag? It would be good to put that in a comment.

It runs until the kernel dies. The giveaway is meant to be the test name.

> Especially since AFAIR this one was causing OOM for me so might need to 
> be tweaked.

It runs until the kernel dies.

> > +
> > +                     count++;
> > +             }
> > +             gem_sync(i915, obj.handle);
> > +
> > +             igt_info("Created %lu shared contexts, before %d (%s)\n",
> > +                      count, err, strerror(-err));
> > +     }
> > +     close(i915);
> > +     igt_waitchildren();
> > +}
> > +
> > +static void exec_shared_gtt(int i915, unsigned int ring)
> > +{
> > +     const int gen = intel_gen(intel_get_drm_devid(i915));
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096)
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +     };
> > +     uint32_t scratch = obj.handle;
> > +     uint32_t batch[16];
> > +     int i;
> > +
> > +     gem_require_ring(i915, ring);
> > +     igt_require(gem_can_store_dword(i915, ring));
> > +
> > +     /* Load object into place in the GTT */
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +
> > +     /* Presume nothing causes an eviction in the meantime */
> > +
> > +     obj.handle = gem_create(i915, 4096);
> > +
> > +     i = 0;
> > +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +     if (gen >= 8) {
> > +             batch[++i] = obj.offset;
> > +             batch[++i] = 0;
> > +     } else if (gen >= 4) {
> > +             batch[++i] = 0;
> > +             batch[++i] = obj.offset;
> > +     } else {
> > +             batch[i]--;
> > +             batch[++i] = obj.offset;
> > +     }
> > +     batch[++i] = 0xc0ffee;
> > +     batch[++i] = MI_BATCH_BUFFER_END;
> > +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> > +
> > +     obj.offset += 4096; /* make sure we don't cause an eviction! */
> 
> Is 4k apart safe?

Since to change would imply an ABI break and I see no param indicating
an ABI change, and Joonas keeps on refusing to add such information.

> A short comment on how does this test work would be good.
> 
> > +     obj.flags |= EXEC_OBJECT_PINNED;
> > +     execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> > +     if (gen > 3 && gen < 6)
> > +             execbuf.flags |= I915_EXEC_SECURE;
> > +
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_context_destroy(i915, execbuf.rsvd1);
> > +     gem_sync(i915, obj.handle); /* write hazard lies */
> > +     gem_close(i915, obj.handle);
> > +
> > +     gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
> > +     gem_close(i915, scratch);
> > +
> > +     igt_assert_eq_u32(*batch, 0xc0ffee);
> > +}
> > +
> > +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +             .rsvd1 = ctx,
> > +     };
> > +     int err;
> > +
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +     err = gem_wait(i915, obj.handle, &timeout);
> > +     gem_close(i915, obj.handle);
> > +
> > +     return err;
> > +}
> > +
> > +static bool has_single_timeline(int i915)
> > +{
> > +     uint32_t ctx;
> > +
> > +     __gem_context_clone(i915, 0, 0,
> > +                         I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
> > +                         &ctx);
> > +     if (ctx)
> > +             gem_context_destroy(i915, ctx);
> > +
> > +     return ctx != 0;
> > +}
> > +
> > +static bool ignore_engine(unsigned engine)
> > +{
> > +     if (engine == 0)
> > +             return true;
> > +
> > +     if (engine == I915_EXEC_BSD)
> > +             return true;
> > +
> > +     return false;
> > +}
> > +
> > +static void single_timeline(int i915)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +     };
> > +     struct sync_fence_info rings[16];
> 
> Could use for_each_physical_engine to count the engines. But we probably 
> have plenty of this around the code base.
> 
> > +     struct sync_file_info sync_file_info = {
> > +             .num_fences = 1,
> > +     };
> > +     unsigned int engine;
> > +     int n;
> > +
> > +     igt_require(has_single_timeline(i915));
> > +
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_sync(i915, obj.handle);
> > +
> > +     /*
> > +      * For a "single timeline" context, each ring is on the common
> > +      * timeline, unlike a normal context where each ring has an
> > +      * independent timeline. That is no matter which engine we submit
> > +      * to, it reports the same timeline name and fence context. However,
> > +      * the fence context is not reported through the sync_fence_info.
> 
> Is the test useful then? There was one I reviewed earlier in this series 
> which tested for execution ordering, which sounds like is what's needed.

It is a variant. This one is a couple of years older. Both accomplish
similar things through very different means, the more the serendipitous.

> 
> > +      */
> > +     execbuf.rsvd1 =
> > +             gem_context_clone(i915, 0, 0,
> > +                               I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +     execbuf.flags = I915_EXEC_FENCE_OUT;
> > +     n = 0;
> > +     for_each_engine(i915, engine) {
> 
> for_each_physical_engine to align with Andi's work?

No, this would be an ABI iterator not a physical iterator.

> > +             gem_execbuf_wr(i915, &execbuf);
> > +             sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
> > +             do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
> > +             close(execbuf.rsvd2 >> 32);
> > +
> > +             igt_info("ring[%d] fence: %s %s\n",
> > +                      n, rings[n].driver_name, rings[n].obj_name);
> > +             n++;
> > +     }
> > +     gem_sync(i915, obj.handle);
> > +     gem_close(i915, obj.handle);
> > +
> > +     for (int i = 1; i < n; i++) {
> > +             igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
> > +             igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
> 
> What is in obj_name?

The timeline name. sync_file is plain old useless. The asserts here are
drivel.

> > +     }
> > +}
> > +
> > +static void exec_single_timeline(int i915, unsigned int ring)
> > +{
> > +     unsigned int other;
> > +     igt_spin_t *spin;
> > +     uint32_t ctx;
> > +
> > +     gem_require_ring(i915, ring);
> > +     igt_require(has_single_timeline(i915));
> > +
> > +     /*
> > +      * On an ordinary context, a blockage on one ring doesn't prevent
> > +      * execution on an other.
> > +      */
> > +     ctx = 0;
> > +     spin = NULL;
> > +     for_each_engine(i915, other) {
> 
> for_each_physical

Modern inventions.

> > +             if (other == ring || ignore_engine(other))
> > +                     continue;
> > +
> > +             if (spin == NULL) {
> > +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> > +             } else {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = spin->execbuf.buffers_ptr,
> > +                             .buffer_count = spin->execbuf.buffer_count,
> > +                             .flags = other,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     gem_execbuf(i915, &execbuf);
> > +             }
> > +     }
> > +     igt_require(spin);
> > +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
> > +     igt_spin_free(i915, spin);
> > +
> > +     /*
> > +      * But if we create a context with just a single shared timeline,
> > +      * then it will block waiting for the earlier requests on the
> > +      * other engines.
> > +      */
> > +     ctx = gem_context_clone(i915, 0, 0,
> > +                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +     spin = NULL;
> > +     for_each_engine(i915, other) {
> 
> Ditto.

Hey! Where was that when this code was written^W copied :-p

> > +             if (other == ring || ignore_engine(other))
> > +                     continue;
> > +
> > +             if (spin == NULL) {
> > +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> > +             } else {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = spin->execbuf.buffers_ptr,
> > +                             .buffer_count = spin->execbuf.buffer_count,
> > +                             .flags = other,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     gem_execbuf(i915, &execbuf);
> > +             }
> > +     }
> > +     igt_assert(spin);
> > +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
> > +     igt_spin_free(i915, spin);
> > +}
> > +
> > +static void store_dword(int i915, uint32_t ctx, unsigned ring,
> > +                     uint32_t target, uint32_t offset, uint32_t value,
> > +                     uint32_t cork, unsigned write_domain)
> > +{
> > +     const int gen = intel_gen(intel_get_drm_devid(i915));
> > +     struct drm_i915_gem_exec_object2 obj[3];
> > +     struct drm_i915_gem_relocation_entry reloc;
> > +     struct drm_i915_gem_execbuffer2 execbuf;
> > +     uint32_t batch[16];
> > +     int i;
> > +
> > +     memset(&execbuf, 0, sizeof(execbuf));
> > +     execbuf.buffers_ptr = to_user_pointer(obj + !cork);
> > +     execbuf.buffer_count = 2 + !!cork;
> > +     execbuf.flags = ring;
> > +     if (gen < 6)
> > +             execbuf.flags |= I915_EXEC_SECURE;
> > +     execbuf.rsvd1 = ctx;
> > +
> > +     memset(obj, 0, sizeof(obj));
> > +     obj[0].handle = cork;
> > +     obj[1].handle = target;
> > +     obj[2].handle = gem_create(i915, 4096);
> > +
> > +     memset(&reloc, 0, sizeof(reloc));
> > +     reloc.target_handle = obj[1].handle;
> > +     reloc.presumed_offset = 0;
> > +     reloc.offset = sizeof(uint32_t);
> > +     reloc.delta = offset;
> > +     reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> > +     reloc.write_domain = write_domain;
> > +     obj[2].relocs_ptr = to_user_pointer(&reloc);
> > +     obj[2].relocation_count = 1;
> > +
> > +     i = 0;
> > +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +     if (gen >= 8) {
> > +             batch[++i] = offset;
> > +             batch[++i] = 0;
> > +     } else if (gen >= 4) {
> > +             batch[++i] = 0;
> > +             batch[++i] = offset;
> > +             reloc.offset += sizeof(uint32_t);
> > +     } else {
> > +             batch[i]--;
> > +             batch[++i] = offset;
> > +     }
> > +     batch[++i] = value;
> > +     batch[++i] = MI_BATCH_BUFFER_END;
> > +     gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_close(i915, obj[2].handle);
> > +}
> > +
> > +static uint32_t create_highest_priority(int i915)
> > +{
> > +     uint32_t ctx = gem_context_create(i915);
> > +
> > +     /*
> > +      * If there is no priority support, all contexts will have equal
> > +      * priority (and therefore the max user priority), so no context
> > +      * can overtake us, and we effectively can form a plug.
> > +      */
> > +     __gem_context_set_priority(i915, ctx, MAX_PRIO);
> > +
> > +     return ctx;
> > +}
> > +
> > +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
> > +{
> > +     igt_spin_t *spin[MAX_ELSP_QLEN];
> 
> Why is this 16?

2x as big as the deepest known qlen. And 16 is that number that crops up
everywhere as a "just big enough number"

> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> > +             const struct igt_spin_factory opts = {
> > +                     .ctx = create_highest_priority(i915),
> > +                     .engine = engine,
> > +             };
> > +             spin[n] = __igt_spin_factory(i915, &opts);
> > +             gem_context_destroy(i915, opts.ctx);
> > +     }
> > +
> > +     igt_cork_unplug(c); /* batches will now be queued on the engine */
> > +     igt_debugfs_dump(i915, "i915_engine_info");
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
> > +             igt_spin_free(i915, spin[n]);
> > +}
> > +
> > +static uint32_t store_timestamp(int i915,
> > +                             uint32_t ctx, unsigned ring,
> > +                             unsigned mmio_base)
> > +{
> > +     const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +             .relocation_count = 1,
> > +     };
> > +     struct drm_i915_gem_relocation_entry reloc = {
> > +             .target_handle = obj.handle,
> > +             .offset = 2 * sizeof(uint32_t),
> > +             .delta = 4092,
> > +             .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +             .rsvd1 = ctx,
> > +     };
> > +     uint32_t batch[] = {
> > +             0x24 << 23 | (1 + r64b), /* SRM */
> > +             mmio_base + 0x358,
> > +             4092,
> > +             0,
> > +             MI_BATCH_BUFFER_END
> > +     };
> > +
> > +     igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
> > +
> > +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> > +     obj.relocs_ptr = to_user_pointer(&reloc);
> > +
> > +     gem_execbuf(i915, &execbuf);
> > +
> > +     return obj.handle;
> > +}
> > +
> > +static void independent(int i915, unsigned ring, unsigned flags)
> > +{
> > +     uint32_t handle[ARRAY_SIZE(priorities)];
> > +     igt_spin_t *spin[MAX_ELSP_QLEN];
> > +     unsigned int mmio_base;
> > +
> > +     /* XXX i915_query()! */
> > +     switch (ring) {
> > +     case I915_EXEC_DEFAULT:
> > +     case I915_EXEC_RENDER:
> > +             mmio_base = 0x2000;
> > +             break;
> > +#if 0
> > +     case I915_EXEC_BSD:
> > +             mmio_base = 0x12000;
> > +             break;
> > +#endif
> > +     case I915_EXEC_BLT:
> > +             mmio_base = 0x22000;
> > +             break;
> > +
> > +     case I915_EXEC_VEBOX:
> > +             if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> > +                     mmio_base = 0x1d8000;
> > +             else
> > +                     mmio_base = 0x1a000;
> > +             break;
> > +
> > +     default:
> > +             igt_skip("mmio base not known\n");
> > +     }
> 
> Ufff this is quite questionable. Should we rather have this subtest in 
> selftests only?

We should be exporting this information. It is a non-privileged register
that is used by normal clients to measure elapsed time.

> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> > +             const struct igt_spin_factory opts = {
> > +                     .ctx = create_highest_priority(i915),
> > +                     .engine = ring,
> > +             };
> > +             spin[n] = __igt_spin_factory(i915, &opts);
> > +             gem_context_destroy(i915, opts.ctx);
> > +     }
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> > +             uint32_t ctx = gem_queue_create(i915);
> > +             gem_context_set_priority(i915, ctx, priorities[i]);
> > +             handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
> > +             gem_context_destroy(i915, ctx);
> > +     }
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
> > +             igt_spin_free(i915, spin[n]);
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> > +             uint32_t *ptr;
> > +
> > +             ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
> > +             gem_set_domain(i915, handle[i], /* no write hazard lies! */
> > +                            I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +             gem_close(i915, handle[i]);
> > +
> > +             handle[i] = ptr[1023];
> 
> 1023 relates to 4092 from store_timestamp I gather. The two need to be 
> defined closer together.
> 
> > +             munmap(ptr, 4096);
> > +
> > +             igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
> > +                       i, priorities[i], handle[i]);
> > +     }
> > +
> > +     igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
> > +}
> > +
> > +static void reorder(int i915, unsigned ring, unsigned flags)
> > +#define EQUAL 1
> > +{
> > +     IGT_CORK_HANDLE(cork);
> > +     uint32_t scratch;
> > +     uint32_t *ptr;
> > +     uint32_t ctx[2];
> > +     uint32_t plug;
> > +
> > +     ctx[LO] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> > +
> > +     ctx[HI] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     plug = igt_cork_plug(&cork, i915);
> > +
> > +     /* We expect the high priority context to be executed first, and
> > +      * so the final result will be value from the low priority context.
> > +      */
> > +     store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
> > +     store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
> > +
> > +     unplug_show_queue(i915, &cork, ring);
> > +     gem_close(i915, plug);
> > +
> > +     gem_context_destroy(i915, ctx[LO]);
> > +     gem_context_destroy(i915, ctx[HI]);
> > +
> > +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> > +     gem_set_domain(i915, scratch, /* no write hazard lies! */
> > +                    I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, scratch);
> > +
> > +     if (flags & EQUAL) /* equal priority, result will be fifo */
> > +             igt_assert_eq_u32(ptr[0], ctx[HI]);
> > +     else
> > +             igt_assert_eq_u32(ptr[0], ctx[LO]);
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +static void promotion(int i915, unsigned ring)
> > +{
> > +     IGT_CORK_HANDLE(cork);
> > +     uint32_t result, dep;
> > +     uint32_t *ptr;
> > +     uint32_t ctx[3];
> > +     uint32_t plug;
> > +
> > +     ctx[LO] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> > +
> > +     ctx[HI] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[HI], 0);
> > +
> > +     ctx[NOISE] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
> > +
> > +     result = gem_create(i915, 4096);
> > +     dep = gem_create(i915, 4096);
> > +
> > +     plug = igt_cork_plug(&cork, i915);
> > +
> > +     /* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
> > +      *
> > +      * fifo would be NOISE, LO, HI.
> > +      * strict priority would be  HI, NOISE, LO
> > +      */
> > +     store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
> > +     store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
> > +
> > +     /* link LO <-> HI via a dependency on another buffer */
> > +     store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
> > +     store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
> > +
> > +     store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
> > +
> > +     unplug_show_queue(i915, &cork, ring);
> > +     gem_close(i915, plug);
> > +
> > +     gem_context_destroy(i915, ctx[NOISE]);
> > +     gem_context_destroy(i915, ctx[LO]);
> > +     gem_context_destroy(i915, ctx[HI]);
> > +
> > +     ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
> > +     gem_set_domain(i915, dep, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, dep);
> > +
> > +     igt_assert_eq_u32(ptr[0], ctx[HI]);
> > +     munmap(ptr, 4096);
> > +
> > +     ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
> > +     gem_set_domain(i915, result, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, result);
> > +
> > +     igt_assert_eq_u32(ptr[0], ctx[NOISE]);
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +static void smoketest(int i915, unsigned ring, unsigned timeout)
> > +{
> > +     const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > +     unsigned engines[16];
> > +     unsigned nengine;
> > +     unsigned engine;
> > +     uint32_t scratch;
> > +     uint32_t *ptr;
> > +
> > +     nengine = 0;
> > +     for_each_engine(i915, engine) {
> > +             if (ignore_engine(engine))
> > +                     continue;
> > +
> > +             engines[nengine++] = engine;
> > +     }
> > +     igt_require(nengine);
> 
> for_each_physical and counring the engines for engines array would be 
> better I think.
> 
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     igt_fork(child, ncpus) {
> > +             unsigned long count = 0;
> > +             uint32_t ctx;
> > +
> > +             hars_petruska_f54_1_random_perturb(child);
> > +
> > +             ctx = gem_queue_create(i915);
> > +             igt_until_timeout(timeout) {
> > +                     int prio;
> > +
> > +                     prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
> > +                     gem_context_set_priority(i915, ctx, prio);
> > +
> > +                     engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
> > +                     store_dword(i915, ctx, engine, scratch,
> > +                                 8*child + 0, ~child,
> > +                                 0, 0);
> > +                     for (unsigned int step = 0; step < 8; step++)
> > +                             store_dword(i915, ctx, engine, scratch,
> > +                                         8*child + 4, count++,
> > +                                         0, 0);
> > +             }
> > +             gem_context_destroy(i915, ctx);
> > +     }
> > +     igt_waitchildren();
> > +
> > +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> > +     gem_set_domain(i915, scratch, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, scratch);
> > +
> > +     for (unsigned n = 0; n < ncpus; n++) {
> > +             igt_assert_eq_u32(ptr[2*n], ~n);
> > +             /*
> > +              * Note this count is approximate due to unconstrained
> > +              * ordering of the dword writes between engines.
> > +              *
> > +              * Take the result with a pinch of salt.
> > +              */
> > +             igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
> > +     }
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +igt_main
> > +{
> > +     const struct intel_execution_engine *e;
> > +     int i915 = -1;
> > +
> > +     igt_fixture {
> > +             i915 = drm_open_driver(DRIVER_INTEL);
> > +             igt_require_gem(i915);
> > +     }
> > +
> > +     igt_subtest_group {
> > +             igt_fixture {
> > +                     igt_require(gem_contexts_has_shared_gtt(i915));
> > +                     igt_fork_hang_detector(i915);
> > +             }
> > +
> > +             igt_subtest("create-shared-gtt")
> > +                     create_shared_gtt(i915, 0);
> > +
> > +             igt_subtest("detached-shared-gtt")
> > +                     create_shared_gtt(i915, DETACHED);
> > +
> > +             igt_subtest("disjoint-timelines")
> > +                     disjoint_timelines(i915);
> > +
> > +             igt_subtest("single-timeline")
> > +                     single_timeline(i915);
> > +
> > +             igt_subtest("exhaust-shared-gtt")
> > +                     exhaust_shared_gtt(i915, 0);
> > +
> > +             igt_subtest("exhaust-shared-gtt-lrc")
> > +                     exhaust_shared_gtt(i915, EXHAUST_LRC);
> > +
> > +             for (e = intel_execution_engines; e->name; e++) {
> > +                     igt_subtest_f("exec-shared-gtt-%s", e->name)
> > +                             exec_shared_gtt(i915, e->exec_id | e->flags);
> 
> The same previously raised question on should it iterate the legacy 
> execbuf engines or physical engines. Maybe you won't different subtests 
> do both?

It should be testing the cross between the context and execbuf uABI, not
physical.
-Chris