[Intel-gfx] [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT

Thu May 16 08:51:34 UTC 2019

On 15/05/2019 20:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-15 07:37:18)
>>
>> On 08/05/2019 11:09, Chris Wilson wrote:
>>> v2: Test each shared context is its own timeline and allows request
>>> reordering between shared contexts.
>>>
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>>> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
>>> ---
>>>    lib/i915/gem_context.c        |  68 +++
>>>    lib/i915/gem_context.h        |  13 +
>>>    tests/Makefile.sources        |   1 +
>>>    tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
>>>    tests/i915/gem_exec_whisper.c |  32 +-
>>>    tests/meson.build             |   1 +
>>>    6 files changed, 962 insertions(+), 9 deletions(-)
>>>    create mode 100644 tests/i915/gem_ctx_shared.c
>>>
>>> diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
>>> index f94d89cb4..8fb8984d1 100644
>>> --- a/lib/i915/gem_context.c
>>> +++ b/lib/i915/gem_context.c
>>> @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
>>>        igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
>>>    }
>>>    
>>> +int
>>> +__gem_context_clone(int i915,
>>> +                 uint32_t src, unsigned int share,
>>> +                 unsigned int flags,
>>> +                 uint32_t *out)
>>> +{
>>> +     struct drm_i915_gem_context_create_ext_clone clone = {
>>> +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
>>> +             .clone_id = src,
>>> +             .flags = share,
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext arg = {
>>> +             .flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
>>> +             .extensions = to_user_pointer(&clone),
>>> +     };
>>> +     int err = 0;
>>> +
>>> +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
>>> +             err = -errno;
>>> +
>>> +     *out = arg.ctx_id;
>>> +
>>> +     errno = 0;
>>> +     return err;
>>> +}
>>> +
>>> +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     __gem_context_clone(i915, 0, share, flags, &ctx);
>>> +     if (ctx)
>>> +             gem_context_destroy(i915, ctx);
>>> +
>>> +     errno = 0;
>>> +     return ctx;
>>> +}
>>> +
>>> +bool gem_contexts_has_shared_gtt(int i915)
>>> +{
>>> +     return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
>>> +}
>>> +
>>> +bool gem_has_queues(int i915)
>>> +{
>>> +     return __gem_context_has(i915,
>>> +                              I915_CONTEXT_CLONE_VM,
>>> +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +}
>>> +
>>> +uint32_t gem_context_clone(int i915,
>>> +                        uint32_t src, unsigned int share,
>>> +                        unsigned int flags)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
>>> +
>>> +     return ctx;
>>> +}
>>> +
>>> +uint32_t gem_queue_create(int i915)
>>> +{
>>> +     return gem_context_clone(i915, 0,
>>> +                              I915_CONTEXT_CLONE_VM,
>>> +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +}
>>> +
>>>    bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
>>>    {
>>>        struct drm_i915_gem_exec_object2 exec = {};
>>> diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
>>> index a052714d4..8043c3401 100644
>>> --- a/lib/i915/gem_context.h
>>> +++ b/lib/i915/gem_context.h
>>> @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
>>>    void gem_context_destroy(int fd, uint32_t ctx_id);
>>>    int __gem_context_destroy(int fd, uint32_t ctx_id);
>>>    
>>> +int __gem_context_clone(int i915,
>>> +                     uint32_t src, unsigned int share,
>>> +                     unsigned int flags,
>>> +                     uint32_t *out);
>>> +uint32_t gem_context_clone(int i915,
>>> +                        uint32_t src, unsigned int share,
>>> +                        unsigned int flags);
>>> +
>>> +uint32_t gem_queue_create(int i915);
>>> +
>>> +bool gem_contexts_has_shared_gtt(int i915);
>>> +bool gem_has_queues(int i915);
>>> +
>>>    bool gem_has_contexts(int fd);
>>>    void gem_require_contexts(int fd);
>>>    void gem_context_require_bannable(int fd);
>>> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
>>> index e1b7feeb2..3552e895b 100644
>>> --- a/tests/Makefile.sources
>>> +++ b/tests/Makefile.sources
>>> @@ -22,6 +22,7 @@ TESTS_progs = \
>>>        drm_mm \
>>>        drm_read \
>>>        i915/gem_ctx_clone \
>>> +     i915/gem_ctx_shared \
>>>        i915/gem_vm_create \
>>>        kms_3d \
>>>        kms_addfb_basic \
>>> diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
>>> new file mode 100644
>>> index 000000000..0076f5e9d
>>> --- /dev/null
>>> +++ b/tests/i915/gem_ctx_shared.c
>>> @@ -0,0 +1,856 @@
>>> +/*
>>> + * Copyright © 2017 Intel Corporation
>>
>> 2019
> 
> Nah, that would imply I put any thought into touching it since.
> 
>>> +static void exhaust_shared_gtt(int i915, unsigned int flags)
>>> +#define EXHAUST_LRC 0x1
>>> +{
>>> +     i915 = gem_reopen_driver(i915);
>>> +
>>> +     igt_fork(pid, 1) {
>>> +             const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +             struct drm_i915_gem_exec_object2 obj = {
>>> +                     .handle = gem_create(i915, 4096)
>>> +             };
>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                     .buffers_ptr = to_user_pointer(&obj),
>>> +                     .buffer_count = 1,
>>> +             };
>>> +             uint32_t parent, child;
>>> +             unsigned long count = 0;
>>> +             int err;
>>> +
>>> +             gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +
>>> +             child = 0;
>>> +             for (;;) {
>>> +                     parent = child;
>>> +                     err = __gem_context_clone(i915,
>>> +                                               parent, I915_CONTEXT_CLONE_VM,
>>> +                                               0, &child);
>>> +                     if (err)
>>> +                             break;
>>> +
>>> +                     if (flags & EXHAUST_LRC) {
>>> +                             execbuf.rsvd1 = child;
>>> +                             err = __gem_execbuf(i915, &execbuf);
>>> +                             if (err)
>>> +                                     break;
>>> +                     }
>>
>> What are the stop conditions in this test, with and without the
>> EXHAUST_LRC flag? It would be good to put that in a comment.
> 
> It runs until the kernel dies. The giveaway is meant to be the test name.
>   
>> Especially since AFAIR this one was causing OOM for me so might need to
>> be tweaked.
> 
> It runs until the kernel dies.

Aren't we forbidden from adding failing tests?

> 
>>> +
>>> +                     count++;
>>> +             }
>>> +             gem_sync(i915, obj.handle);
>>> +
>>> +             igt_info("Created %lu shared contexts, before %d (%s)\n",
>>> +                      count, err, strerror(-err));
>>> +     }
>>> +     close(i915);
>>> +     igt_waitchildren();
>>> +}
>>> +
>>> +static void exec_shared_gtt(int i915, unsigned int ring)
>>> +{
>>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096)
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = ring,
>>> +     };
>>> +     uint32_t scratch = obj.handle;
>>> +     uint32_t batch[16];
>>> +     int i;
>>> +
>>> +     gem_require_ring(i915, ring);
>>> +     igt_require(gem_can_store_dword(i915, ring));
>>> +
>>> +     /* Load object into place in the GTT */
>>> +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +     gem_execbuf(i915, &execbuf);
>>> +
>>> +     /* Presume nothing causes an eviction in the meantime */
>>> +
>>> +     obj.handle = gem_create(i915, 4096);
>>> +
>>> +     i = 0;
>>> +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
>>> +     if (gen >= 8) {
>>> +             batch[++i] = obj.offset;
>>> +             batch[++i] = 0;
>>> +     } else if (gen >= 4) {
>>> +             batch[++i] = 0;
>>> +             batch[++i] = obj.offset;
>>> +     } else {
>>> +             batch[i]--;
>>> +             batch[++i] = obj.offset;
>>> +     }
>>> +     batch[++i] = 0xc0ffee;
>>> +     batch[++i] = MI_BATCH_BUFFER_END;
>>> +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
>>> +
>>> +     obj.offset += 4096; /* make sure we don't cause an eviction! */
>>
>> Is 4k apart safe?
> 
> Since to change would imply an ABI break and I see no param indicating
> an ABI change, and Joonas keeps on refusing to add such information.

Why would it be an ABI break? Why would the driver not be allowed to add
arbitrary padding, or use a larger pgtable entry or something, so that if you
assume +4k is empty, it might actually not be?

>   
>> A short comment on how does this test work would be good.
>>
>>> +     obj.flags |= EXEC_OBJECT_PINNED;
>>> +     execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
>>> +     if (gen > 3 && gen < 6)
>>> +             execbuf.flags |= I915_EXEC_SECURE;
>>> +
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_context_destroy(i915, execbuf.rsvd1);
>>> +     gem_sync(i915, obj.handle); /* write hazard lies */
>>> +     gem_close(i915, obj.handle);
>>> +
>>> +     gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
>>> +     gem_close(i915, scratch);
>>> +
>>> +     igt_assert_eq_u32(*batch, 0xc0ffee);
>>> +}
>>> +
>>> +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
>>> +{
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096),
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = ring,
>>> +             .rsvd1 = ctx,
>>> +     };
>>> +     int err;
>>> +
>>> +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +     gem_execbuf(i915, &execbuf);
>>> +     err = gem_wait(i915, obj.handle, &timeout);
>>> +     gem_close(i915, obj.handle);
>>> +
>>> +     return err;
>>> +}
>>> +
>>> +static bool has_single_timeline(int i915)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     __gem_context_clone(i915, 0, 0,
>>> +                         I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
>>> +                         &ctx);
>>> +     if (ctx)
>>> +             gem_context_destroy(i915, ctx);
>>> +
>>> +     return ctx != 0;
>>> +}
>>> +
>>> +static bool ignore_engine(unsigned engine)
>>> +{
>>> +     if (engine == 0)
>>> +             return true;
>>> +
>>> +     if (engine == I915_EXEC_BSD)
>>> +             return true;
>>> +
>>> +     return false;
>>> +}
>>> +
>>> +static void single_timeline(int i915)
>>> +{
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096),
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +     };
>>> +     struct sync_fence_info rings[16];
>>
>> Could use for_each_physical_engine to count the engines. But we probably
>> have plenty of this around the code base.
>>
>>> +     struct sync_file_info sync_file_info = {
>>> +             .num_fences = 1,
>>> +     };
>>> +     unsigned int engine;
>>> +     int n;
>>> +
>>> +     igt_require(has_single_timeline(i915));
>>> +
>>> +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_sync(i915, obj.handle);
>>> +
>>> +     /*
>>> +      * For a "single timeline" context, each ring is on the common
>>> +      * timeline, unlike a normal context where each ring has an
>>> +      * independent timeline. That is no matter which engine we submit
>>> +      * to, it reports the same timeline name and fence context. However,
>>> +      * the fence context is not reported through the sync_fence_info.
>>
>> Is the test useful then? There was one I reviewed earlier in this series
>> which tested for execution ordering, which sounds like is what's needed.
> 
> It is a variant. This one is a couple of years older. Both accomplish
> similar things through very different means, the more the serendipitous.
> 
>>
>>> +      */
>>> +     execbuf.rsvd1 =
>>> +             gem_context_clone(i915, 0, 0,
>>> +                               I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +     execbuf.flags = I915_EXEC_FENCE_OUT;
>>> +     n = 0;
>>> +     for_each_engine(i915, engine) {
>>
>> for_each_physical_engine to align with Andi's work?
> 
> No, this would be an ABI iterator not a physical iterator.

Legacy ABI, why? It then covers only a subset of engines, so is it useful to
cover more than one but not all of them?

>   
>>> +             gem_execbuf_wr(i915, &execbuf);
>>> +             sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
>>> +             do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
>>> +             close(execbuf.rsvd2 >> 32);
>>> +
>>> +             igt_info("ring[%d] fence: %s %s\n",
>>> +                      n, rings[n].driver_name, rings[n].obj_name);
>>> +             n++;
>>> +     }
>>> +     gem_sync(i915, obj.handle);
>>> +     gem_close(i915, obj.handle);
>>> +
>>> +     for (int i = 1; i < n; i++) {
>>> +             igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
>>> +             igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
>>
>> What is in obj_name?
> 
> The timeline name. sync_file is plain old useless. The asserts here are
> drivel.
> 
>>> +     }
>>> +}
>>> +
>>> +static void exec_single_timeline(int i915, unsigned int ring)
>>> +{
>>> +     unsigned int other;
>>> +     igt_spin_t *spin;
>>> +     uint32_t ctx;
>>> +
>>> +     gem_require_ring(i915, ring);
>>> +     igt_require(has_single_timeline(i915));
>>> +
>>> +     /*
>>> +      * On an ordinary context, a blockage on one ring doesn't prevent
>>> +      * execution on an other.
>>> +      */
>>> +     ctx = 0;
>>> +     spin = NULL;
>>> +     for_each_engine(i915, other) {
>>
>> for_each_physical
> 
> Modern inventions.
>   
>>> +             if (other == ring || ignore_engine(other))
>>> +                     continue;
>>> +
>>> +             if (spin == NULL) {
>>> +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
>>> +             } else {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = spin->execbuf.buffers_ptr,
>>> +                             .buffer_count = spin->execbuf.buffer_count,
>>> +                             .flags = other,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     gem_execbuf(i915, &execbuf);
>>> +             }
>>> +     }
>>> +     igt_require(spin);
>>> +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
>>> +     igt_spin_free(i915, spin);
>>> +
>>> +     /*
>>> +      * But if we create a context with just a single shared timeline,
>>> +      * then it will block waiting for the earlier requests on the
>>> +      * other engines.
>>> +      */
>>> +     ctx = gem_context_clone(i915, 0, 0,
>>> +                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +     spin = NULL;
>>> +     for_each_engine(i915, other) {
>>
>> Ditto.
> 
> Hey! Where was that when this code was written^W copied :-p
>   
>>> +             if (other == ring || ignore_engine(other))
>>> +                     continue;
>>> +
>>> +             if (spin == NULL) {
>>> +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
>>> +             } else {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = spin->execbuf.buffers_ptr,
>>> +                             .buffer_count = spin->execbuf.buffer_count,
>>> +                             .flags = other,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     gem_execbuf(i915, &execbuf);
>>> +             }
>>> +     }
>>> +     igt_assert(spin);
>>> +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
>>> +     igt_spin_free(i915, spin);
>>> +}
>>> +
>>> +static void store_dword(int i915, uint32_t ctx, unsigned ring,
>>> +                     uint32_t target, uint32_t offset, uint32_t value,
>>> +                     uint32_t cork, unsigned write_domain)
>>> +{
>>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
>>> +     struct drm_i915_gem_exec_object2 obj[3];
>>> +     struct drm_i915_gem_relocation_entry reloc;
>>> +     struct drm_i915_gem_execbuffer2 execbuf;
>>> +     uint32_t batch[16];
>>> +     int i;
>>> +
>>> +     memset(&execbuf, 0, sizeof(execbuf));
>>> +     execbuf.buffers_ptr = to_user_pointer(obj + !cork);
>>> +     execbuf.buffer_count = 2 + !!cork;
>>> +     execbuf.flags = ring;
>>> +     if (gen < 6)
>>> +             execbuf.flags |= I915_EXEC_SECURE;
>>> +     execbuf.rsvd1 = ctx;
>>> +
>>> +     memset(obj, 0, sizeof(obj));
>>> +     obj[0].handle = cork;
>>> +     obj[1].handle = target;
>>> +     obj[2].handle = gem_create(i915, 4096);
>>> +
>>> +     memset(&reloc, 0, sizeof(reloc));
>>> +     reloc.target_handle = obj[1].handle;
>>> +     reloc.presumed_offset = 0;
>>> +     reloc.offset = sizeof(uint32_t);
>>> +     reloc.delta = offset;
>>> +     reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
>>> +     reloc.write_domain = write_domain;
>>> +     obj[2].relocs_ptr = to_user_pointer(&reloc);
>>> +     obj[2].relocation_count = 1;
>>> +
>>> +     i = 0;
>>> +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
>>> +     if (gen >= 8) {
>>> +             batch[++i] = offset;
>>> +             batch[++i] = 0;
>>> +     } else if (gen >= 4) {
>>> +             batch[++i] = 0;
>>> +             batch[++i] = offset;
>>> +             reloc.offset += sizeof(uint32_t);
>>> +     } else {
>>> +             batch[i]--;
>>> +             batch[++i] = offset;
>>> +     }
>>> +     batch[++i] = value;
>>> +     batch[++i] = MI_BATCH_BUFFER_END;
>>> +     gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_close(i915, obj[2].handle);
>>> +}
>>> +
>>> +static uint32_t create_highest_priority(int i915)
>>> +{
>>> +     uint32_t ctx = gem_context_create(i915);
>>> +
>>> +     /*
>>> +      * If there is no priority support, all contexts will have equal
>>> +      * priority (and therefore the max user priority), so no context
>>> +      * can overtake us, and we effectively can form a plug.
>>> +      */
>>> +     __gem_context_set_priority(i915, ctx, MAX_PRIO);
>>> +
>>> +     return ctx;
>>> +}
>>> +
>>> +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
>>> +{
>>> +     igt_spin_t *spin[MAX_ELSP_QLEN];
>>
>> Why is this 16?
> 
> 2x as big as the deepest known qlen. And 16 is that number that crops up
> everywhere as a "just big enough number"
>   
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
>>> +             const struct igt_spin_factory opts = {
>>> +                     .ctx = create_highest_priority(i915),
>>> +                     .engine = engine,
>>> +             };
>>> +             spin[n] = __igt_spin_factory(i915, &opts);
>>> +             gem_context_destroy(i915, opts.ctx);
>>> +     }
>>> +
>>> +     igt_cork_unplug(c); /* batches will now be queued on the engine */
>>> +     igt_debugfs_dump(i915, "i915_engine_info");
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
>>> +             igt_spin_free(i915, spin[n]);
>>> +}
>>> +
>>> +static uint32_t store_timestamp(int i915,
>>> +                             uint32_t ctx, unsigned ring,
>>> +                             unsigned mmio_base)
>>> +{
>>> +     const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096),
>>> +             .relocation_count = 1,
>>> +     };
>>> +     struct drm_i915_gem_relocation_entry reloc = {
>>> +             .target_handle = obj.handle,
>>> +             .offset = 2 * sizeof(uint32_t),
>>> +             .delta = 4092,
>>> +             .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = ring,
>>> +             .rsvd1 = ctx,
>>> +     };
>>> +     uint32_t batch[] = {
>>> +             0x24 << 23 | (1 + r64b), /* SRM */
>>> +             mmio_base + 0x358,
>>> +             4092,
>>> +             0,
>>> +             MI_BATCH_BUFFER_END
>>> +     };
>>> +
>>> +     igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
>>> +
>>> +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
>>> +     obj.relocs_ptr = to_user_pointer(&reloc);
>>> +
>>> +     gem_execbuf(i915, &execbuf);
>>> +
>>> +     return obj.handle;
>>> +}
>>> +
>>> +static void independent(int i915, unsigned ring, unsigned flags)
>>> +{
>>> +     uint32_t handle[ARRAY_SIZE(priorities)];
>>> +     igt_spin_t *spin[MAX_ELSP_QLEN];
>>> +     unsigned int mmio_base;
>>> +
>>> +     /* XXX i915_query()! */
>>> +     switch (ring) {
>>> +     case I915_EXEC_DEFAULT:
>>> +     case I915_EXEC_RENDER:
>>> +             mmio_base = 0x2000;
>>> +             break;
>>> +#if 0
>>> +     case I915_EXEC_BSD:
>>> +             mmio_base = 0x12000;
>>> +             break;
>>> +#endif
>>> +     case I915_EXEC_BLT:
>>> +             mmio_base = 0x22000;
>>> +             break;
>>> +
>>> +     case I915_EXEC_VEBOX:
>>> +             if (intel_gen(intel_get_drm_devid(i915)) >= 11)
>>> +                     mmio_base = 0x1d8000;
>>> +             else
>>> +                     mmio_base = 0x1a000;
>>> +             break;
>>> +
>>> +     default:
>>> +             igt_skip("mmio base not known\n");
>>> +     }
>>
>> Ufff this is quite questionable. Should we rather have this subtest in
>> selftests only?
> 
> We should be exporting this information. It is a non-privileged register
> that is used by normal clients to measure elapsed time.

I see.. who uses it? Mesa? Is it just one register? What would make more 
sense - add a query to read this register, or add a query to get the 
register address, or mmio_base (as you were proposing some time ago)?

> 
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
>>> +             const struct igt_spin_factory opts = {
>>> +                     .ctx = create_highest_priority(i915),
>>> +                     .engine = ring,
>>> +             };
>>> +             spin[n] = __igt_spin_factory(i915, &opts);
>>> +             gem_context_destroy(i915, opts.ctx);
>>> +     }
>>> +
>>> +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
>>> +             uint32_t ctx = gem_queue_create(i915);
>>> +             gem_context_set_priority(i915, ctx, priorities[i]);
>>> +             handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
>>> +             gem_context_destroy(i915, ctx);
>>> +     }
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
>>> +             igt_spin_free(i915, spin[n]);
>>> +
>>> +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
>>> +             uint32_t *ptr;
>>> +
>>> +             ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
>>> +             gem_set_domain(i915, handle[i], /* no write hazard lies! */
>>> +                            I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +             gem_close(i915, handle[i]);
>>> +
>>> +             handle[i] = ptr[1023];
>>
>> 1023 relates to 4092 from store_timestamp I gather. The two need to be
>> defined closer together.
>>
>>> +             munmap(ptr, 4096);
>>> +
>>> +             igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
>>> +                       i, priorities[i], handle[i]);
>>> +     }
>>> +
>>> +     igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
>>> +}
>>> +
>>> +static void reorder(int i915, unsigned ring, unsigned flags)
>>> +#define EQUAL 1
>>> +{
>>> +     IGT_CORK_HANDLE(cork);
>>> +     uint32_t scratch;
>>> +     uint32_t *ptr;
>>> +     uint32_t ctx[2];
>>> +     uint32_t plug;
>>> +
>>> +     ctx[LO] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
>>> +
>>> +     ctx[HI] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
>>> +
>>> +     scratch = gem_create(i915, 4096);
>>> +     plug = igt_cork_plug(&cork, i915);
>>> +
>>> +     /* We expect the high priority context to be executed first, and
>>> +      * so the final result will be value from the low priority context.
>>> +      */
>>> +     store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
>>> +     store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
>>> +
>>> +     unplug_show_queue(i915, &cork, ring);
>>> +     gem_close(i915, plug);
>>> +
>>> +     gem_context_destroy(i915, ctx[LO]);
>>> +     gem_context_destroy(i915, ctx[HI]);
>>> +
>>> +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
>>> +     gem_set_domain(i915, scratch, /* no write hazard lies! */
>>> +                    I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, scratch);
>>> +
>>> +     if (flags & EQUAL) /* equal priority, result will be fifo */
>>> +             igt_assert_eq_u32(ptr[0], ctx[HI]);
>>> +     else
>>> +             igt_assert_eq_u32(ptr[0], ctx[LO]);
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +static void promotion(int i915, unsigned ring)
>>> +{
>>> +     IGT_CORK_HANDLE(cork);
>>> +     uint32_t result, dep;
>>> +     uint32_t *ptr;
>>> +     uint32_t ctx[3];
>>> +     uint32_t plug;
>>> +
>>> +     ctx[LO] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
>>> +
>>> +     ctx[HI] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[HI], 0);
>>> +
>>> +     ctx[NOISE] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
>>> +
>>> +     result = gem_create(i915, 4096);
>>> +     dep = gem_create(i915, 4096);
>>> +
>>> +     plug = igt_cork_plug(&cork, i915);
>>> +
>>> +     /* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
>>> +      *
>>> +      * fifo would be NOISE, LO, HI.
>>> +      * strict priority would be  HI, NOISE, LO
>>> +      */
>>> +     store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
>>> +     store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
>>> +
>>> +     /* link LO <-> HI via a dependency on another buffer */
>>> +     store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
>>> +     store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
>>> +
>>> +     store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
>>> +
>>> +     unplug_show_queue(i915, &cork, ring);
>>> +     gem_close(i915, plug);
>>> +
>>> +     gem_context_destroy(i915, ctx[NOISE]);
>>> +     gem_context_destroy(i915, ctx[LO]);
>>> +     gem_context_destroy(i915, ctx[HI]);
>>> +
>>> +     ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
>>> +     gem_set_domain(i915, dep, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, dep);
>>> +
>>> +     igt_assert_eq_u32(ptr[0], ctx[HI]);
>>> +     munmap(ptr, 4096);
>>> +
>>> +     ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
>>> +     gem_set_domain(i915, result, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, result);
>>> +
>>> +     igt_assert_eq_u32(ptr[0], ctx[NOISE]);
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +static void smoketest(int i915, unsigned ring, unsigned timeout)
>>> +{
>>> +     const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>>> +     unsigned engines[16];
>>> +     unsigned nengine;
>>> +     unsigned engine;
>>> +     uint32_t scratch;
>>> +     uint32_t *ptr;
>>> +
>>> +     nengine = 0;
>>> +     for_each_engine(i915, engine) {
>>> +             if (ignore_engine(engine))
>>> +                     continue;
>>> +
>>> +             engines[nengine++] = engine;
>>> +     }
>>> +     igt_require(nengine);
>>
>> for_each_physical and counring the engines for engines array would be
>> better I think.
>>
>>> +
>>> +     scratch = gem_create(i915, 4096);
>>> +     igt_fork(child, ncpus) {
>>> +             unsigned long count = 0;
>>> +             uint32_t ctx;
>>> +
>>> +             hars_petruska_f54_1_random_perturb(child);
>>> +
>>> +             ctx = gem_queue_create(i915);
>>> +             igt_until_timeout(timeout) {
>>> +                     int prio;
>>> +
>>> +                     prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
>>> +                     gem_context_set_priority(i915, ctx, prio);
>>> +
>>> +                     engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
>>> +                     store_dword(i915, ctx, engine, scratch,
>>> +                                 8*child + 0, ~child,
>>> +                                 0, 0);
>>> +                     for (unsigned int step = 0; step < 8; step++)
>>> +                             store_dword(i915, ctx, engine, scratch,
>>> +                                         8*child + 4, count++,
>>> +                                         0, 0);
>>> +             }
>>> +             gem_context_destroy(i915, ctx);
>>> +     }
>>> +     igt_waitchildren();
>>> +
>>> +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
>>> +     gem_set_domain(i915, scratch, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, scratch);
>>> +
>>> +     for (unsigned n = 0; n < ncpus; n++) {
>>> +             igt_assert_eq_u32(ptr[2*n], ~n);
>>> +             /*
>>> +              * Note this count is approximate due to unconstrained
>>> +              * ordering of the dword writes between engines.
>>> +              *
>>> +              * Take the result with a pinch of salt.
>>> +              */
>>> +             igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
>>> +     }
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +igt_main
>>> +{
>>> +     const struct intel_execution_engine *e;
>>> +     int i915 = -1;
>>> +
>>> +     igt_fixture {
>>> +             i915 = drm_open_driver(DRIVER_INTEL);
>>> +             igt_require_gem(i915);
>>> +     }
>>> +
>>> +     igt_subtest_group {
>>> +             igt_fixture {
>>> +                     igt_require(gem_contexts_has_shared_gtt(i915));
>>> +                     igt_fork_hang_detector(i915);
>>> +             }
>>> +
>>> +             igt_subtest("create-shared-gtt")
>>> +                     create_shared_gtt(i915, 0);
>>> +
>>> +             igt_subtest("detached-shared-gtt")
>>> +                     create_shared_gtt(i915, DETACHED);
>>> +
>>> +             igt_subtest("disjoint-timelines")
>>> +                     disjoint_timelines(i915);
>>> +
>>> +             igt_subtest("single-timeline")
>>> +                     single_timeline(i915);
>>> +
>>> +             igt_subtest("exhaust-shared-gtt")
>>> +                     exhaust_shared_gtt(i915, 0);
>>> +
>>> +             igt_subtest("exhaust-shared-gtt-lrc")
>>> +                     exhaust_shared_gtt(i915, EXHAUST_LRC);
>>> +
>>> +             for (e = intel_execution_engines; e->name; e++) {
>>> +                     igt_subtest_f("exec-shared-gtt-%s", e->name)
>>> +                             exec_shared_gtt(i915, e->exec_id | e->flags);
>>
>> The same previously raised question on should it iterate the legacy
>> execbuf engines or physical engines. Maybe you won't different subtests
>> do both?
> 
> It should be testing the cross between the context and execbuf uABI, not
> physical.

Same question as earlier: why is this cross interesting? I mean, what is
interesting in the intersection between legacy execbuf engine selection and
ppgtt sharing? And why, at the same time, is it not relevant to exercise the
new execbuf engine selection ABI?

Regards,

Tvrtko