[igt-dev] [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT

Wed May 15 06:37:18 UTC 2019

On 08/05/2019 11:09, Chris Wilson wrote:
> v2: Test each shared context is its own timeline and allows request
> reordering between shared contexts.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> ---
>   lib/i915/gem_context.c        |  68 +++
>   lib/i915/gem_context.h        |  13 +
>   tests/Makefile.sources        |   1 +
>   tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
>   tests/i915/gem_exec_whisper.c |  32 +-
>   tests/meson.build             |   1 +
>   6 files changed, 962 insertions(+), 9 deletions(-)
>   create mode 100644 tests/i915/gem_ctx_shared.c
> 
> diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
> index f94d89cb4..8fb8984d1 100644
> --- a/lib/i915/gem_context.c
> +++ b/lib/i915/gem_context.c
> @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
>   	igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
>   }
>   
> +int
> +__gem_context_clone(int i915,
> +		    uint32_t src, unsigned int share,
> +		    unsigned int flags,
> +		    uint32_t *out)
> +{
> +	struct drm_i915_gem_context_create_ext_clone clone = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.clone_id = src,
> +		.flags = share,
> +	};
> +	struct drm_i915_gem_context_create_ext arg = {
> +		.flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&clone),
> +	};
> +	int err = 0;
> +
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
> +		err = -errno;
> +
> +	*out = arg.ctx_id;
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
> +{
> +	uint32_t ctx;
> +
> +	__gem_context_clone(i915, 0, share, flags, &ctx);
> +	if (ctx)
> +		gem_context_destroy(i915, ctx);
> +
> +	errno = 0;
> +	return ctx;
> +}
> +
> +bool gem_contexts_has_shared_gtt(int i915)
> +{
> +	return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
> +}
> +
> +bool gem_has_queues(int i915)
> +{
> +	return __gem_context_has(i915,
> +				 I915_CONTEXT_CLONE_VM,
> +				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +}
> +
> +uint32_t gem_context_clone(int i915,
> +			   uint32_t src, unsigned int share,
> +			   unsigned int flags)
> +{
> +	uint32_t ctx;
> +
> +	igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
> +
> +	return ctx;
> +}
> +
> +uint32_t gem_queue_create(int i915)
> +{
> +	return gem_context_clone(i915, 0,
> +				 I915_CONTEXT_CLONE_VM,
> +				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +}
> +
>   bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
>   {
>   	struct drm_i915_gem_exec_object2 exec = {};
> diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
> index a052714d4..8043c3401 100644
> --- a/lib/i915/gem_context.h
> +++ b/lib/i915/gem_context.h
> @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
>   void gem_context_destroy(int fd, uint32_t ctx_id);
>   int __gem_context_destroy(int fd, uint32_t ctx_id);
>   
> +int __gem_context_clone(int i915,
> +			uint32_t src, unsigned int share,
> +			unsigned int flags,
> +			uint32_t *out);
> +uint32_t gem_context_clone(int i915,
> +			   uint32_t src, unsigned int share,
> +			   unsigned int flags);
> +
> +uint32_t gem_queue_create(int i915);
> +
> +bool gem_contexts_has_shared_gtt(int i915);
> +bool gem_has_queues(int i915);
> +
>   bool gem_has_contexts(int fd);
>   void gem_require_contexts(int fd);
>   void gem_context_require_bannable(int fd);
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index e1b7feeb2..3552e895b 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -22,6 +22,7 @@ TESTS_progs = \
>   	drm_mm \
>   	drm_read \
>   	i915/gem_ctx_clone \
> +	i915/gem_ctx_shared \
>   	i915/gem_vm_create \
>   	kms_3d \
>   	kms_addfb_basic \
> diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
> new file mode 100644
> index 000000000..0076f5e9d
> --- /dev/null
> +++ b/tests/i915/gem_ctx_shared.c
> @@ -0,0 +1,856 @@
> +/*
> + * Copyright © 2017 Intel Corporation

This should be 2019.

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "igt.h"
> +
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <sys/time.h>
> +
> +#include <drm.h>
> +
> +#include "igt_rand.h"
> +#include "igt_vgem.h"
> +#include "sync_file.h"
> +
> +#define LO 0
> +#define HI 1
> +#define NOISE 2
> +
> +#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
> +#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
> +
> +static int priorities[] = {
> +	[LO] = MIN_PRIO / 2,
> +	[HI] = MAX_PRIO / 2,
> +};
> +
> +#define MAX_ELSP_QLEN 16
> +
> +IGT_TEST_DESCRIPTION("Test shared contexts.");
> +
> +static void create_shared_gtt(int i915, unsigned int flags)
> +#define DETACHED 0x1
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +	uint32_t parent, child;
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +
> +	child = flags & DETACHED ? gem_context_create(i915) : 0;
> +	igt_until_timeout(2) {
> +		parent = flags & DETACHED ? child : 0;
> +		child = gem_context_clone(i915,
> +					  parent, I915_CONTEXT_CLONE_VM,
> +					  0);
> +		execbuf.rsvd1 = child;
> +		gem_execbuf(i915, &execbuf);
> +
> +		if (flags & DETACHED) {
> +			gem_context_destroy(i915, parent);
> +			gem_execbuf(i915, &execbuf);
> +		} else {
> +			parent = child;
> +			gem_context_destroy(i915, parent);
> +		}
> +
> +		execbuf.rsvd1 = parent;
> +		igt_assert_eq(__gem_execbuf(i915, &execbuf), -ENOENT);
> +		igt_assert_eq(__gem_context_clone(i915,
> +						  parent, I915_CONTEXT_CLONE_VM,
> +						  0, &parent), -ENOENT);
> +	}
> +	if (flags & DETACHED)
> +		gem_context_destroy(i915, child);
> +
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +}
> +
> +static void disjoint_timelines(int i915)
> +{
> +	IGT_CORK_HANDLE(cork);
> +	igt_spin_t *spin[2];
> +	uint32_t plug, child;
> +
> +	igt_require(gem_has_execlists(i915));
> +
> +	/*
> +	 * Each context, although they share a vm, are expected to be
> +	 * distinct timelines. A request queued to one context should be
> +	 * independent of any shared contexts.
> +	 */
> +	child = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	spin[0] = __igt_spin_new(i915, .ctx = 0, .dependency = plug);
> +	spin[1] = __igt_spin_new(i915, .ctx = child);
> +
> +	/* Wait for the second spinner, will hang if stuck behind the first */
> +	igt_spin_end(spin[1]);
> +	gem_sync(i915, spin[1]->handle);
> +
> +	igt_cork_unplug(&cork);
> +
> +	igt_spin_free(i915, spin[1]);
> +	igt_spin_free(i915, spin[0]);
> +}
> +
> +static void exhaust_shared_gtt(int i915, unsigned int flags)
> +#define EXHAUST_LRC 0x1
> +{
> +	i915 = gem_reopen_driver(i915);
> +
> +	igt_fork(pid, 1) {
> +		const uint32_t bbe = MI_BATCH_BUFFER_END;
> +		struct drm_i915_gem_exec_object2 obj = {
> +			.handle = gem_create(i915, 4096)
> +		};
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&obj),
> +			.buffer_count = 1,
> +		};
> +		uint32_t parent, child;
> +		unsigned long count = 0;
> +		int err;
> +
> +		gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +
> +		child = 0;
> +		for (;;) {
> +			parent = child;
> +			err = __gem_context_clone(i915,
> +						  parent, I915_CONTEXT_CLONE_VM,
> +						  0, &child);
> +			if (err)
> +				break;
> +
> +			if (flags & EXHAUST_LRC) {
> +				execbuf.rsvd1 = child;
> +				err = __gem_execbuf(i915, &execbuf);
> +				if (err)
> +					break;
> +			}

What are the stop conditions in this test, with and without the 
EXHAUST_LRC flag? It would be good to put that in a comment.

Especially since, AFAIR, this one was causing an OOM for me, so it might
need to be tweaked.

> +
> +			count++;
> +		}
> +		gem_sync(i915, obj.handle);
> +
> +		igt_info("Created %lu shared contexts, before %d (%s)\n",
> +			 count, err, strerror(-err));
> +	}
> +	close(i915);
> +	igt_waitchildren();
> +}
> +
> +static void exec_shared_gtt(int i915, unsigned int ring)
> +{
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096)
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +	};
> +	uint32_t scratch = obj.handle;
> +	uint32_t batch[16];
> +	int i;
> +
> +	gem_require_ring(i915, ring);
> +	igt_require(gem_can_store_dword(i915, ring));
> +
> +	/* Load object into place in the GTT */
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +
> +	/* Presume nothing causes an eviction in the meantime */
> +
> +	obj.handle = gem_create(i915, 4096);
> +
> +	i = 0;
> +	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +	if (gen >= 8) {
> +		batch[++i] = obj.offset;
> +		batch[++i] = 0;
> +	} else if (gen >= 4) {
> +		batch[++i] = 0;
> +		batch[++i] = obj.offset;
> +	} else {
> +		batch[i]--;
> +		batch[++i] = obj.offset;
> +	}
> +	batch[++i] = 0xc0ffee;
> +	batch[++i] = MI_BATCH_BUFFER_END;
> +	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> +
> +	obj.offset += 4096; /* make sure we don't cause an eviction! */

Is 4k apart safe?

A short comment on how this test works would be good.

> +	obj.flags |= EXEC_OBJECT_PINNED;
> +	execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> +	if (gen > 3 && gen < 6)
> +		execbuf.flags |= I915_EXEC_SECURE;
> +
> +	gem_execbuf(i915, &execbuf);
> +	gem_context_destroy(i915, execbuf.rsvd1);
> +	gem_sync(i915, obj.handle); /* write hazard lies */
> +	gem_close(i915, obj.handle);
> +
> +	gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
> +	gem_close(i915, scratch);
> +
> +	igt_assert_eq_u32(*batch, 0xc0ffee);
> +}
> +
> +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +		.rsvd1 = ctx,
> +	};
> +	int err;
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	err = gem_wait(i915, obj.handle, &timeout);
> +	gem_close(i915, obj.handle);
> +
> +	return err;
> +}
> +
> +static bool has_single_timeline(int i915)
> +{
> +	uint32_t ctx;
> +
> +	__gem_context_clone(i915, 0, 0,
> +			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
> +			    &ctx);
> +	if (ctx)
> +		gem_context_destroy(i915, ctx);
> +
> +	return ctx != 0;
> +}
> +
> +static bool ignore_engine(unsigned engine)
> +{
> +	if (engine == 0)
> +		return true;
> +
> +	if (engine == I915_EXEC_BSD)
> +		return true;
> +
> +	return false;
> +}
> +
> +static void single_timeline(int i915)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +	struct sync_fence_info rings[16];

Could use for_each_physical_engine to count the engines. But we probably 
have plenty of this around the code base.

> +	struct sync_file_info sync_file_info = {
> +		.num_fences = 1,
> +	};
> +	unsigned int engine;
> +	int n;
> +
> +	igt_require(has_single_timeline(i915));
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +
> +	/*
> +	 * For a "single timeline" context, each ring is on the common
> +	 * timeline, unlike a normal context where each ring has an
> +	 * independent timeline. That is no matter which engine we submit
> +	 * to, it reports the same timeline name and fence context. However,
> +	 * the fence context is not reported through the sync_fence_info.

Is the test useful then? There was one I reviewed earlier in this series
which tested for execution ordering, which sounds like what's needed.

> +	 */
> +	execbuf.rsvd1 =
> +		gem_context_clone(i915, 0, 0,
> +				  I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +	execbuf.flags = I915_EXEC_FENCE_OUT;
> +	n = 0;
> +	for_each_engine(i915, engine) {

for_each_physical_engine to align with Andi's work?

> +		gem_execbuf_wr(i915, &execbuf);
> +		sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
> +		do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
> +		close(execbuf.rsvd2 >> 32);
> +
> +		igt_info("ring[%d] fence: %s %s\n",
> +			 n, rings[n].driver_name, rings[n].obj_name);
> +		n++;
> +	}
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +
> +	for (int i = 1; i < n; i++) {
> +		igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
> +		igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));

What is in obj_name?

> +	}
> +}
> +
> +static void exec_single_timeline(int i915, unsigned int ring)
> +{
> +	unsigned int other;
> +	igt_spin_t *spin;
> +	uint32_t ctx;
> +
> +	gem_require_ring(i915, ring);
> +	igt_require(has_single_timeline(i915));
> +
> +	/*
> +	 * On an ordinary context, a blockage on one ring doesn't prevent
> +	 * execution on an other.
> +	 */
> +	ctx = 0;
> +	spin = NULL;
> +	for_each_engine(i915, other) {

for_each_physical_engine

> +		if (other == ring || ignore_engine(other))
> +			continue;
> +
> +		if (spin == NULL) {
> +			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> +		} else {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = spin->execbuf.buffers_ptr,
> +				.buffer_count = spin->execbuf.buffer_count,
> +				.flags = other,
> +				.rsvd1 = ctx,
> +			};
> +			gem_execbuf(i915, &execbuf);
> +		}
> +	}
> +	igt_require(spin);
> +	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
> +	igt_spin_free(i915, spin);
> +
> +	/*
> +	 * But if we create a context with just a single shared timeline,
> +	 * then it will block waiting for the earlier requests on the
> +	 * other engines.
> +	 */
> +	ctx = gem_context_clone(i915, 0, 0,
> +				I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +	spin = NULL;
> +	for_each_engine(i915, other) {

Ditto.

> +		if (other == ring || ignore_engine(other))
> +			continue;
> +
> +		if (spin == NULL) {
> +			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> +		} else {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = spin->execbuf.buffers_ptr,
> +				.buffer_count = spin->execbuf.buffer_count,
> +				.flags = other,
> +				.rsvd1 = ctx,
> +			};
> +			gem_execbuf(i915, &execbuf);
> +		}
> +	}
> +	igt_assert(spin);
> +	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
> +	igt_spin_free(i915, spin);
> +}
> +
> +static void store_dword(int i915, uint32_t ctx, unsigned ring,
> +			uint32_t target, uint32_t offset, uint32_t value,
> +			uint32_t cork, unsigned write_domain)
> +{
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc;
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	uint32_t batch[16];
> +	int i;
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj + !cork);
> +	execbuf.buffer_count = 2 + !!cork;
> +	execbuf.flags = ring;
> +	if (gen < 6)
> +		execbuf.flags |= I915_EXEC_SECURE;
> +	execbuf.rsvd1 = ctx;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = cork;
> +	obj[1].handle = target;
> +	obj[2].handle = gem_create(i915, 4096);
> +
> +	memset(&reloc, 0, sizeof(reloc));
> +	reloc.target_handle = obj[1].handle;
> +	reloc.presumed_offset = 0;
> +	reloc.offset = sizeof(uint32_t);
> +	reloc.delta = offset;
> +	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +	reloc.write_domain = write_domain;
> +	obj[2].relocs_ptr = to_user_pointer(&reloc);
> +	obj[2].relocation_count = 1;
> +
> +	i = 0;
> +	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +	if (gen >= 8) {
> +		batch[++i] = offset;
> +		batch[++i] = 0;
> +	} else if (gen >= 4) {
> +		batch[++i] = 0;
> +		batch[++i] = offset;
> +		reloc.offset += sizeof(uint32_t);
> +	} else {
> +		batch[i]--;
> +		batch[++i] = offset;
> +	}
> +	batch[++i] = value;
> +	batch[++i] = MI_BATCH_BUFFER_END;
> +	gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
> +	gem_execbuf(i915, &execbuf);
> +	gem_close(i915, obj[2].handle);
> +}
> +
> +static uint32_t create_highest_priority(int i915)
> +{
> +	uint32_t ctx = gem_context_create(i915);
> +
> +	/*
> +	 * If there is no priority support, all contexts will have equal
> +	 * priority (and therefore the max user priority), so no context
> +	 * can overtake us, and we effectively can form a plug.
> +	 */
> +	__gem_context_set_priority(i915, ctx, MAX_PRIO);
> +
> +	return ctx;
> +}
> +
> +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
> +{
> +	igt_spin_t *spin[MAX_ELSP_QLEN];

Why is this 16?

> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> +		const struct igt_spin_factory opts = {
> +			.ctx = create_highest_priority(i915),
> +			.engine = engine,
> +		};
> +		spin[n] = __igt_spin_factory(i915, &opts);
> +		gem_context_destroy(i915, opts.ctx);
> +	}
> +
> +	igt_cork_unplug(c); /* batches will now be queued on the engine */
> +	igt_debugfs_dump(i915, "i915_engine_info");
> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++)
> +		igt_spin_free(i915, spin[n]);
> +}
> +
> +static uint32_t store_timestamp(int i915,
> +				uint32_t ctx, unsigned ring,
> +				unsigned mmio_base)
> +{
> +	const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +		.relocation_count = 1,
> +	};
> +	struct drm_i915_gem_relocation_entry reloc = {
> +		.target_handle = obj.handle,
> +		.offset = 2 * sizeof(uint32_t),
> +		.delta = 4092,
> +		.read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +		.rsvd1 = ctx,
> +	};
> +	uint32_t batch[] = {
> +		0x24 << 23 | (1 + r64b), /* SRM */
> +		mmio_base + 0x358,
> +		4092,
> +		0,
> +		MI_BATCH_BUFFER_END
> +	};
> +
> +	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
> +
> +	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> +	obj.relocs_ptr = to_user_pointer(&reloc);
> +
> +	gem_execbuf(i915, &execbuf);
> +
> +	return obj.handle;
> +}
> +
> +static void independent(int i915, unsigned ring, unsigned flags)
> +{
> +	uint32_t handle[ARRAY_SIZE(priorities)];
> +	igt_spin_t *spin[MAX_ELSP_QLEN];
> +	unsigned int mmio_base;
> +
> +	/* XXX i915_query()! */
> +	switch (ring) {
> +	case I915_EXEC_DEFAULT:
> +	case I915_EXEC_RENDER:
> +		mmio_base = 0x2000;
> +		break;
> +#if 0
> +	case I915_EXEC_BSD:
> +		mmio_base = 0x12000;
> +		break;
> +#endif
> +	case I915_EXEC_BLT:
> +		mmio_base = 0x22000;
> +		break;
> +
> +	case I915_EXEC_VEBOX:
> +		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> +			mmio_base = 0x1d8000;
> +		else
> +			mmio_base = 0x1a000;
> +		break;
> +
> +	default:
> +		igt_skip("mmio base not known\n");
> +	}

Ufff this is quite questionable. Should we rather have this subtest in 
selftests only?

> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> +		const struct igt_spin_factory opts = {
> +			.ctx = create_highest_priority(i915),
> +			.engine = ring,
> +		};
> +		spin[n] = __igt_spin_factory(i915, &opts);
> +		gem_context_destroy(i915, opts.ctx);
> +	}
> +
> +	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> +		uint32_t ctx = gem_queue_create(i915);
> +		gem_context_set_priority(i915, ctx, priorities[i]);
> +		handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
> +		gem_context_destroy(i915, ctx);
> +	}
> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++)
> +		igt_spin_free(i915, spin[n]);
> +
> +	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> +		uint32_t *ptr;
> +
> +		ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
> +		gem_set_domain(i915, handle[i], /* no write hazard lies! */
> +			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +		gem_close(i915, handle[i]);
> +
> +		handle[i] = ptr[1023];

1023 relates to 4092 from store_timestamp I gather. The two need to be 
defined closer together.

> +		munmap(ptr, 4096);
> +
> +		igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
> +			  i, priorities[i], handle[i]);
> +	}
> +
> +	igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
> +}
> +
> +static void reorder(int i915, unsigned ring, unsigned flags)
> +#define EQUAL 1
> +{
> +	IGT_CORK_HANDLE(cork);
> +	uint32_t scratch;
> +	uint32_t *ptr;
> +	uint32_t ctx[2];
> +	uint32_t plug;
> +
> +	ctx[LO] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> +
> +	ctx[HI] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
> +
> +	scratch = gem_create(i915, 4096);
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	/* We expect the high priority context to be executed first, and
> +	 * so the final result will be value from the low priority context.
> +	 */
> +	store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
> +	store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
> +
> +	unplug_show_queue(i915, &cork, ring);
> +	gem_close(i915, plug);
> +
> +	gem_context_destroy(i915, ctx[LO]);
> +	gem_context_destroy(i915, ctx[HI]);
> +
> +	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> +	gem_set_domain(i915, scratch, /* no write hazard lies! */
> +		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, scratch);
> +
> +	if (flags & EQUAL) /* equal priority, result will be fifo */
> +		igt_assert_eq_u32(ptr[0], ctx[HI]);
> +	else
> +		igt_assert_eq_u32(ptr[0], ctx[LO]);
> +	munmap(ptr, 4096);
> +}
> +
> +static void promotion(int i915, unsigned ring)
> +{
> +	IGT_CORK_HANDLE(cork);
> +	uint32_t result, dep;
> +	uint32_t *ptr;
> +	uint32_t ctx[3];
> +	uint32_t plug;
> +
> +	ctx[LO] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> +
> +	ctx[HI] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[HI], 0);
> +
> +	ctx[NOISE] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
> +
> +	result = gem_create(i915, 4096);
> +	dep = gem_create(i915, 4096);
> +
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	/* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
> +	 *
> +	 * fifo would be NOISE, LO, HI.
> +	 * strict priority would be  HI, NOISE, LO
> +	 */
> +	store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
> +	store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
> +
> +	/* link LO <-> HI via a dependency on another buffer */
> +	store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
> +	store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
> +
> +	store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
> +
> +	unplug_show_queue(i915, &cork, ring);
> +	gem_close(i915, plug);
> +
> +	gem_context_destroy(i915, ctx[NOISE]);
> +	gem_context_destroy(i915, ctx[LO]);
> +	gem_context_destroy(i915, ctx[HI]);
> +
> +	ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
> +	gem_set_domain(i915, dep, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, dep);
> +
> +	igt_assert_eq_u32(ptr[0], ctx[HI]);
> +	munmap(ptr, 4096);
> +
> +	ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
> +	gem_set_domain(i915, result, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, result);
> +
> +	igt_assert_eq_u32(ptr[0], ctx[NOISE]);
> +	munmap(ptr, 4096);
> +}
> +
> +static void smoketest(int i915, unsigned ring, unsigned timeout)
> +{
> +	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +	unsigned engines[16];
> +	unsigned nengine;
> +	unsigned engine;
> +	uint32_t scratch;
> +	uint32_t *ptr;
> +
> +	nengine = 0;
> +	for_each_engine(i915, engine) {
> +		if (ignore_engine(engine))
> +			continue;
> +
> +		engines[nengine++] = engine;
> +	}
> +	igt_require(nengine);

Using for_each_physical_engine and counting the engines for the engines
array would be better, I think.

> +
> +	scratch = gem_create(i915, 4096);
> +	igt_fork(child, ncpus) {
> +		unsigned long count = 0;
> +		uint32_t ctx;
> +
> +		hars_petruska_f54_1_random_perturb(child);
> +
> +		ctx = gem_queue_create(i915);
> +		igt_until_timeout(timeout) {
> +			int prio;
> +
> +			prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
> +			gem_context_set_priority(i915, ctx, prio);
> +
> +			engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
> +			store_dword(i915, ctx, engine, scratch,
> +				    8*child + 0, ~child,
> +				    0, 0);
> +			for (unsigned int step = 0; step < 8; step++)
> +				store_dword(i915, ctx, engine, scratch,
> +					    8*child + 4, count++,
> +					    0, 0);
> +		}
> +		gem_context_destroy(i915, ctx);
> +	}
> +	igt_waitchildren();
> +
> +	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> +	gem_set_domain(i915, scratch, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, scratch);
> +
> +	for (unsigned n = 0; n < ncpus; n++) {
> +		igt_assert_eq_u32(ptr[2*n], ~n);
> +		/*
> +		 * Note this count is approximate due to unconstrained
> +		 * ordering of the dword writes between engines.
> +		 *
> +		 * Take the result with a pinch of salt.
> +		 */
> +		igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
> +	}
> +	munmap(ptr, 4096);
> +}
> +
> +igt_main
> +{
> +	const struct intel_execution_engine *e;
> +	int i915 = -1;
> +
> +	igt_fixture {
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
> +	}
> +
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(gem_contexts_has_shared_gtt(i915));
> +			igt_fork_hang_detector(i915);
> +		}
> +
> +		igt_subtest("create-shared-gtt")
> +			create_shared_gtt(i915, 0);
> +
> +		igt_subtest("detached-shared-gtt")
> +			create_shared_gtt(i915, DETACHED);
> +
> +		igt_subtest("disjoint-timelines")
> +			disjoint_timelines(i915);
> +
> +		igt_subtest("single-timeline")
> +			single_timeline(i915);
> +
> +		igt_subtest("exhaust-shared-gtt")
> +			exhaust_shared_gtt(i915, 0);
> +
> +		igt_subtest("exhaust-shared-gtt-lrc")
> +			exhaust_shared_gtt(i915, EXHAUST_LRC);
> +
> +		for (e = intel_execution_engines; e->name; e++) {
> +			igt_subtest_f("exec-shared-gtt-%s", e->name)
> +				exec_shared_gtt(i915, e->exec_id | e->flags);

The same previously raised question of whether it should iterate the legacy
execbuf engines or the physical engines. Maybe you want different subtests
to do both?

> +
> +			if (!ignore_engine(e->exec_id | e->flags)) {
> +				igt_subtest_f("exec-single-timeline-%s",
> +					      e->name)
> +					exec_single_timeline(i915,
> +							     e->exec_id | e->flags);
> +			}
> +
> +			/*
> +			 * Check that the shared contexts operate independently,
> +			 * that is requests on one ("queue") can be scheduled
> +			 * around another queue. We only check the basics here,
> +			 * enough to reduce the queue into just another context,
> +			 * and so rely on gem_exec_schedule to prove the rest.
> +			 */
> +			igt_subtest_group {
> +				igt_fixture {
> +					gem_require_ring(i915, e->exec_id | e->flags);
> +					igt_require(gem_can_store_dword(i915, e->exec_id) | e->flags);
> +					igt_require(gem_scheduler_enabled(i915));
> +					igt_require(gem_scheduler_has_ctx_priority(i915));
> +				}
> +
> +				igt_subtest_f("Q-independent-%s", e->name)
> +					independent(i915, e->exec_id | e->flags, 0);
> +
> +				igt_subtest_f("Q-in-order-%s", e->name)
> +					reorder(i915, e->exec_id | e->flags, EQUAL);
> +
> +				igt_subtest_f("Q-out-order-%s", e->name)
> +					reorder(i915, e->exec_id | e->flags, 0);
> +
> +				igt_subtest_f("Q-promotion-%s", e->name)
> +					promotion(i915, e->exec_id | e->flags);
> +
> +				igt_subtest_f("Q-smoketest-%s", e->name)
> +					smoketest(i915, e->exec_id | e->flags, 5);
> +			}
> +		}
> +
> +		igt_subtest("Q-smoketest-all") {
> +			igt_require(gem_scheduler_enabled(i915));
> +			igt_require(gem_scheduler_has_ctx_priority(i915));
> +			smoketest(i915, -1, 30);
> +		}
> +
> +		igt_fixture {
> +			igt_stop_hang_detector();
> +		}
> +	}
> +}
> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
> index 6c3b53756..d3e0b0ba2 100644
> --- a/tests/i915/gem_exec_whisper.c
> +++ b/tests/i915/gem_exec_whisper.c
> @@ -87,6 +87,7 @@ static void verify_reloc(int fd, uint32_t handle,
>   #define HANG 0x20
>   #define SYNC 0x40
>   #define PRIORITY 0x80
> +#define QUEUES 0x100
>   
>   struct hang {
>   	struct drm_i915_gem_exec_object2 obj;
> @@ -171,7 +172,7 @@ static void ctx_set_random_priority(int fd, uint32_t ctx)
>   {
>   	int prio = hars_petruska_f54_1_random_unsafe_max(1024) - 512;
>   	gem_context_set_priority(fd, ctx, prio);
> -};
> +}
>   
>   static void whisper(int fd, unsigned engine, unsigned flags)
>   {
> @@ -226,6 +227,9 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   	if (flags & CONTEXTS)
>   		gem_require_contexts(fd);
>   
> +	if (flags & QUEUES)
> +		igt_require(gem_has_queues(fd));
> +
>   	if (flags & HANG)
>   		init_hang(&hang);
>   
> @@ -290,6 +294,10 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			for (n = 0; n < 64; n++)
>   				contexts[n] = gem_context_create(fd);
>   		}
> +		if (flags & QUEUES) {
> +			for (n = 0; n < 64; n++)
> +				contexts[n] = gem_queue_create(fd);
> +		}
>   		if (flags & FDS) {
>   			for (n = 0; n < 64; n++)
>   				fds[n] = drm_open_driver(DRIVER_INTEL);
> @@ -403,7 +411,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   						execbuf.flags &= ~ENGINE_MASK;
>   						execbuf.flags |= engines[rand() % nengine];
>   					}
> -					if (flags & CONTEXTS) {
> +					if (flags & (CONTEXTS | QUEUES)) {
>   						execbuf.rsvd1 = contexts[rand() % 64];
>   						if (flags & PRIORITY)
>   							ctx_set_random_priority(this_fd, execbuf.rsvd1);
> @@ -486,7 +494,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			for (n = 0; n < 64; n++)
>   				close(fds[n]);
>   		}
> -		if (flags & CONTEXTS) {
> +		if (flags & (CONTEXTS | QUEUES)) {
>   			for (n = 0; n < 64; n++)
>   				gem_context_destroy(fd, contexts[n]);
>   		}
> @@ -522,18 +530,24 @@ igt_main
>   		{ "chain-forked", CHAIN | FORKED },
>   		{ "chain-interruptible", CHAIN | INTERRUPTIBLE },
>   		{ "chain-sync", CHAIN | SYNC },
> -		{ "contexts", CONTEXTS },
> -		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
> -		{ "contexts-forked", CONTEXTS | FORKED},
> -		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
> -		{ "contexts-chain", CONTEXTS | CHAIN },
> -		{ "contexts-sync", CONTEXTS | SYNC },
>   		{ "fds", FDS },
>   		{ "fds-interruptible", FDS | INTERRUPTIBLE},
>   		{ "fds-forked", FDS | FORKED},
>   		{ "fds-priority", FDS | FORKED | PRIORITY },
>   		{ "fds-chain", FDS | CHAIN},
>   		{ "fds-sync", FDS | SYNC},
> +		{ "contexts", CONTEXTS },
> +		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
> +		{ "contexts-forked", CONTEXTS | FORKED},
> +		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
> +		{ "contexts-chain", CONTEXTS | CHAIN },
> +		{ "contexts-sync", CONTEXTS | SYNC },
> +		{ "queues", QUEUES },
> +		{ "queues-interruptible", QUEUES | INTERRUPTIBLE},
> +		{ "queues-forked", QUEUES | FORKED},
> +		{ "queues-priority", QUEUES | FORKED | PRIORITY },
> +		{ "queues-chain", QUEUES | CHAIN },
> +		{ "queues-sync", QUEUES | SYNC },
>   		{ NULL }
>   	};
>   	int fd;
> diff --git a/tests/meson.build b/tests/meson.build
> index 3810bd760..3883ae127 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -114,6 +114,7 @@ i915_progs = [
>   	'gem_ctx_exec',
>   	'gem_ctx_isolation',
>   	'gem_ctx_param',
> +	'gem_ctx_shared',
>   	'gem_ctx_switch',
>   	'gem_ctx_thrash',
>   	'gem_double_irq_loop',
> 

Regards,

Tvrtko