[PATCH i-g-t v2 04/10] tests/intel/xe_drm_fdinfo: Add helpers for spinning batches

Thu Aug 15 16:46:56 UTC 2024

On Fri, Jul 12, 2024 at 12:04:31AM GMT, Matthew Brost wrote:
>On Thu, Jul 11, 2024 at 11:29:00AM -0700, Umesh Nerlige Ramappa wrote:
>> On Thu, Jul 11, 2024 at 07:46:23AM -0500, Lucas De Marchi wrote:
>> > On Tue, Jul 02, 2024 at 05:25:26PM GMT, Umesh Nerlige Ramappa wrote:
>> > > Add helpers for submitting batches and waiting for them to start.
>> > >
>> > > v2: Remove xe prefixes from the structures and helpers (Lucas)
>> > >
>> > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>> > > ---
>> > > tests/intel/xe_drm_fdinfo.c | 135 ++++++++++++++++++++++++++++++++++++
>> > > 1 file changed, 135 insertions(+)
>> > >
>> > > diff --git a/tests/intel/xe_drm_fdinfo.c b/tests/intel/xe_drm_fdinfo.c
>> > > index c697f3e53..037f25e53 100644
>> > > --- a/tests/intel/xe_drm_fdinfo.c
>> > > +++ b/tests/intel/xe_drm_fdinfo.c
>> > > @@ -54,6 +54,17 @@ static const char *engine_map[] = {
>> > > 	"vecs",
>> > > 	"ccs",
>> > > };
>> > > +
>> > > +static const uint64_t batch_addr[] = {
>> > > +	0x170000,
>> > > +	0x180000,
>> > > +	0x190000,
>> > > +	0x1a0000,
>> > > +	0x1b0000,
>> > > +	0x1c0000,
>> > > +	0x1d0000,
>> > > +	0x1e0000,
>> > > +};
>> >
>> > where are these numbers coming from?
>>
>> The hardcoded number above are arbitrary, although I don't know if that's an
>> issue. I did see similar numbers in use with other xe tests.
>>
>
>See below, I think you can just use a single address when setting up the
>cork (see below).

because each cork uses a different vm rather than the shared vm used
here, right?

>
>> > I'm looking at
>> > tests/intel/xe_spin_batch.c and the lib/xe/xe_spin.c implementation and
>> > I don't understand why we need to add so many helpers and handcrafted
>> > things in this particular test.
>>
>> These helpers are just a break down of something like xe_exec_balancer.c ->
>> test_exec(). The helpers are still using functions from xe_spin.c.
>>
>> The breakdown helps control some execution elements for batches like (1)
>> using a single address space when running batches on all classes, (2)
>> configuring width and num_placements for parallel/virtual cases and (3)
>> provide some flexibility to test utilization when the batch has started
>> running.
>>
>
>I see what you are trying to do, create spinners for virtual and
>parallel exec queues.
>
>I think the proper place to put this would be in xe_spin.c adding 1 or
>2 new functions that look like xe_cork_init() but for virtual and
>parallel exec queues. Then just use existing cork functions for
>everything else.
>
>I think that will work.

It seems like one problem with cork is that it exec on init.  Maybe
adding a xe_cork_start_sync() so we can separate the setup part from the
start part would be ok?

We could do _init() + _wait_started(), but with the current state of the
test we are trying to minimize the jitter to calculate the time in which
it ran. Although I was thinking... if we stop checking the cpu time and
only rely on the gpu time we collect from fdinfo the igt asserts should
be more reliable. And another thing we could use is the time saved by
the gpu in the spin itself.

I sent a few patches to better abstract the parallel/virtual setup,
similarly to what you suggested in another thread:
https://patchwork.freedesktop.org/series/137317/

I think that should mostly take care about the parallel/virtual tests
added here.  For the single() ones, I squashed locally the patches 3-5
(because otherwise they create warnings) and I'm thinking on merging
them mostly as is. Then I add the missing cork stuff and migrate the
test to use that instead of the local functions. Thoughts?

Long term I think we could do:

	s/xe_spin/xe_spin_bath/
	s/xe_cork/xe_spinner/

Thoughts?

Thanks
Lucas De Marchi

>
>I don't think any IGTs actually use the cork stuff yet (several probably
>should) but I did post an IGT recently [1] that made use of this.
>
>Matt
>
>[1] https://patchwork.freedesktop.org/patch/600254/?series=135143&rev=1
>
>> Regards,
>> Umesh
>>
>> >
>> > +Matt Brost, +Zbigniew Kempczyński to help help here
>> >
>> >
>> > Lucas De Marchi
>> >
>> > > static void read_engine_cycles(int xe, struct pceu_cycles *pceu)
>> > > {
>> > > 	struct drm_client_fdinfo info = { };
>> > > @@ -329,6 +340,130 @@ static void basic_engine_utilization(int xe)
>> > > 	igt_require(info.num_engines);
>> > > }
>> > >
>> > > +#define MAX_PARALLEL 8
>> > > +struct spin_ctx {
>> > > +	uint32_t vm;
>> > > +	uint64_t addr[MAX_PARALLEL];
>> > > +	struct drm_xe_sync sync[2];
>> > > +	struct drm_xe_exec exec;
>> > > +	uint32_t exec_queue;
>> > > +	size_t bo_size;
>> > > +	uint32_t bo;
>> > > +	struct xe_spin *spin;
>> > > +	struct xe_spin_opts spin_opts;
>> > > +	bool ended;
>> > > +	uint16_t class;
>> > > +	uint16_t width;
>> > > +	uint16_t num_placements;
>> > > +};
>> > > +
>> > > +static struct spin_ctx *
>> > > +spin_ctx_init(int fd, struct drm_xe_engine_class_instance *hwe, uint32_t vm,
>> > > +	      uint16_t width, uint16_t num_placements)
>> > > +{
>> > > +	struct spin_ctx *ctx = calloc(1, sizeof(*ctx));
>> > > +
>> > > +	igt_assert(width && num_placements &&
>> > > +		   (width == 1 || num_placements == 1));
>> > > +
>> > > +	igt_assert(width <= MAX_PARALLEL);
>> > > +
>> > > +	ctx->class = hwe->engine_class;
>> > > +	ctx->width = width;
>> > > +	ctx->num_placements = num_placements;
>> > > +	ctx->vm = vm;
>> > > +	for (int i = 0; i < ctx->width; i++)
>> > > +		ctx->addr[i] = batch_addr[hwe->engine_class];
>> > > +
>> > > +	ctx->exec.num_batch_buffer = width;
>> > > +	ctx->exec.num_syncs = 2;
>> > > +	ctx->exec.syncs = to_user_pointer(ctx->sync);
>> > > +
>> > > +	ctx->sync[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
>> > > +	ctx->sync[0].flags = DRM_XE_SYNC_FLAG_SIGNAL;
>> > > +	ctx->sync[0].handle = syncobj_create(fd, 0);
>> > > +
>> > > +	ctx->sync[1].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
>> > > +	ctx->sync[1].flags = DRM_XE_SYNC_FLAG_SIGNAL;
>> > > +	ctx->sync[1].handle = syncobj_create(fd, 0);
>> > > +
>> > > +	ctx->bo_size = sizeof(struct xe_spin);
>> > > +	ctx->bo_size = xe_bb_size(fd, ctx->bo_size);
>> > > +	ctx->bo = xe_bo_create(fd, ctx->vm, ctx->bo_size,
>> > > +			       vram_if_possible(fd, hwe->gt_id),
>> > > +			       DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
>> > > +	ctx->spin = xe_bo_map(fd, ctx->bo, ctx->bo_size);
>> > > +
>> > > +	igt_assert_eq(__xe_exec_queue_create(fd, ctx->vm, width, num_placements,
>> > > +					     hwe, 0, &ctx->exec_queue), 0);
>> > > +
>> > > +	xe_vm_bind_async(fd, ctx->vm, 0, ctx->bo, 0, ctx->addr[0], ctx->bo_size,
>> > > +			 ctx->sync, 1);
>> > > +
>> > > +	return ctx;
>> > > +}
>> > > +
>> > > +static void
>> > > +spin_sync_start(int fd, struct spin_ctx *ctx)
>> > > +{
>> > > +	if (!ctx)
>> > > +		return;
>> > > +
>> > > +	ctx->spin_opts.addr = ctx->addr[0];
>> > > +	ctx->spin_opts.preempt = true;
>> > > +	xe_spin_init(ctx->spin, &ctx->spin_opts);
>> > > +
>> > > +	/* re-use sync[0] for exec */
>> > > +	ctx->sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
>> > > +
>> > > +	ctx->exec.exec_queue_id = ctx->exec_queue;
>> > > +	if (ctx->width > 1)
>> > > +		ctx->exec.address = to_user_pointer(ctx->addr);
>> > > +	else
>> > > +		ctx->exec.address = ctx->addr[0];
>> > > +	xe_exec(fd, &ctx->exec);
>> > > +
>> > > +	xe_spin_wait_started(ctx->spin);
>> > > +	igt_assert(!syncobj_wait(fd, &ctx->sync[1].handle, 1, 1, 0, NULL));
>> > > +
>> > > +	igt_debug("%s: spinner started\n", engine_map[ctx->class]);
>> > > +}
>> > > +
>> > > +static void
>> > > +spin_sync_end(int fd, struct spin_ctx *ctx)
>> > > +{
>> > > +	if (!ctx || ctx->ended)
>> > > +		return;
>> > > +
>> > > +	xe_spin_end(ctx->spin);
>> > > +
>> > > +	igt_assert(syncobj_wait(fd, &ctx->sync[1].handle, 1, INT64_MAX, 0, NULL));
>> > > +	igt_assert(syncobj_wait(fd, &ctx->sync[0].handle, 1, INT64_MAX, 0, NULL));
>> > > +
>> > > +	ctx->sync[0].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
>> > > +	xe_vm_unbind_async(fd, ctx->vm, 0, 0, ctx->addr[0], ctx->bo_size, ctx->sync, 1);
>> > > +	igt_assert(syncobj_wait(fd, &ctx->sync[0].handle, 1, INT64_MAX, 0, NULL));
>> > > +
>> > > +	ctx->ended = true;
>> > > +	igt_debug("%s: spinner ended\n", engine_map[ctx->class]);
>> > > +}
>> > > +
>> > > +static void
>> > > +spin_ctx_destroy(int fd, struct spin_ctx *ctx)
>> > > +{
>> > > +	if (!ctx)
>> > > +		return;
>> > > +
>> > > +	syncobj_destroy(fd, ctx->sync[0].handle);
>> > > +	syncobj_destroy(fd, ctx->sync[1].handle);
>> > > +	xe_exec_queue_destroy(fd, ctx->exec_queue);
>> > > +
>> > > +	munmap(ctx->spin, ctx->bo_size);
>> > > +	gem_close(fd, ctx->bo);
>> > > +
>> > > +	free(ctx);
>> > > +}
>> > > +
>> > > igt_main
>> > > {
>> > > 	int xe;
>> > > --
>> > > 2.38.1
>> > >