[PATCH i-g-t v2 04/10] tests/intel/xe_drm_fdinfo: Add helpers for spinning batches

Fri Jul 12 00:04:31 UTC 2024

On Thu, Jul 11, 2024 at 11:29:00AM -0700, Umesh Nerlige Ramappa wrote:
> On Thu, Jul 11, 2024 at 07:46:23AM -0500, Lucas De Marchi wrote:
> > On Tue, Jul 02, 2024 at 05:25:26PM GMT, Umesh Nerlige Ramappa wrote:
> > > Add helpers for submitting batches and waiting for them to start.
> > > 
> > > v2: Remove xe prefixes from the structures and helpers (Lucas)
> > > 
> > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
> > > ---
> > > tests/intel/xe_drm_fdinfo.c | 135 ++++++++++++++++++++++++++++++++++++
> > > 1 file changed, 135 insertions(+)
> > > 
> > > diff --git a/tests/intel/xe_drm_fdinfo.c b/tests/intel/xe_drm_fdinfo.c
> > > index c697f3e53..037f25e53 100644
> > > --- a/tests/intel/xe_drm_fdinfo.c
> > > +++ b/tests/intel/xe_drm_fdinfo.c
> > > @@ -54,6 +54,17 @@ static const char *engine_map[] = {
> > > 	"vecs",
> > > 	"ccs",
> > > };
> > > +
> > > +static const uint64_t batch_addr[] = {
> > > +	0x170000,
> > > +	0x180000,
> > > +	0x190000,
> > > +	0x1a0000,
> > > +	0x1b0000,
> > > +	0x1c0000,
> > > +	0x1d0000,
> > > +	0x1e0000,
> > > +};
> > 
> > where are these numbers coming from?
> 
> The hardcoded numbers above are arbitrary, although I don't know if that's an
> issue. I did see similar numbers in use with other xe tests.
>

I think you can just use a single address when setting up the cork (see
below).

> > I'm looking at
> > tests/intel/xe_spin_batch.c and the lib/xe/xe_spin.c implementation and
> > I don't understand why we need to add so many helpers and handcrafted
> > things in this particular test.
> 
> These helpers are just a break down of something like xe_exec_balancer.c ->
> test_exec(). The helpers are still using functions from xe_spin.c.
> 
> The breakdown helps control some execution elements for batches like (1)
> using a single address space when running batches on all classes, (2)
> configuring width and num_placements for parallel/virtual cases and (3)
> provide some flexibility to test utilization when the batch has started
> running.
> 

I see what you are trying to do: create spinners for virtual and
parallel exec queues.

I think the proper place to put this would be in xe_spin.c adding 1 or
2 new functions that look like xe_cork_init() but for virtual and
parallel exec queues. Then just use existing cork functions for
everything else.

I think that will work.

I don't think any IGTs actually use the cork stuff yet (several probably
should) but I did post an IGT recently [1] that made use of this.

Matt

[1] https://patchwork.freedesktop.org/patch/600254/?series=135143&rev=1

> Regards,
> Umesh
> 
> > 
> > +Matt Brost, +Zbigniew Kempczyński to help here
> > 
> > 
> > Lucas De Marchi
> > 
> > > static void read_engine_cycles(int xe, struct pceu_cycles *pceu)
> > > {
> > > 	struct drm_client_fdinfo info = { };
> > > @@ -329,6 +340,130 @@ static void basic_engine_utilization(int xe)
> > > 	igt_require(info.num_engines);
> > > }
> > > 
> > > +#define MAX_PARALLEL 8
> > > +struct spin_ctx {
> > > +	uint32_t vm;
> > > +	uint64_t addr[MAX_PARALLEL];
> > > +	struct drm_xe_sync sync[2];
> > > +	struct drm_xe_exec exec;
> > > +	uint32_t exec_queue;
> > > +	size_t bo_size;
> > > +	uint32_t bo;
> > > +	struct xe_spin *spin;
> > > +	struct xe_spin_opts spin_opts;
> > > +	bool ended;
> > > +	uint16_t class;
> > > +	uint16_t width;
> > > +	uint16_t num_placements;
> > > +};
> > > +
> > > +static struct spin_ctx *
> > > +spin_ctx_init(int fd, struct drm_xe_engine_class_instance *hwe, uint32_t vm,
> > > +	      uint16_t width, uint16_t num_placements)
> > > +{
> > > +	struct spin_ctx *ctx = calloc(1, sizeof(*ctx));
> > > +
> > > +	igt_assert(width && num_placements &&
> > > +		   (width == 1 || num_placements == 1));
> > > +
> > > +	igt_assert(width <= MAX_PARALLEL);
> > > +
> > > +	ctx->class = hwe->engine_class;
> > > +	ctx->width = width;
> > > +	ctx->num_placements = num_placements;
> > > +	ctx->vm = vm;
> > > +	for (int i = 0; i < ctx->width; i++)
> > > +		ctx->addr[i] = batch_addr[hwe->engine_class];
> > > +
> > > +	ctx->exec.num_batch_buffer = width;
> > > +	ctx->exec.num_syncs = 2;
> > > +	ctx->exec.syncs = to_user_pointer(ctx->sync);
> > > +
> > > +	ctx->sync[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
> > > +	ctx->sync[0].flags = DRM_XE_SYNC_FLAG_SIGNAL;
> > > +	ctx->sync[0].handle = syncobj_create(fd, 0);
> > > +
> > > +	ctx->sync[1].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
> > > +	ctx->sync[1].flags = DRM_XE_SYNC_FLAG_SIGNAL;
> > > +	ctx->sync[1].handle = syncobj_create(fd, 0);
> > > +
> > > +	ctx->bo_size = sizeof(struct xe_spin);
> > > +	ctx->bo_size = xe_bb_size(fd, ctx->bo_size);
> > > +	ctx->bo = xe_bo_create(fd, ctx->vm, ctx->bo_size,
> > > +			       vram_if_possible(fd, hwe->gt_id),
> > > +			       DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
> > > +	ctx->spin = xe_bo_map(fd, ctx->bo, ctx->bo_size);
> > > +
> > > +	igt_assert_eq(__xe_exec_queue_create(fd, ctx->vm, width, num_placements,
> > > +					     hwe, 0, &ctx->exec_queue), 0);
> > > +
> > > +	xe_vm_bind_async(fd, ctx->vm, 0, ctx->bo, 0, ctx->addr[0], ctx->bo_size,
> > > +			 ctx->sync, 1);
> > > +
> > > +	return ctx;
> > > +}
> > > +
> > > +static void
> > > +spin_sync_start(int fd, struct spin_ctx *ctx)
> > > +{
> > > +	if (!ctx)
> > > +		return;
> > > +
> > > +	ctx->spin_opts.addr = ctx->addr[0];
> > > +	ctx->spin_opts.preempt = true;
> > > +	xe_spin_init(ctx->spin, &ctx->spin_opts);
> > > +
> > > +	/* re-use sync[0] for exec */
> > > +	ctx->sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
> > > +
> > > +	ctx->exec.exec_queue_id = ctx->exec_queue;
> > > +	if (ctx->width > 1)
> > > +		ctx->exec.address = to_user_pointer(ctx->addr);
> > > +	else
> > > +		ctx->exec.address = ctx->addr[0];
> > > +	xe_exec(fd, &ctx->exec);
> > > +
> > > +	xe_spin_wait_started(ctx->spin);
> > > +	igt_assert(!syncobj_wait(fd, &ctx->sync[1].handle, 1, 1, 0, NULL));
> > > +
> > > +	igt_debug("%s: spinner started\n", engine_map[ctx->class]);
> > > +}
> > > +
> > > +static void
> > > +spin_sync_end(int fd, struct spin_ctx *ctx)
> > > +{
> > > +	if (!ctx || ctx->ended)
> > > +		return;
> > > +
> > > +	xe_spin_end(ctx->spin);
> > > +
> > > +	igt_assert(syncobj_wait(fd, &ctx->sync[1].handle, 1, INT64_MAX, 0, NULL));
> > > +	igt_assert(syncobj_wait(fd, &ctx->sync[0].handle, 1, INT64_MAX, 0, NULL));
> > > +
> > > +	ctx->sync[0].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
> > > +	xe_vm_unbind_async(fd, ctx->vm, 0, 0, ctx->addr[0], ctx->bo_size, ctx->sync, 1);
> > > +	igt_assert(syncobj_wait(fd, &ctx->sync[0].handle, 1, INT64_MAX, 0, NULL));
> > > +
> > > +	ctx->ended = true;
> > > +	igt_debug("%s: spinner ended\n", engine_map[ctx->class]);
> > > +}
> > > +
> > > +static void
> > > +spin_ctx_destroy(int fd, struct spin_ctx *ctx)
> > > +{
> > > +	if (!ctx)
> > > +		return;
> > > +
> > > +	syncobj_destroy(fd, ctx->sync[0].handle);
> > > +	syncobj_destroy(fd, ctx->sync[1].handle);
> > > +	xe_exec_queue_destroy(fd, ctx->exec_queue);
> > > +
> > > +	munmap(ctx->spin, ctx->bo_size);
> > > +	gem_close(fd, ctx->bo);
> > > +
> > > +	free(ctx);
> > > +}
> > > +
> > > igt_main
> > > {
> > > 	int xe;
> > > -- 
> > > 2.38.1
> > >