[PATCH i-g-t 1/1] tests/intel/xe_exec_mix_modes: Add new tests for parallel execution
Francois Dugast
francois.dugast at intel.com
Wed Jul 17 16:24:58 UTC 2024
On Wed, Jul 17, 2024 at 01:47:39PM +0000, Matthew Brost wrote:
> On Wed, Jul 17, 2024 at 02:30:48PM +0200, Francois Dugast wrote:
> > Test parallel execution of LR and dma fence jobs on the same device.
> >
> > Add the following tests:
> > * "exec-simple-batch-store-lr"
> > * "exec-simple-batch-store-dma-fence"
> > * "exec-spinner-interrupted-lr"
> > * "exec-spinner-interrupted-dma-fence"
> >
>
> Really good test, a couple of nits / questions.
Thanks for the review.
>
> > Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> > ---
> > tests/intel/xe_exec_mix_modes.c | 277 ++++++++++++++++++++++++++++++++
> > tests/meson.build | 1 +
> > 2 files changed, 278 insertions(+)
> > create mode 100644 tests/intel/xe_exec_mix_modes.c
> >
> > diff --git a/tests/intel/xe_exec_mix_modes.c b/tests/intel/xe_exec_mix_modes.c
> > new file mode 100644
> > index 000000000..44265b220
> > --- /dev/null
> > +++ b/tests/intel/xe_exec_mix_modes.c
> > @@ -0,0 +1,277 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2024 Intel Corporation
> > + */
> > +
> > +/**
> > + * TEST: Test the parallel submission of jobs in LR and dma fence modes
> > + * Category: Core
> > + * Mega feature: General Core features
> > + * Sub-category: CMD submission
> > + * Functionality: fault mode
> > + * GPU requirements: GPU needs support for DRM_XE_VM_CREATE_FLAG_FAULT_MODE
> > + */
> > +
> > +#include <fcntl.h>
> > +
> > +#include "igt.h"
> > +#include "lib/igt_syncobj.h"
> > +#include "lib/intel_reg.h"
> > +#include "xe_drm.h"
> > +
> > +#include "xe/xe_ioctl.h"
> > +#include "xe/xe_query.h"
> > +#include "xe/xe_spin.h"
> > +#include <string.h>
> > +
> > +#define FLAG_EXEC_MODE_LR (0x1 << 0)
> > +#define FLAG_JOB_TYPE_SIMPLE (0x1 << 1)
> > +
> > +#define NUM_INTERRUPTING_JOBS 5
> > +#define USER_FENCE_VALUE 0xdeadbeefdeadbeefull
> > +#define ONE_SEC MS_TO_NS(1000)
> > +#define VM_DATA 0
> > +#define SPIN_DATA 1
> > +#define EXEC_DATA 2
> > +#define DATA_COUNT 3
> > +
> > +struct data {
> > + struct xe_spin spin;
> > + uint32_t batch[16];
> > + uint64_t vm_sync;
> > + uint32_t data;
> > + uint64_t exec_sync;
> > + uint64_t addr;
> > +};
> > +
> > +static void store_dword_batch(struct data *data, uint64_t addr, int value)
> > +{
> > + int b;
> > + uint64_t batch_offset = (char *)&(data->batch) - (char *)data;
> > + uint64_t batch_addr = addr + batch_offset;
> > + uint64_t sdi_offset = (char *)&(data->data) - (char *)data;
> > + uint64_t sdi_addr = addr + sdi_offset;
> > +
> > + b = 0;
> > + data->batch[b++] = MI_STORE_DWORD_IMM_GEN4;
> > + data->batch[b++] = sdi_addr;
> > + data->batch[b++] = sdi_addr >> 32;
> > + data->batch[b++] = value;
> > + data->batch[b++] = MI_BATCH_BUFFER_END;
> > + igt_assert(b <= ARRAY_SIZE(data->batch));
> > +
> > + data->addr = batch_addr;
> > +}
> > +
> > +enum engine_execution_mode {
> > + EXEC_MODE_LR,
> > + EXEC_MODE_DMA_FENCE,
> > +};
> > +
> > +enum job_type {
> > + SIMPLE_BATCH_STORE,
> > + SPINNER_INTERRUPTED,
> > +};
> > +
> > +static void
> > +run_job(int fd, struct drm_xe_engine_class_instance *hwe,
> > + enum engine_execution_mode engine_execution_mode,
> > + enum job_type job_type)
> > +{
> > + struct drm_xe_sync sync[1] = {
> > + { .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
> > + };
> > + struct drm_xe_exec exec = {
> > + .num_batch_buffer = 1,
> > + .num_syncs = 1,
> > + .syncs = to_user_pointer(&sync),
> > + };
> > + struct drm_xe_ext_set_property ext = {
> > + .base.next_extension = 0,
> > + .base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
> > + .property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY,
> > + .value = 2, /* High priority */
> > + };
>
> I don't understand why setting High priority here. Can you explain?
No reason for it, it was a left over from earlier experiments and will
be removed in the next version.
>
> > + struct data *data;
> > + uint32_t vm;
> > + uint32_t exec_queue;
> > + size_t bo_size;
> > + int value = 0x123456;
> > + uint64_t addr = 0x100000;
> > + uint32_t bo = 0;
> > + unsigned int vm_flags = 0;
> > + struct xe_spin_opts spin_opts = { .preempt = true };
> > + const uint64_t duration_ns = NSEC_PER_SEC / 2; /* 500ms */
> > + struct timespec tv;
> > +
> > + if (engine_execution_mode == EXEC_MODE_LR) {
> > + sync[0].type = DRM_XE_SYNC_TYPE_USER_FENCE;
> > + sync[0].timeline_value = USER_FENCE_VALUE;
> > + vm_flags = DRM_XE_VM_CREATE_FLAG_LR_MODE | DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
> > + } else if (engine_execution_mode == EXEC_MODE_DMA_FENCE) {
> > + sync[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
> > + sync[0].handle = syncobj_create(fd, 0);
> > + }
> > +
> > + vm = xe_vm_create(fd, vm_flags, 0);
> > + bo_size = sizeof(*data) * DATA_COUNT;
> > + bo_size = xe_bb_size(fd, bo_size);
> > + bo = xe_bo_create(fd, vm, bo_size,
> > + vram_if_possible(fd, hwe->gt_id),
> > + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
> > + data = xe_bo_map(fd, bo, bo_size);
> > + if (engine_execution_mode == EXEC_MODE_LR)
> > + sync[0].addr = to_user_pointer(&data[VM_DATA].vm_sync);
> > + xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, &sync[0], 1);
> > +
> > + store_dword_batch(data, addr, value);
> > + if (engine_execution_mode == EXEC_MODE_LR) {
> > + xe_wait_ufence(fd, &data[VM_DATA].vm_sync, USER_FENCE_VALUE, 0, ONE_SEC);
> > + sync[0].addr = addr + (char *)&data[EXEC_DATA].exec_sync - (char *)data;
> > + } else if (engine_execution_mode == EXEC_MODE_DMA_FENCE) {
> > + igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
> > + syncobj_reset(fd, &sync[0].handle, 1);
> > + sync[0].flags &= DRM_XE_SYNC_FLAG_SIGNAL;
> > + }
> > + exec_queue = xe_exec_queue_create(fd, vm, hwe, to_user_pointer(&ext));
> > + exec.exec_queue_id = exec_queue;
> > +
> > + if (job_type == SPINNER_INTERRUPTED) {
> > + spin_opts.addr = addr + (char *)&data[SPIN_DATA].spin - (char *)data;
> > + spin_opts.ctx_ticks = duration_to_ctx_ticks(fd, 0, duration_ns);
> > + xe_spin_init(&data[SPIN_DATA].spin, &spin_opts);
> > + if (engine_execution_mode == EXEC_MODE_LR)
> > + sync[0].addr = addr + (char *)&data[SPIN_DATA].exec_sync - (char *)data;
>
> Indentation looks off here.
Will fix.
>
> > + exec.address = spin_opts.addr;
> > + } else if (job_type == SIMPLE_BATCH_STORE) {
> > + exec.address = data->addr;
> > + }
> > + xe_exec(fd, &exec);
> > +
> > + if (job_type == SPINNER_INTERRUPTED) {
> > + enum engine_execution_mode interrupting_engine_execution_mode;
> > + if (engine_execution_mode == EXEC_MODE_LR)
> > + interrupting_engine_execution_mode = EXEC_MODE_DMA_FENCE;
> > + else if (engine_execution_mode == EXEC_MODE_DMA_FENCE)
> > + interrupting_engine_execution_mode = EXEC_MODE_LR;
> > + xe_spin_wait_started(&data[SPIN_DATA].spin);
> > + igt_gettime(&tv);
> > + for (int i = 0; i < NUM_INTERRUPTING_JOBS; i++)
> > + {
> > + run_job(fd, hwe, interrupting_engine_execution_mode, SIMPLE_BATCH_STORE);
> > + /**
> > + * Executing a SIMPLE_BATCH_STORE job takes significantly less time than
> > + * duration_ns.
> > + * When a spinner is running in LR mode, the interrupting job preempts it
> > + * in KMD and should complete fast, shortly after starting the spinner.
> > + * When a spinner is running in dma fence mode, the interrupting job waits
> > + * in KMD and should complete shortly after the spinner has ended.
> > + * The checks below are to verify preempting/waiting happens as expected
> > + * depending on the execution mode.
> > + */
> > + if (engine_execution_mode == EXEC_MODE_LR)
> > + igt_assert(igt_nsec_elapsed(&tv) < 0.5 * duration_ns);
> > + else if (engine_execution_mode == EXEC_MODE_DMA_FENCE)
> > + igt_assert(igt_nsec_elapsed(&tv) > duration_ns);
> > + }
> > + }
>
> Should we also run the 'NUM_INTERRUPTING_JOBS' loop here? i.e.
> simple-batch-store-lr / simple-batch-store-dma-fence test opening VMs in
> both modes doing a simple store (no preemption)?
Yes this is a good improvement and only requires a minor change.
>
> We'd have to protect against infinite recursion though but adding a flag to
> protect against that should be easy. e.g.
> SIMPLE_BATCH_STORE_NO_RECURSION.
>
> > +
> > + if (engine_execution_mode == EXEC_MODE_LR) {
> > + if (job_type == SPINNER_INTERRUPTED)
> > + xe_wait_ufence(fd, &data[SPIN_DATA].exec_sync, USER_FENCE_VALUE, 0, ONE_SEC * 2);
> > + else if (job_type == SIMPLE_BATCH_STORE)
> > + xe_wait_ufence(fd, &data[EXEC_DATA].exec_sync, USER_FENCE_VALUE, 0, ONE_SEC * 2);
> > + } else if (engine_execution_mode == EXEC_MODE_DMA_FENCE) {
> > + igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
> > + syncobj_destroy(fd, sync[0].handle);
> > + }
> > +
> > + if (job_type == SIMPLE_BATCH_STORE)
> > + igt_assert_eq(data->data, value);
> > +
> > + munmap(data, bo_size);
> > + gem_close(fd, bo);
> > + xe_exec_queue_destroy(fd, exec_queue);
> > + xe_vm_destroy(fd, vm);
> > +}
> > +
> > +/**
> > + * SUBTEST: exec-simple-batch-store-lr
> > + * Description: Execute a simple batch store job in long running mode
> > + *
> > + * SUBTEST: exec-simple-batch-store-dma-fence
> > + * Description: Execute a simple batch store job in dma fence mode
> > + *
> > + * SUBTEST: exec-spinner-interrupted-lr
> > + * Description: Spin in long running mode then get interrupted by a simple
> > + * batch store job in dma fence mode
> > + *
> > + * SUBTEST: exec-spinner-interrupted-dma-fence
> > + * Description: Spin in dma fence mode then get interrupted by a simple
> > + * batch store job in long running mode
> > + */
> > +static void
> > +test_exec(int fd, struct drm_xe_engine_class_instance *hwe,
> > + unsigned int flags)
> > +{
> > + enum engine_execution_mode engine_execution_mode;
> > + enum job_type job_type;
> > +
> > + if (flags & FLAG_EXEC_MODE_LR)
> > + engine_execution_mode = EXEC_MODE_LR;
> > + else
> > + engine_execution_mode = EXEC_MODE_DMA_FENCE;
> > +
> > + if (flags & FLAG_JOB_TYPE_SIMPLE)
> > + job_type = SIMPLE_BATCH_STORE;
> > + else
> > + job_type = SPINNER_INTERRUPTED;
> > +
> > + run_job(fd, hwe, engine_execution_mode, job_type);
> > +}
> > +
> > +igt_main
> > +
> > +
>
> Extra whitespace.
Will fix.
>
> > +{
> > + struct drm_xe_engine_class_instance *hwe;
> > + const struct section {
> > + const char *name;
> > + unsigned int flags;
> > + } sections[] = {
> > + { "simple-batch-store-lr", FLAG_JOB_TYPE_SIMPLE | FLAG_EXEC_MODE_LR },
> > + { "simple-batch-store-dma-fence", FLAG_JOB_TYPE_SIMPLE },
> > + { "spinner-interrupted-lr", FLAG_EXEC_MODE_LR },
> > + { "spinner-interrupted-dma-fence", 0 },
> > + { NULL },
> > + };
> > + int fd;
> > +
> > + igt_fixture {
> > + struct timespec tv = {};
> > + bool supports_faults;
> > + int ret = 0;
> > + int timeout = igt_run_in_simulation() ? 20 : 2;
> > +
> > + fd = drm_open_driver(DRIVER_XE);
> > + do {
> > + if (ret)
> > + usleep(5000);
> > + ret = xe_supports_faults(fd);
> > + } while (ret == -EBUSY && igt_seconds_elapsed(&tv) < timeout);
> > +
> > + supports_faults = !ret;
> > + igt_require(supports_faults);
>
> I was unsure why this code was added in xe_exec_fault_mode, so had to
> look:
>
> git format-patch -1 8abb25ffe58
>
> If you read the explanation for that, it is because we don't support mixing
> faulting and non-faulting VMs being open at the same time + races
> closing the VMs. With your KMD series we support having both a faulting
> VM and non-faulting VM open so this loop is not required.
>
> e.g. I think you can just do this:
>
> igt_fixture {
> fd = drm_open_driver(DRIVER_XE);
> igt_require(xe_supports_faults(fd));
> }
Yes indeed, actually taking into account the logic for xe_supports_faults()
it should probably be along those lines:
igt_fixture {
ret = xe_supports_faults(fd);
supports_faults = !ret;
igt_require(supports_faults);
}
>
> Matt
>
> > + }
> > +
> > + for (const struct section *s = sections; s->name; s++) {
> > + igt_subtest_f("exec-%s", s->name)
> > + xe_for_each_engine(fd, hwe)
> > + if (hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
> > + test_exec(fd, hwe, s->flags);
> > + }
> > +
> > + igt_fixture {
> > + drm_close_driver(fd);
> > + }
> > +}
> > diff --git a/tests/meson.build b/tests/meson.build
> > index 357db2723..e649466be 100644
> > --- a/tests/meson.build
> > +++ b/tests/meson.build
> > @@ -286,6 +286,7 @@ intel_xe_progs = [
> > 'xe_exec_basic',
> > 'xe_exec_compute_mode',
> > 'xe_exec_fault_mode',
> > + 'xe_exec_mix_modes',
> > 'xe_exec_queue_property',
> > 'xe_exec_reset',
> > 'xe_exec_sip',
> > --
> > 2.43.0
> >
More information about the igt-dev
mailing list