[PATCH i-g-t 1/1] tests/intel/xe_exec_mix_modes: Add new tests for parallel execution
Francois Dugast
francois.dugast at intel.com
Wed Jul 17 16:24:58 UTC 2024
On Wed, Jul 17, 2024 at 01:47:39PM +0000, Matthew Brost wrote:
> On Wed, Jul 17, 2024 at 02:30:48PM +0200, Francois Dugast wrote:
> > Test parallel execution of LR and dma fence jobs on the same device.
> >
> > Add the following tests:
> > * "exec-simple-batch-store-lr"
> > * "exec-simple-batch-store-dma-fence"
> > * "exec-spinner-interrupted-lr"
> > * "exec-spinner-interrupted-dma-fence"
> >
>
> Really good test, a couple of nits / questions.
Thanks for the review.
>
> > Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> > ---
> > tests/intel/xe_exec_mix_modes.c | 277 ++++++++++++++++++++++++++++++++
> > tests/meson.build | 1 +
> > 2 files changed, 278 insertions(+)
> > create mode 100644 tests/intel/xe_exec_mix_modes.c
> >
> > diff --git a/tests/intel/xe_exec_mix_modes.c b/tests/intel/xe_exec_mix_modes.c
> > new file mode 100644
> > index 000000000..44265b220
> > --- /dev/null
> > +++ b/tests/intel/xe_exec_mix_modes.c
> > @@ -0,0 +1,277 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2024 Intel Corporation
> > + */
> > +
> > +/**
> > + * TEST: Test the parallel submission of jobs in LR and dma fence modes
> > + * Category: Core
> > + * Mega feature: General Core features
> > + * Sub-category: CMD submission
> > + * Functionality: fault mode
> > + * GPU requirements: GPU needs support for DRM_XE_VM_CREATE_FLAG_FAULT_MODE
> > + */
> > +
> > +#include <fcntl.h>
> > +
> > +#include "igt.h"
> > +#include "lib/igt_syncobj.h"
> > +#include "lib/intel_reg.h"
> > +#include "xe_drm.h"
> > +
> > +#include "xe/xe_ioctl.h"
> > +#include "xe/xe_query.h"
> > +#include "xe/xe_spin.h"
> > +#include <string.h>
> > +
> > +#define FLAG_EXEC_MODE_LR (0x1 << 0)
> > +#define FLAG_JOB_TYPE_SIMPLE (0x1 << 1)
> > +
> > +#define NUM_INTERRUPTING_JOBS 5
> > +#define USER_FENCE_VALUE 0xdeadbeefdeadbeefull
> > +#define ONE_SEC MS_TO_NS(1000)
> > +#define VM_DATA 0
> > +#define SPIN_DATA 1
> > +#define EXEC_DATA 2
> > +#define DATA_COUNT 3
> > +
> > +struct data {
> > + struct xe_spin spin;
> > + uint32_t batch[16];
> > + uint64_t vm_sync;
> > + uint32_t data;
> > + uint64_t exec_sync;
> > + uint64_t addr;
> > +};
> > +
> > +static void store_dword_batch(struct data *data, uint64_t addr, int value)
> > +{
> > + int b;
> > + uint64_t batch_offset = (char *)&(data->batch) - (char *)data;
> > + uint64_t batch_addr = addr + batch_offset;
> > + uint64_t sdi_offset = (char *)&(data->data) - (char *)data;
> > + uint64_t sdi_addr = addr + sdi_offset;
> > +
> > + b = 0;
> > + data->batch[b++] = MI_STORE_DWORD_IMM_GEN4;
> > + data->batch[b++] = sdi_addr;
> > + data->batch[b++] = sdi_addr >> 32;
> > + data->batch[b++] = value;
> > + data->batch[b++] = MI_BATCH_BUFFER_END;
> > + igt_assert(b <= ARRAY_SIZE(data->batch));
> > +
> > + data->addr = batch_addr;
> > +}
> > +
> > +enum engine_execution_mode {
> > + EXEC_MODE_LR,
> > + EXEC_MODE_DMA_FENCE,
> > +};
> > +
> > +enum job_type {
> > + SIMPLE_BATCH_STORE,
> > + SPINNER_INTERRUPTED,
> > +};
> > +
> > +static void
> > +run_job(int fd, struct drm_xe_engine_class_instance *hwe,
> > + enum engine_execution_mode engine_execution_mode,
> > + enum job_type job_type)
> > +{
> > + struct drm_xe_sync sync[1] = {
> > + { .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
> > + };
> > + struct drm_xe_exec exec = {
> > + .num_batch_buffer = 1,
> > + .num_syncs = 1,
> > + .syncs = to_user_pointer(&sync),
> > + };
> > + struct drm_xe_ext_set_property ext = {
> > + .base.next_extension = 0,
> > + .base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
> > + .property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY,
> > + .value = 2, /* High priority */
> > + };
>
> I don't understand why setting High priority here. Can you explain?
No reason for it, it was a left over from earlier experiments and will
be removed in the next version.
>
> > + struct data *data;
> > + uint32_t vm;
> > + uint32_t exec_queue;
> > + size_t bo_size;
> > + int value = 0x123456;
> > + uint64_t addr = 0x100000;
> > + uint32_t bo = 0;
> > + unsigned int vm_flags = 0;
> > + struct xe_spin_opts spin_opts = { .preempt = true };
> > + const uint64_t duration_ns = NSEC_PER_SEC / 2; /* 500ms */
> > + struct timespec tv;
> > +
> > + if (engine_execution_mode == EXEC_MODE_LR) {
> > + sync[0].type = DRM_XE_SYNC_TYPE_USER_FENCE;
> > + sync[0].timeline_value = USER_FENCE_VALUE;
> > + vm_flags = DRM_XE_VM_CREATE_FLAG_LR_MODE | DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
> > + } else if (engine_execution_mode == EXEC_MODE_DMA_FENCE) {
> > + sync[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
> > + sync[0].handle = syncobj_create(fd, 0);
> > + }
> > +
> > + vm = xe_vm_create(fd, vm_flags, 0);
> > + bo_size = sizeof(*data) * DATA_COUNT;
> > + bo_size = xe_bb_size(fd, bo_size);
> > + bo = xe_bo_create(fd, vm, bo_size,
> > + vram_if_possible(fd, hwe->gt_id),
> > + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
> > + data = xe_bo_map(fd, bo, bo_size);
> > + if (engine_execution_mode == EXEC_MODE_LR)
> > + sync[0].addr = to_user_pointer(&data[VM_DATA].vm_sync);
> > + xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, &sync[0], 1);
> > +
> > + store_dword_batch(data, addr, value);
> > + if (engine_execution_mode == EXEC_MODE_LR) {
> > + xe_wait_ufence(fd, &data[VM_DATA].vm_sync, USER_FENCE_VALUE, 0, ONE_SEC);
> > + sync[0].addr = addr + (char *)&data[EXEC_DATA].exec_sync - (char *)data;
> > + } else if (engine_execution_mode == EXEC_MODE_DMA_FENCE) {
> > + igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
> > + syncobj_reset(fd, &sync[0].handle, 1);
> > + sync[0].flags &= DRM_XE_SYNC_FLAG_SIGNAL;
> > + }
> > + exec_queue = xe_exec_queue_create(fd, vm, hwe, to_user_pointer(&ext));
> > + exec.exec_queue_id = exec_queue;
> > +
> > + if (job_type == SPINNER_INTERRUPTED) {
> > + spin_opts.addr = addr + (char *)&data[SPIN_DATA].spin - (char *)data;
> > + spin_opts.ctx_ticks = duration_to_ctx_ticks(fd, 0, duration_ns);
> > + xe_spin_init(&data[SPIN_DATA].spin, &spin_opts);
> > + if (engine_execution_mode == EXEC_MODE_LR)
> > + sync[0].addr = addr + (char *)&data[SPIN_DATA].exec_sync - (char *)data;
>
> Indentation looks off here.
Will fix.
>
> > + exec.address = spin_opts.addr;
> > + } else if (job_type == SIMPLE_BATCH_STORE) {
> > + exec.address = data->addr;
> > + }
> > + xe_exec(fd, &exec);
> > +
> > + if (job_type == SPINNER_INTERRUPTED) {
> > + enum engine_execution_mode interrupting_engine_execution_mode;
> > + if (engine_execution_mode == EXEC_MODE_LR)
> > + interrupting_engine_execution_mode = EXEC_MODE_DMA_FENCE;
> > + else if (engine_execution_mode == EXEC_MODE_DMA_FENCE)
> > + interrupting_engine_execution_mode = EXEC_MODE_LR;
> > + xe_spin_wait_started(&data[SPIN_DATA].spin);
> > + igt_gettime(&tv);
> > + for (int i = 0; i < NUM_INTERRUPTING_JOBS; i++)
> > + {
> > + run_job(fd, hwe, interrupting_engine_execution_mode, SIMPLE_BATCH_STORE);
> > + /**
> > + * Executing a SIMPLE_BATCH_STORE job takes significantly less time than
> > + * duration_ns.
> > + * When a spinner is running in LR mode, the interrupting job preempts it
> > + * in KMD and should complete fast, shortly after starting the spinner.
> > + * When a spinner is running in dma fence mode, the interrupting job waits
> > + * in KMD and should complete shortly after the spinner has ended.
> > + * The checks below are to verify preempting/waiting happens as expected
> > + * depending on the execution mode.
> > + */
> > + if (engine_execution_mode == EXEC_MODE_LR)
> > + igt_assert(igt_nsec_elapsed(&tv) < 0.5 * duration_ns);
> > + else if (engine_execution_mode == EXEC_MODE_DMA_FENCE)
> > + igt_assert(igt_nsec_elapsed(&tv) > duration_ns);
> > + }
> > + }
>
> Should we also run the 'NUM_INTERRUPTING_JOBS' loop here? i.e.
> simple-batch-store-lr / simple-batch-store-dma-fence test opening VMs in
> both modes doing a simple store (no preemption)?
Yes this is a good improvement and only requires a minor change.
>
> We'd have to protect against infinite recursion though but adding a flag to
> protect against that should be easy. e.g.
> SIMPLE_BATCH_STORE_NO_RECURSION.
>
> > +
> > + if (engine_execution_mode == EXEC_MODE_LR) {
> > + if (job_type == SPINNER_INTERRUPTED)
> > + xe_wait_ufence(fd, &data[SPIN_DATA].exec_sync, USER_FENCE_VALUE, 0, ONE_SEC * 2);
> > + else if (job_type == SIMPLE_BATCH_STORE)
> > + xe_wait_ufence(fd, &data[EXEC_DATA].exec_sync, USER_FENCE_VALUE, 0, ONE_SEC * 2);
> > + } else if (engine_execution_mode == EXEC_MODE_DMA_FENCE) {
> > + igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
> > + syncobj_destroy(fd, sync[0].handle);
> > + }
> > +
> > + if (job_type == SIMPLE_BATCH_STORE)
> > + igt_assert_eq(data->data, value);
> > +
> > + munmap(data, bo_size);
> > + gem_close(fd, bo);
> > + xe_exec_queue_destroy(fd, exec_queue);
> > + xe_vm_destroy(fd, vm);
> > +}
> > +
> > +/**
> > + * SUBTEST: exec-simple-batch-store-lr
> > + * Description: Execute a simple batch store job in long running mode
> > + *
> > + * SUBTEST: exec-simple-batch-store-dma-fence
> > + * Description: Execute a simple batch store job in dma fence mode
> > + *
> > + * SUBTEST: exec-spinner-interrupted-lr
> > + * Description: Spin in long running mode then get interrupted by a simple
> > + * batch store job in dma fence mode
> > + *
> > + * SUBTEST: exec-spinner-interrupted-dma-fence
> > + * Description: Spin in dma fence mode then get interrupted by a simple
> > + * batch store job in long running mode
> > + */
> > +static void
> > +test_exec(int fd, struct drm_xe_engine_class_instance *hwe,
> > + unsigned int flags)
> > +{
> > + enum engine_execution_mode engine_execution_mode;
> > + enum job_type job_type;
> > +
> > + if (flags & FLAG_EXEC_MODE_LR)
> > + engine_execution_mode = EXEC_MODE_LR;
> > + else
> > + engine_execution_mode = EXEC_MODE_DMA_FENCE;
> > +
> > + if (flags & FLAG_JOB_TYPE_SIMPLE)
> > + job_type = SIMPLE_BATCH_STORE;
> > + else
> > + job_type = SPINNER_INTERRUPTED;
> > +
> > + run_job(fd, hwe, engine_execution_mode, job_type);
> > +}
> > +
> > +igt_main
> > +
> > +
>
> Extra whitespace.
Will fix.
>
> > +{
> > + struct drm_xe_engine_class_instance *hwe;
> > + const struct section {
> > + const char *name;
> > + unsigned int flags;
> > + } sections[] = {
> > + { "simple-batch-store-lr", FLAG_JOB_TYPE_SIMPLE | FLAG_EXEC_MODE_LR },
> > + { "simple-batch-store-dma-fence", FLAG_JOB_TYPE_SIMPLE },
> > + { "spinner-interrupted-lr", FLAG_EXEC_MODE_LR },
> > + { "spinner-interrupted-dma-fence", 0 },
> > + { NULL },
> > + };
> > + int fd;
> > +
> > + igt_fixture {
> > + struct timespec tv = {};
> > + bool supports_faults;
> > + int ret = 0;
> > + int timeout = igt_run_in_simulation() ? 20 : 2;
> > +
> > + fd = drm_open_driver(DRIVER_XE);
> > + do {
> > + if (ret)
> > + usleep(5000);
> > + ret = xe_supports_faults(fd);
> > + } while (ret == -EBUSY && igt_seconds_elapsed(&tv) < timeout);
> > +
> > + supports_faults = !ret;
> > + igt_require(supports_faults);
>
> I was unsure why this code was added in xe_exec_fault_mode, so had to
> look:
>
> git format-patch -1 8abb25ffe58
>
> If you read the explanation for that, it is because we don't support mixing
> faulting and non-faulting VMs being open at the same time + races
> closing the VMs. With your KMD series we support having both a faulting
> VM and non-faulting VM open so this loop is not required.
>
> e.g. I think you can just do this:
>
> igt_fixture {
> fd = drm_open_driver(DRIVER_XE);
> igt_require(xe_supports_faults(fd));
> }
Yes indeed, actually taking into account the logic for xe_supports_faults()
it should probably be along those lines:
igt_fixture {
ret = xe_supports_faults(fd);
supports_faults = !ret;
igt_require(supports_faults);
}
>
> Matt
>
> > + }
> > +
> > + for (const struct section *s = sections; s->name; s++) {
> > + igt_subtest_f("exec-%s", s->name)
> > + xe_for_each_engine(fd, hwe)
> > + if (hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
> > + test_exec(fd, hwe, s->flags);
> > + }
> > +
> > + igt_fixture {
> > + drm_close_driver(fd);
> > + }
> > +}
> > diff --git a/tests/meson.build b/tests/meson.build
> > index 357db2723..e649466be 100644
> > --- a/tests/meson.build
> > +++ b/tests/meson.build
> > @@ -286,6 +286,7 @@ intel_xe_progs = [
> > 'xe_exec_basic',
> > 'xe_exec_compute_mode',
> > 'xe_exec_fault_mode',
> > + 'xe_exec_mix_modes',
> > 'xe_exec_queue_property',
> > 'xe_exec_reset',
> > 'xe_exec_sip',
> > --
> > 2.43.0
> >
More information about the igt-dev
mailing list