[PATCH 2/3] tests/intel/xe_exec_system_allocator: Validate prefetch of SVM with single and multi ranges
Matthew Brost
matthew.brost at intel.com
Wed May 14 23:04:44 UTC 2025
On Tue, May 13, 2025 at 05:07:10PM +0000, sai.gowtham.ch at intel.com wrote:
> From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>
> Tests validate prefetch of SVM with single and multiple ranges, with different
> range sizes, and check that no SVM pagefaults are seen while prefetching the
> ranges of SVM.
>
> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> ---
> tests/intel/xe_exec_system_allocator.c | 126 +++++++++++++++++++++++++
> 1 file changed, 126 insertions(+)
>
> diff --git a/tests/intel/xe_exec_system_allocator.c b/tests/intel/xe_exec_system_allocator.c
> index ba11ed834..c3f0e61f3 100644
> --- a/tests/intel/xe_exec_system_allocator.c
> +++ b/tests/intel/xe_exec_system_allocator.c
> @@ -20,6 +20,7 @@
> #include "lib/igt_syncobj.h"
> #include "lib/intel_reg.h"
> #include "xe_drm.h"
> +#include "xe/xe_gt.c"
>
> #include "xe/xe_ioctl.h"
> #include "xe/xe_query.h"
> @@ -139,6 +140,7 @@ static void signal_pdata(struct process_data *pdata)
> #define CPU_FAULT_THREADS (0x1 << 2)
> #define CPU_FAULT_PROCESS (0x1 << 3)
> #define CPU_FAULT_SAME_PAGE (0x1 << 4)
> +#define VRAM (0x1 << 5)
These flags are for 'many_alloc' as indicated by the comment in the code.
>
> static void process_check(void *ptr, uint64_t alloc_size, uint64_t stride,
> unsigned int flags)
> @@ -464,6 +466,106 @@ static void test_basic(int fd, struct drm_xe_engine_class_instance *eci,
> xe_exec_queue_destroy(fd, exec_queues);
> xe_vm_destroy(fd, vm);
> }
> +
> +/**
> + * SUBTEST: prefetch-smem-%s
> + * Description: Test to validate functionality of Prefetch using SVM of size arg[1] at smem region
> + * Test category: functionality test
> + *
> + * SUBTEST: prefetch-vram-%s
> + * Description: Test to validate functionality of Prefetch using SVM of size arg[1] at vram region
> + * Test category: functionality test
> + *
> + * SUBTEST: multi-range-smem-%s
> + * Description: Prefetch of multiple ranges within arg[1] size to validate multiple ranges are created
> + * Test category: functionality test
> + *
> + * SUBTEST: multi-range-vram-%s
> + * Description: Prefetch of multiple ranges within arg[1] size and check if multiple ranges are created
> + * Test category: functionality test
> + *
> + * arg[1]:
> + *
> + * @SZ_4K: SZ_4K
> + * @SZ_64K: SZ_64K
> + * @SZ_2M: SZ_2M
> + */
> +#define MAX_BATCH_DWORDS 16
> +static void prefetch(int fd, struct drm_xe_engine_class_instance *eci,
> + size_t bo_size, unsigned int flags, int num_dwords)
> +{
> + struct batch_data *data;
> + uint64_t target_addr;
> + uint64_t addr;
> + u64 *exec_ufence = NULL;
> + struct drm_xe_sync sync[1] = {
> + { .type = DRM_XE_SYNC_TYPE_USER_FENCE, .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> + .timeline_value = USER_FENCE_VALUE },
> + };
> + struct drm_xe_exec exec = {
> + .num_batch_buffer = 1,
> + .num_syncs = 1,
> + .syncs = to_user_pointer(sync),
> + };
> + size_t slice_size = bo_size;
> + uint64_t batch_addr;
> + uint32_t exec_queues, expected, vm, *result_ptr, *batch;
> + size_t aligned_size = bo_size ?: xe_get_default_alignment(fd);
> + const char *stat = "svm_pagefault_count";
> + int b, svm_pf_count_pre, svm_pf_count_pos;
> +
> + bo_size = bo_size * num_dwords;
> + vm = xe_vm_create(fd, DRM_XE_VM_CREATE_FLAG_LR_MODE | DRM_XE_VM_CREATE_FLAG_FAULT_MODE, 0);
> + data = aligned_alloc(aligned_size, bo_size);
> + memset(data, 0, bo_size);
> + addr = to_user_pointer(data);
> +
> + exec_queues = xe_exec_queue_create(fd, vm, eci, 0);
> + sync[0].addr = to_user_pointer(&data[0].vm_sync);
> + bind_system_allocator(sync, 1);
> + xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0, NSEC_PER_SEC);
> + data[0].vm_sync = 0;
> + exec_ufence = mmap(NULL, SZ_4K, PROT_READ |
> + PROT_WRITE, MAP_SHARED |
MAP_SHARED can't migrate to VRAM so I don't think you are testing what you
think you are.
> + MAP_ANONYMOUS, -1, 0);
> + igt_assert(exec_ufence != MAP_FAILED);
> + memset(exec_ufence, 0, SZ_4K);
> +
> + for (int i = 0; i < num_dwords; i++) {
> + batch = (uint32_t *)((uint8_t *)data + i * slice_size);
> + target_addr = addr + i * slice_size + 0x100;
> + b = 0;
> +
> + igt_assert(b + 5 <= MAX_BATCH_DWORDS);
> + write_dword(batch, target_addr, 0xDEADBEEF + i, &b);
> + }
> + sync[0].addr = to_user_pointer(exec_ufence);
> + xe_vm_prefetch_async(fd, vm, 0, 0, addr, bo_size, sync, 1, flags & VRAM ? 1 : 0);
> + xe_wait_ufence(fd, exec_ufence, USER_FENCE_VALUE, 0, NSEC_PER_SEC);
> + exec.exec_queue_id = exec_queues;
> +
> + for (int i = 0; i < num_dwords; i++) {
> + result_ptr = (uint32_t *)((uint8_t *)data + i * slice_size + 0x100);
> + expected = 0xDEADBEEF + i;
> +
> + svm_pf_count_pre = xe_gt_stats_get_count(fd, eci->gt_id, stat);
> + batch_addr = addr + i * slice_size;
> + exec.address = batch_addr;
> + exec_ufence[0] = 0;
> + sync[0].addr = to_user_pointer(exec_ufence);
> + xe_exec(fd, &exec);
> + svm_pf_count_pos = xe_gt_stats_get_count(fd, eci->gt_id, stat);
> + igt_assert(svm_pf_count_pre == svm_pf_count_pos);
> + xe_wait_ufence(fd, exec_ufence, USER_FENCE_VALUE, exec_queues, NSEC_PER_SEC);
> + exec_ufence[0] = 0;
> + igt_assert_eq(*result_ptr, expected);
> + }
> + xe_exec_queue_destroy(fd, exec_queues);
> + unbind_system_allocator();
> + munmap(data, bo_size);
> + xe_vm_destroy(fd, vm);
> +}
> +
> /**
> * SUBTEST: unaligned-alloc
> * Description: allocate unaligned sizes of memory
> @@ -1658,6 +1760,16 @@ igt_main
> {
> struct drm_xe_engine *engine;
> struct drm_xe_engine_class_instance *hwe;
> + const struct mode {
> + const char *name;
> + size_t size;
> + } mode[] = {
> + { "SZ_4K", SZ_4K},
> + { "SZ_64K", SZ_64K},
> + { "SZ_2M", SZ_2M},
> + {},
> + }, *m;
> +
> const struct section sections[] = {
> { "malloc", 0 },
> { "malloc-multi-fault", MULTI_FAULT },
> @@ -1931,6 +2043,20 @@ igt_main
> processes_evict(fd, SZ_8M, SZ_1M, s->flags);
> }
>
> + for (m = mode; m->name; m++) {
> + igt_subtest_f("prefetch-smem-%s", m->name)
> + prefetch(fd, &engine->instance, m->size, 0, 1);
> +
> + igt_subtest_f("prefetch-vram-%s", m->name)
> + prefetch(fd, &engine->instance, m->size, VRAM, 1);
> +
> + igt_subtest_f("multi-range-smem-%s", m->name)
> + prefetch(fd, &engine->instance, m->size, 0, 10);
> +
> + igt_subtest_f("multi-range-vram-%s", m->name)
> + prefetch(fd, &engine->instance, m->size, VRAM, 10);
> + }
> +
In general I'd strongly prefer the existing test_exec function was
updated with prefetch rather than unique sections for it. The power of
this test is how it scales, we should build on that.
Matt
> igt_fixture {
> xe_device_put(fd);
> drm_close_driver(fd);
> --
> 2.34.1
>
More information about the igt-dev
mailing list