[PATCH 2/3] tests/intel/xe_exec_system_allocator: Validate prefetch of SVM with single and multiple ranges
sai.gowtham.ch at intel.com
Wed May 14 20:58:00 UTC 2025
From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
Test validates prefetch of SVM with a single range and with multiple ranges of
different sizes, and checks that no SVM pagefaults are seen while executing
against the prefetched ranges.
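
In short, the no-fault check brackets each execution with the per-GT
"svm_pagefault_count" stat; a minimal sketch of the assertion, matching the
diff below:

	pre = xe_gt_stats_get_count(fd, gt_id, "svm_pagefault_count");
	xe_exec(fd, &exec);	/* batch touches the prefetched range */
	xe_wait_ufence(fd, exec_ufence, USER_FENCE_VALUE, exec_queue, NSEC_PER_SEC);
	post = xe_gt_stats_get_count(fd, gt_id, "svm_pagefault_count");
	igt_assert_eq(pre, post);	/* prefetch must have made pages resident */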
v2: Enhance test to utilize smem/vram flags from the selection loop (Jonathan Cavitt)
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
---
tests/intel/xe_exec_system_allocator.c | 121 +++++++++++++++++++++++++
1 file changed, 121 insertions(+)
diff --git a/tests/intel/xe_exec_system_allocator.c b/tests/intel/xe_exec_system_allocator.c
index 68bf29898..d4f2e8d98 100644
--- a/tests/intel/xe_exec_system_allocator.c
+++ b/tests/intel/xe_exec_system_allocator.c
@@ -20,6 +20,7 @@
#include "lib/igt_syncobj.h"
#include "lib/intel_reg.h"
#include "xe_drm.h"
+#include "xe/xe_gt.h"
#include "xe/xe_ioctl.h"
#include "xe/xe_query.h"
@@ -139,6 +140,7 @@ static void signal_pdata(struct process_data *pdata)
#define CPU_FAULT_THREADS (0x1 << 2)
#define CPU_FAULT_PROCESS (0x1 << 3)
#define CPU_FAULT_SAME_PAGE (0x1 << 4)
+#define VRAM (0x1 << 5)
static void process_check(void *ptr, uint64_t alloc_size, uint64_t stride,
unsigned int flags)
@@ -464,6 +466,101 @@ static void test_basic(int fd, struct drm_xe_engine_class_instance *eci,
xe_exec_queue_destroy(fd, exec_queues);
xe_vm_destroy(fd, vm);
}
+
+/**
+ * SUBTEST: prefetch-%s
+ * Description: Validate prefetch of SVM %arg[1]
+ * Test category: functionality test
+ *
+ * SUBTEST: multi-range-%s
+ * Description: Multi-range prefetch of SVM %arg[1], checking that multiple ranges are created
+ * Test category: functionality test
+ *
+ * arg[1]:
+ *
+ * @smem-SZ_4K: with size of SZ_4K on smem region
+ * @smem-SZ_64K: with size of SZ_64K on smem region
+ * @smem-SZ_2M: with size of SZ_2M on smem region
+ * @vram-SZ_4K: with size of SZ_4K on vram region
+ * @vram-SZ_64K: with size of SZ_64K on vram region
+ * @vram-SZ_2M: with size of SZ_2M on vram region
+ */
+#define MAX_BATCH_DWORDS 16
+static void prefetch(int fd, struct drm_xe_engine_class_instance *eci,
+ size_t bo_size, unsigned int flags, int num_dwords)
+{
+ struct batch_data *data;
+ uint64_t target_addr;
+ uint64_t addr;
+	uint64_t *exec_ufence = NULL;
+ struct drm_xe_sync sync[1] = {
+ { .type = DRM_XE_SYNC_TYPE_USER_FENCE, .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+ .timeline_value = USER_FENCE_VALUE },
+ };
+ struct drm_xe_exec exec = {
+ .num_batch_buffer = 1,
+ .num_syncs = 1,
+ .syncs = to_user_pointer(sync),
+ };
+ size_t slice_size = bo_size;
+ uint64_t batch_addr;
+ uint32_t exec_queues, expected, vm, *result_ptr, *batch;
+ size_t aligned_size = bo_size ?: xe_get_default_alignment(fd);
+ const char *stat = "svm_pagefault_count";
+ int b, svm_pf_count_pre, svm_pf_count_pos;
+
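+	/* Scale the allocation so each dword lands in its own slice; with
+	 * num_dwords > 1 this exercises multiple SVM ranges.
+	 */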
+ bo_size = bo_size * num_dwords;
+ vm = xe_vm_create(fd, DRM_XE_VM_CREATE_FLAG_LR_MODE | DRM_XE_VM_CREATE_FLAG_FAULT_MODE, 0);
+	data = aligned_alloc(aligned_size, bo_size);
+	igt_assert(data);
+	memset(data, 0, bo_size);
+ addr = to_user_pointer(data);
+
+ exec_queues = xe_exec_queue_create(fd, vm, eci, 0);
+ sync[0].addr = to_user_pointer(&data[0].vm_sync);
+ bind_system_allocator(sync, 1);
+ xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0, NSEC_PER_SEC);
+ data[0].vm_sync = 0;
+ exec_ufence = mmap(NULL, SZ_4K, PROT_READ |
+ PROT_WRITE, MAP_SHARED |
+ MAP_ANONYMOUS, -1, 0);
+ igt_assert(exec_ufence != MAP_FAILED);
+ memset(exec_ufence, 0, SZ_4K);
+
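+	/* Build one store-dword batch per slice, each writing a unique value. */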
+ for (int i = 0; i < num_dwords; i++) {
+ batch = (uint32_t *)((uint8_t *)data + i * slice_size);
+ target_addr = addr + i * slice_size + 0x100;
+ b = 0;
+
+ igt_assert(b + 5 <= MAX_BATCH_DWORDS);
+ write_dword(batch, target_addr, 0xDEADBEEF + i, &b);
+ }
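+
+	/* Prefetch the whole buffer in one call; region instance 1 (VRAM) when
+	 * the VRAM flag is set, otherwise 0 (SMEM).
+	 */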
+ sync[0].addr = to_user_pointer(exec_ufence);
+ xe_vm_prefetch_async(fd, vm, 0, 0, addr, bo_size, sync, 1, flags & VRAM ? 1 : 0);
+ xe_wait_ufence(fd, exec_ufence, USER_FENCE_VALUE, 0, NSEC_PER_SEC);
+ exec.exec_queue_id = exec_queues;
+
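+	/* Execute each slice's batch and verify the write landed without
+	 * triggering any new SVM pagefaults.
+	 */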
+ for (int i = 0; i < num_dwords; i++) {
+ result_ptr = (uint32_t *)((uint8_t *)data + i * slice_size + 0x100);
+ expected = 0xDEADBEEF + i;
+
+ svm_pf_count_pre = xe_gt_stats_get_count(fd, eci->gt_id, stat);
+ batch_addr = addr + i * slice_size;
+ exec.address = batch_addr;
+ exec_ufence[0] = 0;
+ sync[0].addr = to_user_pointer(exec_ufence);
+ xe_exec(fd, &exec);
+		xe_wait_ufence(fd, exec_ufence, USER_FENCE_VALUE, exec_queues, NSEC_PER_SEC);
+		exec_ufence[0] = 0;
+		svm_pf_count_pos = xe_gt_stats_get_count(fd, eci->gt_id, stat);
+		igt_assert_eq(svm_pf_count_pre, svm_pf_count_pos);
+ igt_assert_eq(*result_ptr, expected);
+ }
+ xe_exec_queue_destroy(fd, exec_queues);
+ unbind_system_allocator();
+	munmap(exec_ufence, SZ_4K);
+	free(data);
+ xe_vm_destroy(fd, vm);
+}
+
/**
* SUBTEST: unaligned-alloc
* Description: allocate unaligned sizes of memory
@@ -1657,6 +1754,20 @@ struct section {
igt_main
{
struct drm_xe_engine_class_instance *hwe;
+ const struct mode {
+ const char *name;
+ unsigned int flags;
+ size_t size;
+ } mode[] = {
+		{ "smem-SZ_4K", 0, SZ_4K },
+		{ "smem-SZ_64K", 0, SZ_64K },
+		{ "smem-SZ_2M", 0, SZ_2M },
+		{ "vram-SZ_4K", VRAM, SZ_4K },
+		{ "vram-SZ_64K", VRAM, SZ_64K },
+		{ "vram-SZ_2M", VRAM, SZ_2M },
+ {},
+ }, *m;
+
const struct section sections[] = {
{ "malloc", 0 },
{ "malloc-multi-fault", MULTI_FAULT },
@@ -1930,6 +2041,16 @@ igt_main
processes_evict(fd, SZ_8M, SZ_1M, s->flags);
}
+ for (m = mode; m->name; m++) {
+ igt_subtest_f("prefetch-%s", m->name)
+ xe_for_each_engine(fd, hwe)
+ prefetch(fd, hwe, m->size, m->flags, 1);
+
+ igt_subtest_f("multi-range-%s", m->name)
+ xe_for_each_engine(fd, hwe)
+ prefetch(fd, hwe, m->size, m->flags, 10);
+ }
+
igt_fixture {
xe_device_put(fd);
drm_close_driver(fd);
--
2.34.1