[PATCH 2/3] tests/intel/xe_exec_system_allocator: Validate prefetch of SVM with single and multi ranges

Ch, Sai Gowtham sai.gowtham.ch at intel.com
Thu May 15 16:04:10 UTC 2025



>-----Original Message-----
>From: Brost, Matthew <matthew.brost at intel.com>
>Sent: Thursday, May 15, 2025 4:35 AM
>To: Ch, Sai Gowtham <sai.gowtham.ch at intel.com>
>Cc: igt-dev at lists.freedesktop.org; Ghimiray, Himal Prasad
><himal.prasad.ghimiray at intel.com>
>Subject: Re: [PATCH 2/3] tests/intel/xe_exec_system_allocator: Validate prefetch of
>svm with single and multi ranges
>
>On Tue, May 13, 2025 at 05:07:10PM +0000, sai.gowtham.ch at intel.com wrote:
>> From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>>
>> Tests validate prefetch of SVM with single range and multiple ranges,
>> with different range sizes. Checks that no SVM pagefaults are seen
>> while prefetching the SVM ranges.
>>
>> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>> ---
>>  tests/intel/xe_exec_system_allocator.c | 126
>> +++++++++++++++++++++++++
>>  1 file changed, 126 insertions(+)
>>
>> diff --git a/tests/intel/xe_exec_system_allocator.c
>> b/tests/intel/xe_exec_system_allocator.c
>> index ba11ed834..c3f0e61f3 100644
>> --- a/tests/intel/xe_exec_system_allocator.c
>> +++ b/tests/intel/xe_exec_system_allocator.c
>> @@ -20,6 +20,7 @@
>>  #include "lib/igt_syncobj.h"
>>  #include "lib/intel_reg.h"
>>  #include "xe_drm.h"
>> +#include "xe/xe_gt.c"
>>
>>  #include "xe/xe_ioctl.h"
>>  #include "xe/xe_query.h"
>> @@ -139,6 +140,7 @@ static void signal_pdata(struct process_data *pdata)
>>  #define CPU_FAULT_THREADS	(0x1 << 2)
>>  #define CPU_FAULT_PROCESS	(0x1 << 3)
>>  #define CPU_FAULT_SAME_PAGE	(0x1 << 4)
>> +#define VRAM            	(0x1 << 5)
>
>These flags are for 'many_alloc' as indicated by the comment in the code.
Sure, will change it.
>
>>
>>  static void process_check(void *ptr, uint64_t alloc_size, uint64_t stride,
>>  			  unsigned int flags)
>> @@ -464,6 +466,106 @@ static void test_basic(int fd, struct
>drm_xe_engine_class_instance *eci,
>>  	xe_exec_queue_destroy(fd, exec_queues);
>>  	xe_vm_destroy(fd, vm);
>>  }
>> +
>> +/**
>> + * SUBTEST: prefetch-smem-%s
>> + * Description: Test to validate functionality of Prefetch using SVM
>> +of size arg[1] at smem region
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: prefetch-vram-%s
>> + * Description: Test to validate functionality of Prefetch using SVM
>> +of size arg[1] at vram region
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: multi-range-smem-%s
>> + * Description: Prefetch of multiple ranges within arg[1] size to
>> +validate multiple ranges are created
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: multi-range-vram-%s
>> + * Description: Prefetch of multiple ranges within arg[1] size and
>> +check if multiple ranges are created
>> + * Test category: functionality test
>> + *
>> + * arg[1]:
>> + *
>> + * @SZ_4K: SZ_4K
>> + * @SZ_64K: SZ_64K
>> + * @SZ_2M: SZ_2M
>> + */
>> +#define MAX_BATCH_DWORDS 16
>> +static void prefetch(int fd, struct drm_xe_engine_class_instance *eci,
>> +		     size_t bo_size, unsigned int flags, int num_dwords) {
>> +	struct batch_data *data;
>> +	uint64_t target_addr;
>> +	uint64_t addr;
>> +	u64 *exec_ufence = NULL;
>> +	struct drm_xe_sync sync[1] = {
>> +		{ .type = DRM_XE_SYNC_TYPE_USER_FENCE, .flags =
>DRM_XE_SYNC_FLAG_SIGNAL,
>> +		  .timeline_value = USER_FENCE_VALUE },
>> +	};
>> +	struct drm_xe_exec exec = {
>> +		.num_batch_buffer = 1,
>> +		.num_syncs = 1,
>> +		.syncs = to_user_pointer(sync),
>> +	};
>> +	size_t slice_size = bo_size;
>> +	uint64_t batch_addr;
>> +	uint32_t exec_queues, expected, vm, *result_ptr, *batch;
>> +	size_t aligned_size = bo_size ?: xe_get_default_alignment(fd);
>> +	const char *stat = "svm_pagefault_count";
>> +	int b, svm_pf_count_pre, svm_pf_count_pos;
>> +
>> +	bo_size = bo_size * num_dwords;
>> +	vm = xe_vm_create(fd, DRM_XE_VM_CREATE_FLAG_LR_MODE |
>DRM_XE_VM_CREATE_FLAG_FAULT_MODE, 0);
>> +	data = aligned_alloc(aligned_size, bo_size);
>> +	memset(data, 0, bo_size);
>> +	addr = to_user_pointer(data);
>> +
>> +	exec_queues = xe_exec_queue_create(fd, vm, eci, 0);
>> +	sync[0].addr = to_user_pointer(&data[0].vm_sync);
>> +	bind_system_allocator(sync, 1);
>> +	xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0,
>NSEC_PER_SEC);
>> +	data[0].vm_sync = 0;
>> +	exec_ufence = mmap(NULL, SZ_4K, PROT_READ |
>> +				 PROT_WRITE, MAP_SHARED |
>
>MAP_SHARED can't migrate to VRAM so I don't think are testing what you think
>you are.
Will do the necessary change.
>
>> +				 MAP_ANONYMOUS, -1, 0);
>> +	igt_assert(exec_ufence != MAP_FAILED);
>> +	memset(exec_ufence, 0, SZ_4K);
>> +
>> +	for (int i = 0; i < num_dwords; i++) {
>> +		batch = (uint32_t *)((uint8_t *)data + i * slice_size);
>> +		target_addr = addr + i * slice_size + 0x100;
>> +		b = 0;
>> +
>> +		igt_assert(b + 5 <= MAX_BATCH_DWORDS);
>> +		write_dword(batch, target_addr, 0xDEADBEEF + i, &b);
>> +	}
>> +	sync[0].addr = to_user_pointer(exec_ufence);
>> +	xe_vm_prefetch_async(fd, vm, 0, 0, addr, bo_size, sync, 1, flags & VRAM ?
>1 : 0);
>> +	xe_wait_ufence(fd, exec_ufence, USER_FENCE_VALUE, 0,
>NSEC_PER_SEC);
>> +	exec.exec_queue_id = exec_queues;
>> +
>> +	for (int i = 0; i < num_dwords; i++) {
>> +		result_ptr = (uint32_t *)((uint8_t *)data + i * slice_size + 0x100);
>> +		expected = 0xDEADBEEF + i;
>> +
>> +		svm_pf_count_pre = xe_gt_stats_get_count(fd, eci->gt_id, stat);
>> +		batch_addr = addr + i * slice_size;
>> +		exec.address = batch_addr;
>> +		exec_ufence[0] = 0;
>> +		sync[0].addr = to_user_pointer(exec_ufence);
>> +		xe_exec(fd, &exec);
>> +		svm_pf_count_pos = xe_gt_stats_get_count(fd, eci->gt_id, stat);
>> +		igt_assert(svm_pf_count_pre == svm_pf_count_pos);
>> +		xe_wait_ufence(fd, exec_ufence, USER_FENCE_VALUE,
>exec_queues, NSEC_PER_SEC);
>> +		exec_ufence[0] = 0;
>> +		igt_assert_eq(*result_ptr, expected);
>> +	}
>> +	xe_exec_queue_destroy(fd, exec_queues);
>> +	unbind_system_allocator();
>> +	munmap(data, bo_size);
>> +	xe_vm_destroy(fd, vm);
>> +}
>> +
>>  /**
>>   * SUBTEST: unaligned-alloc
>>   * Description: allocate unaligned sizes of memory @@ -1658,6
>> +1760,16 @@ igt_main  {
>>  	struct drm_xe_engine *engine;
>>  	struct drm_xe_engine_class_instance *hwe;
>> +	const struct mode {
>> +		const char *name;
>> +		size_t size;
>> +	} mode[] = {
>> +		{ "SZ_4K", SZ_4K},
>> +		{ "SZ_64K", SZ_64K},
>> +		{ "SZ_2M", SZ_2M},
>> +		{},
>> +	}, *m;
>> +
>>  	const struct section sections[] = {
>>  		{ "malloc", 0 },
>>  		{ "malloc-multi-fault", MULTI_FAULT }, @@ -1931,6 +2043,20
>@@
>> igt_main
>>  			processes_evict(fd, SZ_8M, SZ_1M, s->flags);
>>  	}
>>
>> +	for (m = mode; m->name; m++) {
>> +                igt_subtest_f("prefetch-smem-%s", m->name)
>> +                        prefetch(fd, &engine->instance, m->size, 0,
>> +1);
>> +
>> +                igt_subtest_f("prefetch-vram-%s", m->name)
>> +                        prefetch(fd, &engine->instance, m->size,
>> + VRAM, 1);
>> +
>> +                igt_subtest_f("multi-range-smem-%s", m->name)
>> +                        prefetch(fd, &engine->instance, m->size, 0,
>> + 10);
>> +
>> +                igt_subtest_f("multi-range-vram-%s", m->name)
>> +                        prefetch(fd, &engine->instance, m->size, VRAM, 10);
>> +        }
>> +
>
>In general I'd strongly prefer the existing test_exec function was updated with
>prefetch rather than unique sections for it. The power of this test is how it scales,
>we should build on that.

A unique section was added just for better readability and to make any issues
easier to debug. However, if you think otherwise, this can certainly be moved under test_exec.

---
Gowtham
>
>Matt
>
>>  	igt_fixture {
>>  		xe_device_put(fd);
>>  		drm_close_driver(fd);
>> --
>> 2.34.1
>>


More information about the igt-dev mailing list