[PATCH i-g-t 5/5] tests/intel/xe_exec_system_allocator: Added atomic_batch test in IGT

Sharma, Nishit nishit.sharma at intel.com
Thu Aug 28 16:27:32 UTC 2025


On 8/28/2025 8:38 PM, Gurram, Pravalika wrote:
>
>> -----Original Message-----
>> From: igt-dev <igt-dev-bounces at lists.freedesktop.org> On Behalf Of
>> nishit.sharma at intel.com
>> Sent: Tuesday, August 26, 2025 2:12 PM
>> To: igt-dev at lists.freedesktop.org; Ghimiray, Himal Prasad
>> <himal.prasad.ghimiray at intel.com>; Brost, Matthew
>> <matthew.brost at intel.com>; Sharma, Nishit <nishit.sharma at intel.com>
>> Subject: [PATCH i-g-t 5/5] tests/intel/xe_exec_system_allocator: Added
>> atomic_batch test in IGT
>>
>> From: Nishit Sharma <nishit.sharma at intel.com>
>>
> Title can be "tests/intel/xe_exec_system_allocator: Add atomic_batch test in IGT"
>
>> ATOMIC_BATCH flag is introduced when true MI_ATOMIC |
>> MI_ATOMIC_INC operation will be called. This will avoid writing another
>> function which performs atomic increment operations. ATOMIC_BATCH flag
>> is passed as argument in write_dword() if true then value will be written on
>> passed address and incremented by ATOMIC_INC operation. For all memory
>> operations this flag will be used to verify if ATOMIC operation is working or
>> not.
>>
>> Signed-off-by: Nishit Sharma <nishit.sharma at intel.com>
>> ---
>>   tests/intel/xe_exec_system_allocator.c | 482 +++++++++++++++++++++---
>> -
>>   1 file changed, 412 insertions(+), 70 deletions(-)
>>
>> diff --git a/tests/intel/xe_exec_system_allocator.c
>> b/tests/intel/xe_exec_system_allocator.c
>> index 70ca5fc2e..4569ace8d 100644
>> --- a/tests/intel/xe_exec_system_allocator.c
>> +++ b/tests/intel/xe_exec_system_allocator.c
>> @@ -29,6 +29,14 @@
>>   #define USER_FENCE_VALUE	0xdeadbeefdeadbeefull
>>   #define QUARTER_SEC		(NSEC_PER_SEC / 4)
>>   #define FIVE_SEC		(5LL * NSEC_PER_SEC)
> Add a blank line here, and run checkpatch.pl.
>> +struct test_exec_data {
>> +	uint32_t batch[32];
>> +	uint64_t pad;
>> +	uint64_t vm_sync;
>> +	uint64_t exec_sync;
>> +	uint32_t data;
>> +	uint32_t expected_data;
>> +};
>>
>>   struct batch_data {
>>   	uint32_t batch[16];
>> @@ -37,6 +45,7 @@ struct batch_data {
>>   	uint32_t expected_data;
>>   };
>>
>> +#define VAL_ATOMIC_EXPECTED  56
>>   #define WRITE_VALUE(data__, i__)	({			\
>>   	if (!(data__)->expected_data)				\
>>   		(data__)->expected_data = rand() << 12 | (i__);	\
>> @@ -53,10 +62,19 @@ static void __write_dword(uint32_t *batch, uint64_t
>> sdi_addr, uint32_t wdata,
>>   	batch[(*idx)++] = wdata;
>>   }
>>
>> -static void write_dword(uint32_t *batch, uint64_t sdi_addr, uint32_t wdata,
>> -			int *idx)
>> +static void write_dword(struct test_exec_data *data, uint64_t sdi_addr,
>> uint32_t wdata,
>> +			int *idx, bool atomic)
>>   {
>> -	__write_dword(batch, sdi_addr, wdata, idx);
>> +	uint32_t *batch = data->batch;
>> +
>> +	if (atomic) {
>> +		data->data = 55;
>> +		batch[(*idx)++] = MI_ATOMIC | MI_ATOMIC_INC;
>> +		batch[(*idx)++] = sdi_addr;
>> +		batch[(*idx)++] = sdi_addr >> 32;
>> +	} else {
>> +		__write_dword(batch, sdi_addr, wdata, idx);
>> +	}
>>   	batch[(*idx)++] = MI_BATCH_BUFFER_END;  }
>>
>> @@ -271,7 +289,7 @@ check_all_pages_threads(void *ptr, uint64_t
>> alloc_size, uint64_t stride,
>>
>>   static void touch_all_pages(int fd, uint32_t exec_queue, void *ptr,
>>   			    uint64_t alloc_size, uint64_t stride,
>> -			    struct timespec *tv, uint64_t *submit)
>> +			    struct timespec *tv, uint64_t *submit, bool atomic)
>>   {
>>   	struct drm_xe_sync sync[1] = {
>>   		{ .type = DRM_XE_SYNC_TYPE_USER_FENCE, @@ -302,7
>> +320,8 @@ static void touch_all_pages(int fd, uint32_t exec_queue, void
>> *ptr,
>>   		uint64_t sdi_addr = addr + sdi_offset;
>>   		int b = 0;
>>
>> -		write_dword(data->batch, sdi_addr, WRITE_VALUE(data, i),
>> &b);
>> +		write_dword((struct test_exec_data *)data, sdi_addr,
>> WRITE_VALUE(data, i),
>> +			     &b, atomic ? true : false);
>>   		igt_assert(b <= ARRAY_SIZE(data->batch));
>>   	}
>>
>> @@ -407,34 +426,45 @@ static void __aligned_partial_free(struct
>> aligned_alloc_type  *aligned_alloc_typ
>>
>>   #define MAX_N_EXEC_QUEUES       16
>>
>> -#define MMAP                    (0x1 << 0)
>> -#define NEW                     (0x1 << 1)
>> -#define BO_UNMAP                (0x1 << 2)
>> -#define FREE                    (0x1 << 3)
>> -#define BUSY                    (0x1 << 4)
>> -#define BO_MAP                  (0x1 << 5)
>> -#define RACE                    (0x1 << 6)
>> -#define SKIP_MEMSET             (0x1 << 7)
>> -#define FAULT                   (0x1 << 8)
>> -#define FILE_BACKED             (0x1 << 9)
>> -#define LOCK                    (0x1 << 10)
>> -#define MMAP_SHARED             (0x1 << 11)
>> -#define HUGE_PAGE               (0x1 << 12)
>> -#define SHARED_ALLOC            (0x1 << 13)
>> -#define FORK_READ               (0x1 << 14)
>> -#define FORK_READ_AFTER         (0x1 << 15)
>> -#define MREMAP                  (0x1 << 16)
>> -#define DONTUNMAP               (0x1 << 17)
>> -#define READ_ONLY_REMAP         (0x1 << 18)
>> -#define SYNC_EXEC               (0x1 << 19)
>> -#define EVERY_OTHER_CHECK       (0x1 << 20)
>> -#define MULTI_FAULT             (0x1 << 21)
>> -#define PREFETCH                (0x1 << 22)
>> -#define THREADS                 (0x1 << 23)
>> -#define PROCESSES               (0x1 << 24)
>> -#define PREFETCH_BENCHMARK      (0x1 << 25)
>> -#define PREFETCH_SYS_BENCHMARK	(0x1 << 26)
>> -#define PREFERRED_LOC_SMEM      (0x1 << 27)
>> +#define MMAP				(0x1 << 0)
>> +#define NEW				(0x1 << 1)
>> +#define BO_UNMAP			(0x1 << 2)
>> +#define FREE				(0x1 << 3)
>> +#define BUSY				(0x1 << 4)
>> +#define BO_MAP				(0x1 << 5)
>> +#define RACE				(0x1 << 6)
>> +#define SKIP_MEMSET			(0x1 << 7)
>> +#define FAULT				(0x1 << 8)
>> +#define FILE_BACKED			(0x1 << 9)
>> +#define LOCK				(0x1 << 10)
>> +#define MMAP_SHARED			(0x1 << 11)
>> +#define HUGE_PAGE			(0x1 << 12)
>> +#define SHARED_ALLOC			(0x1 << 13)
>> +#define FORK_READ			(0x1 << 14)
>> +#define FORK_READ_AFTER			(0x1 << 15)
>> +#define MREMAP				(0x1 << 16)
>> +#define DONTUNMAP			(0x1 << 17)
>> +#define READ_ONLY_REMAP			(0x1 << 18)
>> +#define SYNC_EXEC			(0x1 << 19)
>> +#define EVERY_OTHER_CHECK		(0x1 << 20)
>> +#define MULTI_FAULT			(0x1 << 21)
>> +#define PREFETCH			(0x1 << 22)
>> +#define THREADS				(0x1 << 23)
>> +#define PROCESSES			(0x1 << 24)
>> +#define PREFETCH_BENCHMARK		(0x1 << 25)
>> +#define PREFETCH_SYS_BENCHMARK		(0x1 << 26)
>> +#define PREFERRED_LOC_SMEM		(0x1 << 27)
> The diff should be seen after this.

Initially the macros were defined after the partial sub-test. A few macros
were called by different sub-tests before they were defined, which caused a
compilation issue.

Hence the macros were moved upwards so that each macro is accessible to all
sub-tests.

>> +#define ATOMIC_BATCH			(0x1 << 28)
>> +#define MIGRATE_ALL_PAGES		(0x1 << 29)
>> +#define PREFERRED_LOC_ATOMIC_DEVICE	(0x1 << 30)
>> +#define PREFERRED_LOC_ATOMIC_GL		(0x1ull << 31)
>> +#define PREFERRED_LOC_ATOMIC_CPU	(0x1ull << 32)
>> +#define MADVISE_MULTI_VMA		(0x1ull << 33)
>> +#define MADVISE_SPLIT_VMA		(0x1ull << 34)
>> +#define MADVISE_ATOMIC_VMA		(0x1ull << 35)
>> +#define PREFETCH_SPLIT_VMA		(0x1ull << 36)
>> +#define PREFETCH_CHANGE_ATTR		(0x1ull << 37)
>> +#define PREFETCH_SAME_ATTR		(0x1ull << 38)
>>
>>   #define N_MULTI_FAULT           4
>>
>> @@ -478,6 +508,47 @@ static void __aligned_partial_free(struct
>> aligned_alloc_type  *aligned_alloc_typ
>>    * SUBTEST: processes-evict-malloc-mix-bo
>>    * Description: multi-process trigger eviction of VRAM allocated via malloc
>> and BO create
>>    * Test category: stress test
>> + *
>> + * SUBTEST: madvise-multi-vma
>> + * Description: performs multiple madvise operations on multiple
>> + virtual memory area using atomic device attributes
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: madvise-split-vma
>> + * Description: perform madvise operations on multiple type VMAs (BO
>> + and CPU VMAs)
>> + * Test category: perform madvise operations on multiple type VMAs (BO
>> + and CPU VMAs)
>> + *
>> + * SUBTEST: madvise-atomic-vma
>> + * Description: perform madvise atomic operations on BO in VRAM/SMEM if
>> + atomic ATTR global/device
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: madvise-split-vma-with-mapping
>> + * Description: performs prefetch and page migration
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: madvise-preffered-loc-atomic-vram
>> + * Description: performs both atomic and preferred loc madvise
>> + operations atomic device attributes set
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: madvise-preffered-loc-atomic-gl
>> + * Description: performs both atomic and preferred loc madvise
>> + operations with atomic global attributes set
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: madvise-preffered-loc-atomic-cpu
>> + * Description: performs both atomic and preferred loc madvise
>> + operations with atomic cpu attributes set
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: madvise-preffered-loc-sram-migrate-pages
>> + * Description: performs preferred loc madvise operations and migrating
>> + all pages in smem
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: madvise-no-range-invalidate-same-attr
>> + * Description: performs atomic global madvise operation, prefetch and
>> + again madvise operation with same atomic attribute
>> + * Test category: functionality test
>> + *
>> + * SUBTEST: madvise-range-invalidate-change-attr
>> + * Description: performs atomic global madvise operation, prefetch and
>> + again madvise operation with different atomic attribute
>> + * Test category: functionality test
>> + *
>>    */
>>
>>   static void
>> @@ -544,7 +615,7 @@ many_allocs(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   		allocs[i] = alloc;
>>
>>   		touch_all_pages(fd, exec_queue, allocs[i].ptr, alloc_size,
>> stride,
>> -				&tv, &submit);
>> +				&tv, &submit, flags & ATOMIC_BATCH);
>>   	}
>>
>>   	if (barrier)
>> @@ -692,7 +763,7 @@ partial(int fd, struct drm_xe_engine_class_instance
>> *eci, unsigned int flags)
>>   		.num_syncs = 1,
>>   		.syncs = to_user_pointer(sync),
>>   	};
>> -	struct {
>> +	struct batch_data {
> Is there any reason batch_data is added? There is no difference between these two declarations. If it is not needed, can you please drop it?
batch_data was already part of the original xe_exec_system_allocator. For
the newly added madvise tests, a new structure has been defined with extra
members.
>>   		uint32_t batch[16];
>>   		uint64_t pad;
>>   		uint64_t vm_sync;
>> @@ -750,7 +821,8 @@ partial(int fd, struct drm_xe_engine_class_instance
>> *eci, unsigned int flags)
>>   		uint64_t sdi_addr = addr + sdi_offset;
>>   		int b = 0;
>>
>> -		write_dword(data[i].batch, sdi_addr,
>> WRITE_VALUE(&data[i], i), &b);
>> +		write_dword((struct test_exec_data *)&data[i], sdi_addr,
>> WRITE_VALUE(&data[i], i),
>> +			     &b, ATOMIC_BATCH ? true : false);
>>   		igt_assert(b <= ARRAY_SIZE(data[i].batch));
>>
>>   		if (!i)
>> @@ -773,7 +845,10 @@ partial(int fd, struct drm_xe_engine_class_instance
>> *eci, unsigned int flags)
>>   		xe_wait_ufence(fd, new ?: exec_ufence,
>> USER_FENCE_VALUE,
>>   			       exec_queue, FIVE_SEC);
>>   		if (i || (flags & CPU_FAULT))
>> -			igt_assert_eq(data[i].data, READ_VALUE(&data[i]));
>> +			igt_assert_eq(data[i].data,
>> +				      flags & ATOMIC_BATCH
>> +				      ? VAL_ATOMIC_EXPECTED
>> +				      : READ_VALUE(&data[i]));
>>   		exec_ufence[0] = 0;
>>
>>   		if (!i) {
>> @@ -1072,16 +1147,6 @@ partial(int fd, struct
>> drm_xe_engine_class_instance *eci, unsigned int flags)
>>    * Description: Create multiple threads with a faults on different hardware
>> engines to same addresses, racing between CPU and GPU access
>>    * Test category: stress test
>>    */
>> -
>> -struct test_exec_data {
>> -	uint32_t batch[32];
>> -	uint64_t pad;
>> -	uint64_t vm_sync;
>> -	uint64_t exec_sync;
>> -	uint32_t data;
>> -	uint32_t expected_data;
>> -};
>> -
>>   static void igt_require_hugepages(void)  {
>>   	igt_skip_on_f(!igt_get_meminfo("HugePages_Total"),
>> @@ -1090,11 +1155,52 @@ static void igt_require_hugepages(void)
>>   		      "No huge pages available!\n");
>>   }
>>
>> +static int
>> +xe_vm_madvixe_pat_attr(int fd, uint32_t vm, uint64_t addr, uint64_t
>> range,
>> +			int pat_index)
>> +{
>> +	int err;
>> +
>> +	err = xe_vm_madvise(fd, vm, addr, range, 0,
>> +			    DRM_XE_MEM_RANGE_ATTR_PAT, pat_index, 0);
>> +	if (err)
>> +		return err;
>> +	return 0;
>> +}
>> +
> This is a more simplified way. In the failure case we are asserting; can you please check whether this is possible?
> static int xe_vm_madvise_pat_attr(int fd, uint32_t vm, uint64_t addr, uint64_t range, int pat_index)
> {
>      return xe_vm_madvise(fd, vm, addr, range, 0,
>                           DRM_XE_MEM_RANGE_ATTR_PAT, pat_index, 0);
> }
>
>> +static int
>> +xe_vm_madvise_atomic_attr(int fd, uint32_t vm, uint64_t addr, uint64_t
>> range,
>> +			  int mem_attr)
>> +{
>> +	int err;
>> +
>> +	err = xe_vm_madvise(fd, vm, addr, range, 0,
>> +			    DRM_XE_MEM_RANGE_ATTR_ATOMIC,
>> +			    mem_attr, 0);
>> +	if (err)
>> +		return err;
>> +	return 0;
>> +}
>> +
>> +static int
>> +xe_vm_madvise_migrate_pages(int fd, uint32_t vm, uint64_t addr,
>> +uint64_t range) {
>> +	int err;
>> +
>> +	err = xe_vm_madvise(fd, vm, addr, range, 0,
>> +			    DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC,
>> +			    DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM,
>> +			    DRM_XE_MIGRATE_ALL_PAGES);
>> +	if (err)
>> +		return err;
>> +	return 0;
>> +}
>> +
> Same here as well
>>   static void
>>   test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>>   	  int n_exec_queues, int n_execs, size_t bo_size,
>>   	  size_t stride, uint32_t vm, void *alloc, pthread_barrier_t *barrier,
>> -	  unsigned int flags)
>> +	  unsigned long long flags)
>>   {
>>   	uint64_t addr;
>>   	struct drm_xe_sync sync[1] = {
>> @@ -1107,9 +1213,10 @@ test_exec(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   		.syncs = to_user_pointer(sync),
>>   	};
>>   	uint32_t exec_queues[MAX_N_EXEC_QUEUES];
>> -	struct test_exec_data *data, *next_data = NULL;
>> +	struct test_exec_data *data, *next_data = NULL, *org_data;
>>   	uint32_t bo_flags;
>>   	uint32_t bo = 0, bind_sync = 0;
>> +	uint32_t val;
>>   	void **pending_free;
>>   	u64 *exec_ufence = NULL, *bind_ufence = NULL;
>>   	int i, j, b, file_fd = -1, prev_idx, pf_count, err; @@ -1234,6 +1341,133
>> @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>>   				 strerror(errno), vm, to_user_pointer(data),
>> bo_size);
>>   	}
>>
>> +	if (flags & PREFERRED_LOC_SMEM) {
>> +		if (flags & MIGRATE_ALL_PAGES) {
>> +			err = xe_vm_madvise_migrate_pages(fd, vm,
>> to_user_pointer(data), bo_size);
>> +			if (err)
>> +				igt_warn("failure in page migration err = %s,
>> vm =%u, data=%"PRIu64"\n",
>> +					 strerror(errno), vm,
>> to_user_pointer(data));
>> +		} else {
>> +			err = xe_vm_madvise(fd, vm, to_user_pointer(data),
>> bo_size, 0,
>> +
>> DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC,
>> +
>> DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM, 0);
>> +		}
>> +	}
>> +	if (flags & PREFERRED_LOC_ATOMIC_DEVICE) {
>> +		err = xe_vm_madvise_migrate_pages(fd, vm,
>> to_user_pointer(data), bo_size);
>> +		if (err)
>> +			igt_warn("failure in page migration err = %s, vm =%u
>> data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +
>> +		err = xe_vm_madvise_atomic_attr(fd, vm,
>> to_user_pointer(data), bo_size,
>> +						DRM_XE_ATOMIC_DEVICE);
>> +		if (err)
>> +			igt_warn("failure in atomic device attr err = %s, vm
>> =%u data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +	}
>> +	if (flags & PREFERRED_LOC_ATOMIC_GL) {
>> +		err = xe_vm_madvise_migrate_pages(fd, vm,
>> to_user_pointer(data), bo_size);
>> +		if (err)
>> +			igt_warn("failure in page migration err = %s, vm =%u
>> data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +		err = xe_vm_madvise_atomic_attr(fd, vm,
>> to_user_pointer(data), bo_size,
>> +						DRM_XE_ATOMIC_GLOBAL);
>> +		if (err)
>> +			igt_warn("failure in atomic global attr err = %s, vm
>> =%u data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +	}
>> +	if (flags & PREFERRED_LOC_ATOMIC_CPU) {
>> +		err = xe_vm_madvise_migrate_pages(fd, vm,
>> to_user_pointer(data), bo_size);
>> +		if (err)
>> +			igt_warn("failure in page migration err = %s, vm =%u
>> data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +
>> +		err = xe_vm_madvise_atomic_attr(fd, vm,
>> to_user_pointer(data), bo_size,
>> +						DRM_XE_ATOMIC_CPU);
>> +		if (err)
>> +			igt_warn("failure in atomic cpu attr err = %s, vm =%u
>> data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +	}
>> +	if (flags & MADVISE_MULTI_VMA) {
>> +		if (bo_size)
>> +			bo_size = ALIGN(bo_size, SZ_4K);
>> +		err = xe_vm_madvise_atomic_attr(fd, vm,
>> to_user_pointer(data) + bo_size/2,
>> +						bo_size/2,
>> DRM_XE_ATOMIC_DEVICE);
>> +		if (err)
>> +			igt_warn("failure in atomic device attr err = %s, vm
>> =%u data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +
>> +		err = xe_vm_madvise_atomic_attr(fd, vm,
>> to_user_pointer(data) + bo_size,
>> +						bo_size,
>> DRM_XE_ATOMIC_DEVICE);
>> +		if (err)
>> +			igt_warn("failure in atomic multi_vma err = %s, vm
>> =%u data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +
>> +		err = xe_vm_madvixe_pat_attr(fd, vm,
>> to_user_pointer(data), bo_size, 4);
>> +		if (err)
>> +			igt_warn("failure in pat attr index 4 err = %s, vm =%u
>> data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +
>> +		err = xe_vm_madvixe_pat_attr(fd, vm,
>> to_user_pointer(data), bo_size, 3);
>> +		if (err)
>> +			igt_warn("failure in pat attr index 3 err = %s, vm =%u
>> data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +
>> +		err = xe_vm_madvixe_pat_attr(fd, vm,
>> to_user_pointer(data) + bo_size/2, bo_size/2,
>> +					     8);
> PAT index values differ across platforms.
> It is better to use the library functions in lib/intel_pat to obtain the correct PAT index values for each platform.
> -- Pravalika
>
>> +		if (err)
>> +			igt_warn("failure in pat attr index 8 err = %s, vm =%u
>> data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +	}
>> +	if (flags & MADVISE_SPLIT_VMA) {
>> +		if (bo_size)
>> +			bo_size = ALIGN(bo_size, SZ_4K);
>> +
>> +		bo_flags =
>> DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;
>> +		bo = xe_bo_create(fd, vm, bo_size,
>> +				  vram_if_possible(fd, eci->gt_id), bo_flags);
>> +		xe_vm_bind_async(fd, vm, 0, bo, 0, to_user_pointer(data) +
>> bo_size/2,
>> +				 bo_size/2, 0, 0);
>> +
>> +		__xe_vm_bind_assert(fd, vm, 0, 0, 0, to_user_pointer(data)
>> + bo_size/2,
>> +				    bo_size/2, DRM_XE_VM_BIND_OP_MAP,
>> +
>> DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR, sync,
>> +				    1, 0, 0);
>> +		xe_wait_ufence(fd, &data[0].vm_sync,
>> USER_FENCE_VALUE, 0, FIVE_SEC);
>> +		data[0].vm_sync = 0;
>> +		gem_close(fd, bo);
>> +		bo = 0;
>> +
>> +		err = xe_vm_madvise_atomic_attr(fd, vm,
>> to_user_pointer(data),
>> +						bo_size/2,
>> DRM_XE_ATOMIC_DEVICE);
>> +		if (err)
>> +			igt_warn("failure in split atomic device attr err = %s,
>> vm =%u data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +	}
>> +	if (flags & MADVISE_ATOMIC_VMA) {
>> +		if (bo_size)
>> +			bo_size = ALIGN(bo_size, SZ_4K);
>> +
>> +		bo_flags =
>> DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;
>> +		bo = xe_bo_create(fd, vm, bo_size, vram_if_possible(fd, eci-
>>> gt_id), bo_flags);
>> +		xe_vm_bind_async(fd, vm, 0, bo, 0, to_user_pointer(data),
>> bo_size, 0,
>> +0);
>> +
>> +		__xe_vm_bind_assert(fd, vm, 0, 0, 0, to_user_pointer(data),
>> bo_size,
>> +				    DRM_XE_VM_BIND_OP_MAP,
>> +
>> DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR, sync,
>> +				    1, 0, 0);
>> +		xe_wait_ufence(fd, &data[0].vm_sync,
>> USER_FENCE_VALUE, 0, FIVE_SEC);
>> +		data[0].vm_sync = 0;
>> +		gem_close(fd, bo);
>> +		bo = 0;
>> +
>> +		err = xe_vm_madvise_atomic_attr(fd, vm,
>> to_user_pointer(data), bo_size/2,
>> +						DRM_XE_ATOMIC_GLOBAL);
>> +		if (err)
>> +			igt_warn("failure in atomic vma err = %s, vm =%u
>> data=%"PRIu64"\n",
>> +				 strerror(errno), vm, to_user_pointer(data));
>> +	}
>> +
>>   	if (flags & BO_UNMAP) {
>>   		bo_flags =
>> DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;
>>   		bo = xe_bo_create(fd, vm, bo_size,
>> @@ -1307,6 +1541,16 @@ test_exec(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   		bool fault_inject = (FAULT & flags) && i == n_execs / 2;
>>   		bool fault_injected = (FAULT & flags) && i > n_execs;
>>
>> +		if (flags & MADVISE_MULTI_VMA) {
>> +			addr = addr + bo_size;
>> +			org_data = data;
>> +			data = from_user_pointer(addr);
>> +			batch_offset = (char *)&(data[idx].batch) - (char
>> *)data;
>> +			batch_addr = addr + batch_offset;
>> +			sdi_offset = (char *)&(data[idx].data) - (char *)data;
>> +			sdi_addr = addr + sdi_offset;
>> +		}
>> +
>>   		if (barrier)
>>   			pthread_barrier_wait(barrier);
>>
>> @@ -1316,18 +1560,74 @@ test_exec(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   				__write_dword(data[idx].batch,
>>   					      sdi_addr + j * orig_size,
>>   					      WRITE_VALUE(&data[idx], idx),
>> &b);
>> -			write_dword(data[idx].batch, sdi_addr + j *
>> orig_size,
>> -				    WRITE_VALUE(&data[idx], idx), &b);
>> +			write_dword(&data[idx], sdi_addr + j * orig_size,
>> +				    WRITE_VALUE(&data[idx], idx), &b,
>> +				    flags & ATOMIC_BATCH ? true : false);
>>   			igt_assert(b <= ARRAY_SIZE(data[idx].batch));
>>   		} else if (!(flags & EVERY_OTHER_CHECK)) {
>>   			b = 0;
>> -			write_dword(data[idx].batch, sdi_addr,
>> -				    WRITE_VALUE(&data[idx], idx), &b);
>> +			write_dword(&data[idx], sdi_addr,
>> +				    WRITE_VALUE(&data[idx], idx), &b,
>> +				    flags & ATOMIC_BATCH ? true : false);
>>   			igt_assert(b <= ARRAY_SIZE(data[idx].batch));
>> +			if (flags & PREFETCH) {
>> +				if (flags & PREFETCH_SPLIT_VMA) {
>> +					bo_size = ALIGN(bo_size, SZ_4K);
>> +
>> +					xe_vm_prefetch_async(fd, vm, 0, 0,
>> addr, bo_size, NULL, 0, 0);
>> +
>> +					igt_info("num_vmas before madvise
>> = %d \n", val);
>> +
>> +					val =
>> xe_vm_print_mem_attr_values_in_range(fd, vm, addr,
>> +bo_size);
>> +
>> +					err =
>> xe_vm_madvise_migrate_pages(fd, vm, to_user_pointer(data), bo_size/2);
>> +					if (err)
>> +						igt_warn("failure in prefetch
>> split vma err = %s, vm =%u data=%"PRIu64"\n",
>> +
>> 	strerror(errno), vm, to_user_pointer(data));
>> +					igt_info("num_vmas after madvise=
>> %d \n", val);
>> +					val =
>> xe_vm_print_mem_attr_values_in_range(fd, vm, addr,  bo_size);
>> +				} else if (flags & PREFETCH_SAME_ATTR) {
>> +					err =
>> xe_vm_madvise_atomic_attr(fd, vm, to_user_pointer(data), bo_size,
>> +
>> 	DRM_XE_ATOMIC_GLOBAL);
>> +					if (err)
>> +						igt_warn("failure in prefetch
>> same attr err = %s, vm =%u data=%"PRIu64"\n",
>> +
>> 	strerror(errno), vm, to_user_pointer(data));
>> +					val =
>> xe_vm_print_mem_attr_values_in_range(fd, vm, addr,  bo_size);
>> +					xe_vm_prefetch_async(fd, vm, 0, 0,
>> addr, bo_size, NULL, 0, DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC);
>> +					err =
>> xe_vm_madvise_atomic_attr(fd, vm, to_user_pointer(data), bo_size/2,
>> +
>> 	DRM_XE_ATOMIC_GLOBAL);
>> +					if (err)
>> +						igt_warn("failure in prefetch
>> atomic attr err = %s, vm =%u data=%"PRIu64"\n",
>> +
>> 	strerror(errno), vm, to_user_pointer(data));
>> +				} else if (flags & PREFETCH_CHANGE_ATTR) {
>> +					err =
>> xe_vm_madvise_atomic_attr(fd, vm, to_user_pointer(data), bo_size,
>> +
>> 	DRM_XE_ATOMIC_GLOBAL);
>> +					if (err)
>> +						igt_warn("failure in prefetch
>> atomic attr err = %s, vm =%u data=%"PRIu64"\n",
>> +
>> 	strerror(errno), vm, to_user_pointer(data));
>> +					val =
>> xe_vm_print_mem_attr_values_in_range(fd, vm, addr,
>> +bo_size);
>> +
>> +					xe_vm_prefetch_async(fd, vm, 0, 0,
>> addr, bo_size, NULL, 0,
>> +DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC);
>> +
>> +					err =
>> xe_vm_madvise_atomic_attr(fd, vm, to_user_pointer(data), bo_size,
>> +
>> 	DRM_XE_ATOMIC_DEVICE);
>> +					if (err)
>> +						igt_warn("failure in prefetch
>> change attr err = %s, vm =%u data=%"PRIu64"\n",
>> +
>> 	strerror(errno), vm, to_user_pointer(data));
>> +					val =
>> xe_vm_print_mem_attr_values_in_range(fd, vm, addr,  bo_size);
>> +					}
>> +				} else {
>> +					b = 0;
>> +					write_dword((struct test_exec_data
>> *)&data[idx], sdi_addr,
>> +						     WRITE_VALUE(&data[idx],
>> idx), &b,
>> +						     flags & ATOMIC_BATCH ?
>> true : false);
>> +						     igt_assert(b <=
>> ARRAY_SIZE(data[idx].batch));
>> +				}
>>   		} else if (flags & EVERY_OTHER_CHECK && !odd(i)) {
>>   			b = 0;
>> -			write_dword(data[idx].batch, sdi_addr,
>> -				    WRITE_VALUE(&data[idx], idx), &b);
>> +			write_dword(&data[idx], sdi_addr,
>> +				    WRITE_VALUE(&data[idx], idx), &b,
>> +				    flags & ATOMIC_BATCH ? true : false);
>>   			igt_assert(b <= ARRAY_SIZE(data[idx].batch));
>>
>>   			aligned_alloc_type = __aligned_alloc(aligned_size,
>> bo_size); @@ -1346,10 +1646,11 @@ test_exec(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   			__aligned_partial_free(&aligned_alloc_type);
>>
>>   			b = 0;
>> -			write_dword(data[next_idx].batch,
>> +			write_dword(&data[next_idx],
>>   				    to_user_pointer(next_data) +
>>   				    (char *)&data[next_idx].data - (char
>> *)data,
>> -				    WRITE_VALUE(&data[next_idx], next_idx),
>> &b);
>> +				    WRITE_VALUE(&data[next_idx], next_idx),
>> &b,
>> +				    flags & ATOMIC_BATCH ? true : false);
>>   			igt_assert(b <= ARRAY_SIZE(data[next_idx].batch));
>>   		}
>>
>> @@ -1404,9 +1705,18 @@ test_exec(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   					       exec_queues[e], &timeout);
>>   			igt_assert(err == -ETIME || err == -EIO);
>>   		} else {
>> -			xe_wait_ufence(fd, exec_ufence ? exec_ufence :
>> -				       &data[idx].exec_sync,
>> USER_FENCE_VALUE,
>> -				       exec_queues[e], FIVE_SEC);
>> +			if (flags & PREFERRED_LOC_ATOMIC_CPU) {
>> +				int64_t timeout = QUARTER_SEC;
>> +				err = __xe_wait_ufence(fd, exec_ufence ?
>> exec_ufence :
>> +						       &data[idx].exec_sync,
>> +						       USER_FENCE_VALUE,
>> +						       exec_queues[e],
>> &timeout);
>> +				if (err)
>> +					goto cleanup;
>> +			} else
>> +				xe_wait_ufence(fd, exec_ufence ?
>> exec_ufence :
>> +					       &data[idx].exec_sync,
>> USER_FENCE_VALUE,
>> +					       exec_queues[e], FIVE_SEC);
>>   			if (flags & LOCK && !i)
>>   				munlock(data, bo_size);
>>
>> @@ -1456,17 +1766,17 @@ test_exec(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   				if (flags & FORK_READ) {
>>   					igt_fork(child, 1)
>>   						igt_assert_eq(data[idx].data,
>> -
>> READ_VALUE(&data[idx]));
>> +							      flags &
>> ATOMIC_BATCH ? VAL_ATOMIC_EXPECTED :
>> +READ_VALUE(&data[idx]));
>>   					if (!(flags & FORK_READ_AFTER))
>>   						igt_assert_eq(data[idx].data,
>> -
>> READ_VALUE(&data[idx]));
>> +							      flags &
>> ATOMIC_BATCH ? VAL_ATOMIC_EXPECTED :
>> +READ_VALUE(&data[idx]));
>>   					igt_waitchildren();
>>   					if (flags & FORK_READ_AFTER)
>>   						igt_assert_eq(data[idx].data,
>> -
>> READ_VALUE(&data[idx]));
>> +							      flags &
>> ATOMIC_BATCH ? VAL_ATOMIC_EXPECTED :
>> +READ_VALUE(&data[idx]));
>>   				} else {
>>   					igt_assert_eq(data[idx].data,
>> -
>> READ_VALUE(&data[idx]));
>> +						      flags & ATOMIC_BATCH ?
>> VAL_ATOMIC_EXPECTED :
>> +READ_VALUE(&data[idx]));
>>
>>   					if (flags &
>> PREFETCH_SYS_BENCHMARK) {
>>   						struct timespec tv = {};
>> @@ -1494,13 +1804,13 @@ test_exec(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   								((void *)data)
>> + j * orig_size;
>>
>>
>> 	igt_assert_eq(__data[idx].data,
>> -
>> READ_VALUE(&data[idx]));
>> +								      flags &
>> ATOMIC_BATCH ? VAL_ATOMIC_EXPECTED :
>> +READ_VALUE(&data[idx]));
>>   						}
>>   					}
>>   				}
>>   				if (flags & EVERY_OTHER_CHECK)
>>   					igt_assert_eq(data[prev_idx].data,
>> -
>> READ_VALUE(&data[prev_idx]));
>> +						      flags & ATOMIC_BATCH ?
>> VAL_ATOMIC_EXPECTED :
>> +READ_VALUE(&data[prev_idx]));
>>   			}
>>   		}
>>
>> @@ -1521,6 +1831,9 @@ test_exec(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   			gem_close(fd, bo);
>>   		}
>>
>> +		if (flags & MADVISE_MULTI_VMA)
>> +			data = org_data;
>> +
>>   		if (flags & NEW) {
>>   			if (flags & MMAP) {
>>   				if (flags & FREE)
>> @@ -1610,6 +1923,7 @@ test_exec(int fd, struct
>> drm_xe_engine_class_instance *eci,
>>   				 pf_count, pf_count_after);
>>   	}
>>
>> +cleanup:
>>   	if (bo) {
>>   		sync[0].addr = to_user_pointer(bind_ufence);
>>   		__xe_vm_bind_assert(fd, vm, 0,
>> @@ -1864,7 +2178,7 @@ processes(int fd, int n_exec_queues, int n_execs,
>> size_t bo_size,
>>
>>   struct section {
>>   	const char *name;
>> -	unsigned int flags;
>> +	unsigned long long flags;
>>   };
>>
>>   igt_main
>> @@ -1964,6 +2278,19 @@ igt_main
>>   		{ "malloc-mix-bo", MIX_BO_ALLOC },
>>   		{ NULL },
>>   	};
>> +	const struct section msections[] = {
>> +		{ "preffered-loc-sram-migrate-pages",
>> PREFERRED_LOC_SMEM | MIGRATE_ALL_PAGES | ATOMIC_BATCH },
>> +		{ "preffered-loc-atomic-vram",
>> PREFERRED_LOC_ATOMIC_DEVICE | ATOMIC_BATCH },
>> +		{ "preffered-loc-atomic-gl", PREFERRED_LOC_ATOMIC_GL |
>> ATOMIC_BATCH },
>> +		{ "preffered-loc-atomic-cpu",
>> PREFERRED_LOC_ATOMIC_CPU | ATOMIC_BATCH },
>> +		{ "multi-vma", MADVISE_MULTI_VMA | ATOMIC_BATCH },
>> +		{ "split-vma", MADVISE_SPLIT_VMA | ATOMIC_BATCH },
>> +		{ "atomic-vma", MADVISE_ATOMIC_VMA | ATOMIC_BATCH
>> },
>> +		{ "split-vma-with-mapping", PREFETCH |
>> PREFETCH_SPLIT_VMA | ATOMIC_BATCH },
>> +		{ "range-invalidate-change-attr", PREFETCH |
>> PREFETCH_CHANGE_ATTR | ATOMIC_BATCH },
>> +		{ "no-range-invalidate-same-attr", PREFETCH |
>> PREFETCH_SAME_ATTR | ATOMIC_BATCH },
>> +		{ NULL },
>> +	};
>>   	int fd;
>>   	int num_sections;
>>
>> @@ -1983,15 +2310,22 @@ igt_main
>>   	for (const struct section *s = sections; s[num_sections].name;
>> num_sections++)
>>   		;
>>
>> -	for (int i = 0; i < num_sections * 2; i++) {
>> -		struct section *s = &sections[i % num_sections];
>> +	for (int i = 0; i < num_sections * 3; i++) {
>> +		struct section p = sections[i % num_sections];
>> +		struct section *s = &p;
>>
>> -		if (i/num_sections == 0) {
>> +		if (i/num_sections == 1) {
>>   			static char modified_name[256];
>>   			snprintf(modified_name, sizeof(modified_name),
>> "%s-preferred-loc-smem", s->name);
>>   			s->name = modified_name;
>>   			s->flags |= PREFERRED_LOC_SMEM;
>>   		}
>> +		if (i/num_sections == 2) {
>> +			static char modified_name[256];
>> +			snprintf(modified_name, sizeof(modified_name),
>> "atomic-batch-%s", s->name);
>> +			s->name = modified_name;
>> +			s->flags |= ATOMIC_BATCH;
>> +		}
>>
>>   		igt_subtest_f("once-%s", s->name)
>>   			xe_for_each_engine(fd, hwe)
>> @@ -2175,6 +2509,14 @@ igt_main
>>   			processes_evict(fd, SZ_8M, SZ_1M, s->flags);
>>   	}
>>
>> +	for (const struct section *s = msections; s->name; s++) {
>> +		igt_subtest_f("madvise-%s", s->name) {
>> +			xe_for_each_engine(fd, hwe)
>> +				test_exec(fd, hwe, 1, 1, SZ_64K, 0, 0, NULL,
>> +						NULL, s->flags);
>> +		}
>> +	}
>> +
>>   	igt_fixture {
>>   		xe_device_put(fd);
>>   		drm_close_driver(fd);
>> --
>> 2.43.0


More information about the igt-dev mailing list