[PATCH i-g-t v3 06/10] tests/intel/xe_svm: Add support for GPU atomic access test for svm
Bommu Krishnaiah
krishnaiah.bommu at intel.com
Fri May 17 11:46:54 UTC 2024
Verify GPU atomic access using multiple threads by performing atomic increments
on a randomly selected location within malloc'ed memory in shared virtual memory.
Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu at intel.com>
Cc: Oak Zeng <oak.zeng at intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
---
lib/xe/xe_util.c | 11 +++++++
lib/xe/xe_util.h | 1 +
tests/intel/xe_svm.c | 71 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 83 insertions(+)
diff --git a/lib/xe/xe_util.c b/lib/xe/xe_util.c
index de848b8bc..672e9dcef 100644
--- a/lib/xe/xe_util.c
+++ b/lib/xe/xe_util.c
@@ -117,6 +117,17 @@ void insert_store(uint32_t *batch, uint64_t dst_va, uint32_t val)
batch[++i] = MI_BATCH_BUFFER_END;
}
+/*
+ * Fill @batch with a single MI_ATOMIC increment of the dword at @dst_va,
+ * terminated by MI_BATCH_BUFFER_END.
+ *
+ * NOTE(review): @val is unused here; the parameter exists only so this
+ * function matches the cmdbuf_fill_func_t prototype (same shape as
+ * insert_store above).
+ */
+void insert_atomic_inc(uint32_t *batch, uint64_t dst_va, uint32_t val)
+{
+ int i = 0;
+
+ batch[i] = MI_ATOMIC | MI_ATOMIC_INC; /* atomic-increment opcode */
+ batch[++i] = dst_va; /* destination address, low 32 bits */
+ batch[++i] = dst_va >> 32; /* destination address, high 32 bits */
+ batch[++i] = MI_BATCH_BUFFER_END;
+}
+
/**
* Creates a command buffer, fills it with commands using the provided fill
* function, and sets up the execution queue for submission.
diff --git a/lib/xe/xe_util.h b/lib/xe/xe_util.h
index c38f79e60..46e1ccc9a 100644
--- a/lib/xe/xe_util.h
+++ b/lib/xe/xe_util.h
@@ -40,6 +40,7 @@ void xe_create_cmdbuf(struct xe_buffer *cmd_buf, cmdbuf_fill_func_t fill_func,
uint64_t xe_cmdbuf_exec_ufence_gpuva(struct xe_buffer *cmd_buf);
uint64_t *xe_cmdbuf_exec_ufence_cpuva(struct xe_buffer *cmd_buf);
void insert_store(uint32_t *batch, uint64_t dst_va, uint32_t val);
+void insert_atomic_inc(uint32_t *batch, uint64_t dst_va, uint32_t val);
void xe_submit_cmd(struct xe_buffer *cmdbuf);
int64_t __xe_submit_cmd(struct xe_buffer *cmdbuf);
void xe_destroy_buffer(struct xe_buffer *buffer);
diff --git a/tests/intel/xe_svm.c b/tests/intel/xe_svm.c
index d9629246c..f9e8eb2d9 100644
--- a/tests/intel/xe_svm.c
+++ b/tests/intel/xe_svm.c
@@ -33,6 +33,9 @@
*
* SUBTEST: svm-huge-page
* Description: verify SVM basic functionality by using huge page access
+ *
+ * SUBTEST: svm-atomic-access
+ * Description: verify SVM basic functionality by using GPU atomic access to an arbitrary location in malloc'ed memory
*/
#include <fcntl.h>
@@ -47,6 +50,18 @@
#include "xe/xe_ioctl.h"
#include "xe/xe_query.h"
+#define NUM_THREADS 10
+
+/* Per-thread submission arguments for the atomic-access test */
+typedef struct {
+ int fd; /* DRM device fd */
+ uint32_t vm; /* VM handle shared by all threads */
+ void *gpu_va; /* GPU VA where this thread's cmdbuf is bound */
+ uint64_t dst_va; /* SVM (CPU == GPU) address the atomic targets */
+ uint32_t val; /* forwarded to the fill function (unused there) */
+ struct drm_xe_engine_class_instance *eci; /* engine to submit on */
+} thread_args_t;
+
/**
* @brief Verifies basic workload execution on the GPU.
*
@@ -202,6 +217,58 @@ static void svm_thp(int fd, uint32_t vm, struct drm_xe_engine_class_instance *ec
free(dst);
}
+
+/*
+ * Worker thread: creates a command buffer holding one atomic increment of
+ * thread_args->dst_va (filled by insert_atomic_inc), submits it on the
+ * given engine, then destroys the buffer.
+ *
+ * NOTE(review): all threads receive the same args struct, so they bind
+ * their cmdbufs at the same gpu_va — presumably racy; confirm whether
+ * each thread should get a distinct GPU VA.
+ */
+static void* thread_func(void* args)
+{
+ thread_args_t *thread_args = (thread_args_t *)args;
+ struct xe_buffer cmd_buf = {
+ .fd = thread_args->fd,
+ .gpu_addr = (void *)(uintptr_t)thread_args->gpu_va,
+ .vm = thread_args->vm,
+ .size = xe_bb_size(thread_args->fd, PAGE_ALIGN_UFENCE),
+ .placement = vram_if_possible(thread_args->fd, thread_args->eci->gt_id),
+ .flag = DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM,
+ };
+
+ xe_create_cmdbuf(&cmd_buf, insert_atomic_inc, thread_args->dst_va, thread_args->val, thread_args->eci);
+ xe_submit_cmd(&cmd_buf);
+
+ xe_destroy_cmdbuf(&cmd_buf);
+
+ return NULL;
+}
+
+/*
+ * Test GPU atomic access with multiple threads: every thread submits an
+ * MI_ATOMIC increment targeting the same randomly chosen dword inside a
+ * malloc'ed (SVM) buffer; the final value must reflect every increment.
+ */
+static void svm_atomic_access(int fd, uint32_t vm, struct drm_xe_engine_class_instance *eci)
+{
+	uint64_t gpu_va = 0x1a0000;
+	int val = 0xc0ffee;
+	uint32_t *dst, *dst_to_access;
+	uint32_t size = 1024 * 1024, sz_dw = size / 4;
+	pthread_t threads[NUM_THREADS];
+	thread_args_t thread_args[NUM_THREADS];
+
+	dst = aligned_alloc(xe_get_default_alignment(fd), size);
+	igt_assert(dst);
+
+	/* Pick one random dword in the buffer and seed it with val */
+	dst_to_access = dst + (rand() % sz_dw);
+	*dst_to_access = val;
+
+	/*
+	 * Give every thread its own GPU VA for its command buffer: binding
+	 * all cmdbufs at one address would make the threads race on the
+	 * same VM range. 1 MiB spacing comfortably exceeds the cmdbuf size.
+	 */
+	for (int i = 0; i < NUM_THREADS; i++) {
+		thread_args[i] = (thread_args_t) {
+			.fd = fd,
+			.vm = vm,
+			.gpu_va = (void *)(uintptr_t)(gpu_va + (uint64_t)i * 0x100000),
+			.dst_va = (uint64_t)(uintptr_t)dst_to_access,
+			.val = val,
+			.eci = eci,
+		};
+		pthread_create(&threads[i], NULL, thread_func, &thread_args[i]);
+	}
+
+	/* Wait for all threads to finish */
+	for (int i = 0; i < NUM_THREADS; i++)
+		pthread_join(threads[i], NULL);
+
+	/* Every atomic increment must land exactly once */
+	igt_assert_eq(*dst_to_access, val + NUM_THREADS);
+
+	free(dst);
+}
+
igt_main
{
int fd;
@@ -234,6 +301,10 @@ igt_main
xe_for_each_engine(fd, hwe)
svm_thp(fd, vm, hwe);
+ igt_subtest_f("svm-atomic-access")
+ xe_for_each_engine(fd, hwe)
+ svm_atomic_access(fd, vm, hwe);
+
igt_fixture {
xe_vm_destroy(fd, vm);
drm_close_driver(fd);
--
2.25.1
More information about the igt-dev
mailing list