[PATCH i-g-t v3 06/10] tests/intel/xe_svm: Add support for GPU atomic access test for svm
Zeng, Oak
oak.zeng at intel.com
Sat May 18 02:16:18 UTC 2024
> -----Original Message-----
> From: Bommu, Krishnaiah <krishnaiah.bommu at intel.com>
> Sent: Friday, May 17, 2024 7:47 AM
> To: igt-dev at lists.freedesktop.org
> Cc: Bommu, Krishnaiah <krishnaiah.bommu at intel.com>; Zeng, Oak
> <oak.zeng at intel.com>; Ghimiray, Himal Prasad
> <himal.prasad.ghimiray at intel.com>
> Subject: [PATCH i-g-t v3 06/10] tests/intel/xe_svm: Add support for GPU atomic
> access test for svm
>
> Verify GPU atomic access using multiple threads by performing operations on
> randomly allocated locations within malloc'ed memory in shared virtual memory.
>
> Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu at intel.com>
> Cc: Oak Zeng <oak.zeng at intel.com>
> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
> lib/xe/xe_util.c | 11 +++++++
> lib/xe/xe_util.h | 1 +
> tests/intel/xe_svm.c | 71 ++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 83 insertions(+)
>
> diff --git a/lib/xe/xe_util.c b/lib/xe/xe_util.c
> index de848b8bc..672e9dcef 100644
> --- a/lib/xe/xe_util.c
> +++ b/lib/xe/xe_util.c
> @@ -117,6 +117,17 @@ void insert_store(uint32_t *batch, uint64_t dst_va, uint32_t val)
> batch[++i] = MI_BATCH_BUFFER_END;
> }
>
> +// Function to insert atomic increment command
> +void insert_atomic_inc(uint32_t *batch, uint64_t dst_va, uint32_t val)
> +{
> + int i = 0;
You can write:

	(void)val;

to annotate that the parameter val is unused.
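For example (just a sketch of what I mean, reusing the function body above):

void insert_atomic_inc(uint32_t *batch, uint64_t dst_va, uint32_t val)
{
	int i = 0;

	/* val is not needed for an atomic increment, but the signature
	 * presumably has to stay compatible with the cmdbuf fill function
	 * type, so mark it as intentionally unused in case the build warns
	 * about unused parameters.
	 */
	(void)val;

	batch[i] = MI_ATOMIC | MI_ATOMIC_INC;
	batch[++i] = dst_va;
	batch[++i] = dst_va >> 32;
	batch[++i] = MI_BATCH_BUFFER_END;
}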
> +
> + batch[i] = MI_ATOMIC | MI_ATOMIC_INC;
> + batch[++i] = dst_va;
> + batch[++i] = dst_va >> 32;
> + batch[++i] = MI_BATCH_BUFFER_END;
> +}
> +
> /**
> * Creates a command buffer, fills it with commands using the provided fill
> * function, and sets up the execution queue for submission.
> diff --git a/lib/xe/xe_util.h b/lib/xe/xe_util.h
> index c38f79e60..46e1ccc9a 100644
> --- a/lib/xe/xe_util.h
> +++ b/lib/xe/xe_util.h
> @@ -40,6 +40,7 @@ void xe_create_cmdbuf(struct xe_buffer *cmd_buf, cmdbuf_fill_func_t fill_func,
> uint64_t xe_cmdbuf_exec_ufence_gpuva(struct xe_buffer *cmd_buf);
> uint64_t *xe_cmdbuf_exec_ufence_cpuva(struct xe_buffer *cmd_buf);
> void insert_store(uint32_t *batch, uint64_t dst_va, uint32_t val);
> +void insert_atomic_inc(uint32_t *batch, uint64_t dst_va, uint32_t val);
> void xe_submit_cmd(struct xe_buffer *cmdbuf);
> int64_t __xe_submit_cmd(struct xe_buffer *cmdbuf);
> void xe_destroy_buffer(struct xe_buffer *buffer);
> diff --git a/tests/intel/xe_svm.c b/tests/intel/xe_svm.c
> index d9629246c..f9e8eb2d9 100644
> --- a/tests/intel/xe_svm.c
> +++ b/tests/intel/xe_svm.c
> @@ -33,6 +33,9 @@
> *
> * SUBTEST: svm-huge-page
> * Description: verify SVM basic functionality by using huge page access
> + *
> + * SUBTEST: svm-atomic-access
> + * Description: verify SVM basic functionality by using GPU atomic access to any location in malloc'ed memory
> */
>
> #include <fcntl.h>
> @@ -47,6 +50,18 @@
> #include "xe/xe_ioctl.h"
> #include "xe/xe_query.h"
>
> +#define NUM_THREADS 10
> +
> +// Thread argument structure
> +typedef struct {
> + int fd;
> + uint32_t vm;
> + void *gpu_va;
> + uint64_t dst_va;
> + uint32_t val;
> + struct drm_xe_engine_class_instance *eci;
> +} thread_args_t;
> +
> /**
> * @brief Verifies basic workload execution on the GPU.
> *
> @@ -202,6 +217,58 @@ static void svm_thp(int fd, uint32_t vm, struct drm_xe_engine_class_instance *ec
> free(dst);
> }
>
> +
> +// Thread function for submitting atomic increment commands
> +static void* thread_func(void* args)
> +{
> + thread_args_t *thread_args = (thread_args_t *)args;
> + struct xe_buffer cmd_buf = {
> + .fd = thread_args->fd,
> + .gpu_addr = (void *)(uintptr_t)thread_args->gpu_va,
So you are creating many cmd buffers, one per thread, but all those command buffers are bound to the same gpu_va. This won't work.
You need to use a different gpu va for each command buffer.
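Something along these lines in svm_atomic_access() would give each thread its own gpu va (untested sketch; the 0x10000 stride is arbitrary, it only needs to be at least the cmd buffer size so the per-thread mappings don't overlap):

	thread_args_t thread_args[NUM_THREADS];

	for (int i = 0; i < NUM_THREADS; i++) {
		/* Per-thread args with a distinct gpu va for each cmd buffer */
		thread_args[i] = (thread_args_t) {
			fd, vm,
			(void *)(uintptr_t)(gpu_va + i * 0x10000),
			(uint64_t)dst_to_access, val, eci
		};
		pthread_create(&threads[i], NULL, thread_func, &thread_args[i]);
	}

This also avoids handing the same thread_args struct to every thread.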
> + .vm = thread_args->vm,
> + .size = xe_bb_size(thread_args->fd, PAGE_ALIGN_UFENCE),
> + .placement = vram_if_possible(thread_args->fd, thread_args->eci->gt_id),
> + .flag = DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM,
> + };
> +
> + xe_create_cmdbuf(&cmd_buf, insert_atomic_inc, thread_args->dst_va, thread_args->val, thread_args->eci);
> + xe_submit_cmd(&cmd_buf);
> +
> + xe_destroy_cmdbuf(&cmd_buf);
> +
> + return NULL;
> +}
> +
> +// Test GPU atomic access with multiple threads
> +static void svm_atomic_access(int fd, uint32_t vm, struct drm_xe_engine_class_instance *eci)
> +{
> + uint64_t gpu_va = 0x1a0000;
> + int val = 0xc0ffee;
> + uint32_t *dst, *dst_to_access;
> + uint32_t size = 1024 * 1024, sz_dw = size / 4;
> + pthread_t threads[NUM_THREADS];
> +
> + dst = aligned_alloc(xe_get_default_alignment(fd), size);
> + dst_to_access = dst + (rand() % sz_dw);
> + *dst_to_access = val;
> +
> + thread_args_t thread_args = { fd, vm, (void *)(uintptr_t)gpu_va, (uint64_t)dst_to_access, val, eci };
Same question as before: why uintptr_t?
Oak
> +
> + // Create and launch threads
> + for (int i = 0; i < NUM_THREADS; i++) {
> + pthread_create(&threads[i], NULL, thread_func, &thread_args);
> + }
> +
> + // Wait for all threads to finish
> + for (int i = 0; i < NUM_THREADS; i++) {
> + pthread_join(threads[i], NULL);
> + }
> +
> + igt_assert_eq(*dst_to_access, val + NUM_THREADS);
> +
> + free(dst);
> +}
> +
> igt_main
> {
> int fd;
> @@ -234,6 +301,10 @@ igt_main
> xe_for_each_engine(fd, hwe)
> svm_thp(fd, vm, hwe);
>
> + igt_subtest_f("svm-atomic-access")
> + xe_for_each_engine(fd, hwe)
> + svm_atomic_access(fd, vm, hwe);
> +
> igt_fixture {
> xe_vm_destroy(fd, vm);
> drm_close_driver(fd);
> --
> 2.25.1