[PATCH i-g-t 2/2] tests/amdgpu: add fwm preemption test case for userq
Khatri, Sunil
sukhatri at amd.com
Fri May 16 05:51:31 UTC 2025
Also, just to be sure that we aren't using the DMA/SDMA engine, as it isn't
supported yet in userq. So I am assuming the dma write isn't actually
using the SDMA engine?
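
As far as I can tell from the diff, the write in this test is a PM4
WRITE_DATA packet emitted through the GFX CP on the user queue, so SDMA
should not be involved. A minimal sketch of that path for reference; the
helper name emit_gfx_write() is made up for illustration, the packet layout
simply mirrors what the patch below emits inline:

    /* Sketch only: CPU-visible write via the GFX CP (PM4 WRITE_DATA),
     * mirroring what the patch emits on the user queue. No SDMA involved. */
    static void emit_gfx_write(struct amdgpu_cmd_base *cmd, uint64_t dst_mc,
                               uint32_t value)
    {
            cmd->emit(cmd, PACKET3(PACKET3_WRITE_DATA, 3));
            cmd->emit(cmd, WRITE_DATA_DST_SEL(5) | WR_CONFIRM |
                           WRITE_DATA_CACHE_POLICY(3));
            cmd->emit(cmd, (uint32_t)(dst_mc & 0xfffffffc)); /* dst addr lo */
            cmd->emit(cmd, (uint32_t)(dst_mc >> 32));        /* dst addr hi */
            cmd->emit(cmd, value);                           /* e.g. 0xdead0000 */
    }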
Otherwise the code looks good to me. With the above understanding, the code is
Reviewed-by: Sunil Khatri <sunil.khatri at amd.com>
On 5/15/2025 12:13 PM, Mohan Marimuthu, Yogesh wrote:
>
> [Public]
>
> The FWM (fence wait multi) preemption is tested using the algorithm below
> (a short code sketch of the per-queue flow follows this list):
> 1) create two user queues
> 2) submit a dma write job gated by an unsatisfied fence wait on both queues
> 3) if the dma write completes while the fence is still unsatisfied, the test has failed
>
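
In code terms, steps 2 and 3 boil down to roughly the sketch below for each
queue; the real test runs it on two queues back to back so the blocked wait on
the first queue must be preempted for the second queue to make progress.
emit_fence_wait_multi() here and emit_gfx_write() from the sketch further up
are illustrative stand-ins for the packet emission done inline in the diff;
the fence value 10 and the sleep intervals simply mirror the test:

    /* Illustrative only: gate a write behind a fence value the CPU has not
     * written yet, submit, and check that the write does not land until the
     * CPU satisfies the fence. Mirrors the per-queue flow in the diff. */
    static void emit_fence_wait_multi(struct amdgpu_cmd_base *cmd,
                                      uint64_t fence_mc, uint32_t fence_val)
    {
            cmd->emit(cmd, PACKET3(PACKET3_FENCE_WAIT_MULTI, 4 * 1));
            cmd->emit(cmd, FENCE_WAIT_MULTI_ENGINE(1) |
                           FENCE_WAIT_MULTI_PREEMPTABLE(1) |
                           FENCE_WAIT_MULTI_CACHE_POLICY(3) |
                           FENCE_WAIT_MULTI_POLL_INTERVAL(4));
            cmd->emit(cmd, (uint32_t)fence_mc);         /* fence addr lo */
            cmd->emit(cmd, (uint32_t)(fence_mc >> 32)); /* fence addr hi */
            cmd->emit(cmd, fence_val);                  /* value to wait for */
            cmd->emit(cmd, 0);
    }

    static void fwm_one_queue_sketch(amdgpu_device_handle dev,
                                     struct amdgpu_ring_context *ctx,
                                     struct amdgpu_cmd_base *cmd)
    {
            emit_fence_wait_multi(cmd, ctx->bo_mc2, 10); /* *bo2_cpu is still 0 */
            emit_gfx_write(cmd, ctx->bo_mc, 0xdead0000); /* gated by the wait  */
            ctx->pm4_dw = cmd->cdw;
            amdgpu_user_queue_submit(dev, ctx, AMD_IP_GFX, ctx->bo_mc3, true);

            usleep(100 * 1000);
            igt_assert_eq_u32(*ctx->bo_cpu, 0);          /* must not have landed */

            *ctx->bo2_cpu = 10;                          /* CPU satisfies the fence */
            usleep(10 * 1000);
            igt_assert_eq_u32(*ctx->bo_cpu, 0xdead0000); /* IB now completes */
    }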
> Signed-off-by: Yogesh Mohan Marimuthu <yogesh.mohanmarimuthu at amd.com>
> ---
> lib/amdgpu/amd_PM4.h | 17 ++++
> tests/amdgpu/amd_basic.c | 178 +++++++++++++++++++++++++++++++++++++++
> 2 files changed, 195 insertions(+)
>
> diff --git a/lib/amdgpu/amd_PM4.h b/lib/amdgpu/amd_PM4.h
> index 8f59b4223..d6a8446f4 100644
> --- a/lib/amdgpu/amd_PM4.h
> +++ b/lib/amdgpu/amd_PM4.h
> @@ -195,6 +195,23 @@
> #define PACKET3_INDIRECT_BUFFER 0x3F
> #define PACKET3_PROTECTED_FENCE_SIGNAL 0xd0
> +#define PACKET3_FENCE_WAIT_MULTI 0xd1
> +#define FENCE_WAIT_MULTI_PREEMPTABLE(x) ((x) << 1)
> + /* 0 - no
> + * 1 - yes
> + */
> +#define FENCE_WAIT_MULTI_CACHE_POLICY(x) ((x) << 2)
> + /* 0 - LRU
> + * 1 - Stream
> + * 2 - noa
> + * 3 - Bypass
> + */
> +#define FENCE_WAIT_MULTI_ENGINE(x) ((x) << 8)
> + /* 0 - me
> + * 1 - pfp
> + */
> +#define FENCE_WAIT_MULTI_POLL_INTERVAL(x) ((x) << 16)
> +
> #define PACKET3_WRITE_DATA 0x37
> #define WRITE_DATA_DST_SEL(x) ((x) << 8)
> /* 0 - register
> diff --git a/tests/amdgpu/amd_basic.c b/tests/amdgpu/amd_basic.c
> index 914d27909..00651e8a2 100644
> --- a/tests/amdgpu/amd_basic.c
> +++ b/tests/amdgpu/amd_basic.c
> @@ -689,6 +689,176 @@ amdgpu_sync_dependency_test(amdgpu_device_handle
> device_handle, bool user_queue)
> free(ring_context);
> }
> +static void
> +amdgpu_fwm_preempt_test(amdgpu_device_handle device_handle)
> +{
> + struct amdgpu_ring_context *ring_context_1;
> + struct amdgpu_ring_context *ring_context_2;
> + struct amdgpu_cmd_base *cmd_base_1 = get_cmd_base();
> + struct amdgpu_cmd_base *cmd_base_2 = get_cmd_base();
> + const struct amdgpu_ip_block_version *ip_block =
> get_ip_block(device_handle, AMD_IP_GFX);
> + int r;
> +
> + ring_context_1 = calloc(1, sizeof(struct amdgpu_ring_context));
> + igt_assert(ring_context_1);
> + amdgpu_user_queue_create(device_handle, ring_context_1,
> ip_block->type);
> +
> + ring_context_2 = calloc(1, sizeof(struct amdgpu_ring_context));
> + igt_assert(ring_context_2);
> + amdgpu_user_queue_create(device_handle, ring_context_2,
> ip_block->type);
> +
> + /* allocate bo1 for the data write, bo2 for the fence and bo3 for the ib of ring context 1 */
> + ring_context_1->write_length = 1024;
> + r = amdgpu_bo_alloc_and_map_sync(device_handle,
> ring_context_1->write_length, 4096,
> + AMDGPU_GEM_DOMAIN_GTT,
> AMDGPU_GEM_CREATE_CPU_GTT_USWC,
> + AMDGPU_VM_MTYPE_UC, &ring_context_1->bo,
> + (void **)&ring_context_1->bo_cpu,
> &ring_context_1->bo_mc,
> + &ring_context_1->va_handle,
> + ring_context_1->timeline_syncobj_handle,
> + ++ring_context_1->point, true);
> + igt_assert_eq(r, 0);
> + memset((void *)ring_context_1->bo_cpu, 0,
> ring_context_1->write_length);
> +
> + r = amdgpu_bo_alloc_and_map_sync(device_handle, 4096, 4096,
> AMDGPU_GEM_DOMAIN_GTT,
> + AMDGPU_GEM_CREATE_CPU_GTT_USWC,
> AMDGPU_VM_MTYPE_UC,
> + &ring_context_1->bo2, (void
> **)&ring_context_1->bo2_cpu,
> + &ring_context_1->bo_mc2,
> &ring_context_1->va_handle2,
> + ring_context_1->timeline_syncobj_handle,
> + ++ring_context_1->point, true);
> + igt_assert_eq(r, 0);
> + memset((void *)ring_context_1->bo2_cpu, 0, 4096);
> +
> + r = amdgpu_bo_alloc_and_map_sync(device_handle, 8192, 4096,
> AMDGPU_GEM_DOMAIN_GTT,
> + AMDGPU_GEM_CREATE_CPU_GTT_USWC,
> AMDGPU_VM_MTYPE_UC,
> + &ring_context_1->bo3, (void
> **)&ring_context_1->bo3_cpu,
> + &ring_context_1->bo_mc3,
> &ring_context_1->va_handle3,
> + ring_context_1->timeline_syncobj_handle,
> + ++ring_context_1->point, true);
> + igt_assert_eq(r, 0);
> + memset((void *)ring_context_1->bo3_cpu, 0, 4096);
> +
> + /* allocate bo1 for the data write, bo2 for the fence and bo3 for the ib of ring context 2 */
> + ring_context_2->write_length = 1024;
> + r = amdgpu_bo_alloc_and_map_sync(device_handle,
> ring_context_2->write_length, 4096,
> + AMDGPU_GEM_DOMAIN_GTT,
> AMDGPU_GEM_CREATE_CPU_GTT_USWC,
> + AMDGPU_VM_MTYPE_UC, &ring_context_2->bo,
> + (void **)&ring_context_2->bo_cpu,
> &ring_context_2->bo_mc,
> + &ring_context_2->va_handle,
> + ring_context_2->timeline_syncobj_handle,
> + ++ring_context_2->point, true);
> + igt_assert_eq(r, 0);
> + memset((void *)ring_context_2->bo_cpu, 0,
> ring_context_2->write_length);
> +
> + r = amdgpu_bo_alloc_and_map_sync(device_handle, 4096, 4096,
> AMDGPU_GEM_DOMAIN_GTT,
> + AMDGPU_GEM_CREATE_CPU_GTT_USWC,
> AMDGPU_VM_MTYPE_UC,
> + &ring_context_2->bo2, (void
> **)&ring_context_2->bo2_cpu,
> + &ring_context_2->bo_mc2,
> &ring_context_2->va_handle2,
> + ring_context_2->timeline_syncobj_handle,
> + ++ring_context_2->point, true);
> + igt_assert_eq(r, 0);
> + memset((void *)ring_context_2->bo2_cpu, 0, 4096);
> +
> + r = amdgpu_bo_alloc_and_map_sync(device_handle, 8192, 4096,
> AMDGPU_GEM_DOMAIN_GTT,
> + AMDGPU_GEM_CREATE_CPU_GTT_USWC,
> AMDGPU_VM_MTYPE_UC,
> + &ring_context_2->bo3, (void
> **)&ring_context_2->bo3_cpu,
> + &ring_context_2->bo_mc3,
> &ring_context_2->va_handle3,
> + ring_context_2->timeline_syncobj_handle,
> + ++ring_context_2->point, true);
> + igt_assert_eq(r, 0);
> + memset((void *)ring_context_2->bo3_cpu, 0, 4096);
> +
> +
> + /* wait for gtt mapping to complete */
> + r = amdgpu_timeline_syncobj_wait(device_handle,
> ring_context_1->timeline_syncobj_handle,
> + ring_context_1->point);
> + igt_assert_eq(r, 0);
> + r = amdgpu_timeline_syncobj_wait(device_handle,
> ring_context_2->timeline_syncobj_handle,
> + ring_context_2->point);
> + igt_assert_eq(r, 0);
> +
> + /* assign cmd buffer for ring context 1 */
> + cmd_base_1->attach_buf(cmd_base_1, (void
> *)ring_context_1->bo3_cpu, 8192);
> +
> + /* create the ib for ring context 1 */
> + cmd_base_1->emit(cmd_base_1, PACKET3(PACKET3_FENCE_WAIT_MULTI, 4
> * 1));
> + cmd_base_1->emit(cmd_base_1, FENCE_WAIT_MULTI_ENGINE(1) |
> FENCE_WAIT_MULTI_PREEMPTABLE(1) |
> + FENCE_WAIT_MULTI_CACHE_POLICY(3) |
> FENCE_WAIT_MULTI_POLL_INTERVAL(4));
> + cmd_base_1->emit(cmd_base_1, ring_context_1->bo_mc2);
> + cmd_base_1->emit(cmd_base_1, ring_context_1->bo_mc2 >> 32);
> + cmd_base_1->emit(cmd_base_1, 10); // fence value not yet satisfied; CPU writes it later
> + cmd_base_1->emit(cmd_base_1, 0);
> +
> + cmd_base_1->emit(cmd_base_1, PACKET3(PACKET3_WRITE_DATA, 3));
> + cmd_base_1->emit(cmd_base_1, WRITE_DATA_DST_SEL(5) | WR_CONFIRM |
> + WRITE_DATA_CACHE_POLICY(3));
> + cmd_base_1->emit(cmd_base_1, 0xfffffffc & ring_context_1->bo_mc);
> + cmd_base_1->emit(cmd_base_1, (0xffffffff00000000 &
> ring_context_1->bo_mc) >> 32);
> + cmd_base_1->emit(cmd_base_1, 0xdead0000);
> +
> + ring_context_1->pm4_dw = cmd_base_1->cdw;
> + amdgpu_user_queue_submit(device_handle, ring_context_1,
> ip_block->type,
> + ring_context_1->bo_mc3, true);
> +
> + /* if the FWM packet had been skipped by firmware, 0xdead0000 would already be written */
> + usleep(1000 * 100);
> + igt_assert_eq_u32(*ring_context_1->bo_cpu, 0);
> +
> + /* assign cmd buffer for ring context 2 */
> + cmd_base_2->attach_buf(cmd_base_2, (void
> *)ring_context_2->bo3_cpu, 8192);
> +
> + /* create the ib for ring context 2 */
> + cmd_base_2->emit(cmd_base_2, PACKET3(PACKET3_FENCE_WAIT_MULTI, 4
> * 1));
> + cmd_base_2->emit(cmd_base_2, FENCE_WAIT_MULTI_ENGINE(1) |
> FENCE_WAIT_MULTI_PREEMPTABLE(1) |
> + FENCE_WAIT_MULTI_CACHE_POLICY(3) |
> FENCE_WAIT_MULTI_POLL_INTERVAL(4));
> + cmd_base_2->emit(cmd_base_2, ring_context_2->bo_mc2);
> + cmd_base_2->emit(cmd_base_2, ring_context_2->bo_mc2 >> 32);
> + cmd_base_2->emit(cmd_base_2, 10); // fence value not yet satisfied; CPU writes it later
> + cmd_base_2->emit(cmd_base_2, 0);
> +
> + cmd_base_2->emit(cmd_base_2, PACKET3(PACKET3_WRITE_DATA, 3));
> + cmd_base_2->emit(cmd_base_2, WRITE_DATA_DST_SEL(5) | WR_CONFIRM |
> + WRITE_DATA_CACHE_POLICY(3));
> + cmd_base_2->emit(cmd_base_2, 0xfffffffc & ring_context_2->bo_mc);
> + cmd_base_2->emit(cmd_base_2, (0xffffffff00000000 &
> ring_context_2->bo_mc) >> 32);
> + cmd_base_2->emit(cmd_base_2, 0xdead0000);
> +
> + ring_context_2->pm4_dw = cmd_base_2->cdw;
> + amdgpu_user_queue_submit(device_handle, ring_context_2,
> ip_block->type,
> + ring_context_2->bo_mc3, true);
> +
> + /* if the FWM packet had been skipped by firmware, 0xdead0000 would already be written */
> + usleep(1000 * 10);
> + igt_assert_eq_u32(*ring_context_1->bo_cpu, 0);
> + igt_assert_eq_u32(*ring_context_2->bo_cpu, 0);
> +
> + /* set the correct fence value to finish executing the ib */
> + *ring_context_1->bo2_cpu = 10;
> + *ring_context_2->bo2_cpu = 10;
> + usleep(1000 * 10);
> + igt_assert_eq_u32(*ring_context_1->bo_cpu, 0xdead0000);
> + igt_assert_eq_u32(*ring_context_2->bo_cpu, 0xdead0000);
> +
> + amdgpu_user_queue_destroy(device_handle, ring_context_1,
> ip_block->type);
> + amdgpu_user_queue_destroy(device_handle, ring_context_2,
> ip_block->type);
> +
> + amdgpu_bo_unmap_and_free(ring_context_1->bo,
> ring_context_1->va_handle,
> + ring_context_1->bo_mc,
> ring_context_1->write_length);
> + amdgpu_bo_unmap_and_free(ring_context_1->bo2,
> ring_context_1->va_handle2,
> + ring_context_1->bo_mc2, 4096);
> + amdgpu_bo_unmap_and_free(ring_context_1->bo3,
> ring_context_1->va_handle3,
> + ring_context_1->bo_mc3, 8192);
> + amdgpu_bo_unmap_and_free(ring_context_2->bo,
> ring_context_2->va_handle,
> + ring_context_2->bo_mc,
> ring_context_2->write_length);
> + amdgpu_bo_unmap_and_free(ring_context_2->bo2,
> ring_context_2->va_handle2,
> + ring_context_2->bo_mc2, 4096);
> + amdgpu_bo_unmap_and_free(ring_context_2->bo3,
> ring_context_2->va_handle3,
> + ring_context_2->bo_mc3, 8192);
> + free_cmd_base(cmd_base_1);
> + free_cmd_base(cmd_base_2);
> + free(ring_context_1);
> + free(ring_context_2);
> +}
> +
> igt_main
> {
> amdgpu_device_handle device;
> @@ -815,6 +985,14 @@ igt_main
> amdgpu_sync_dependency_test(device, true);
> }
> }
> +
> + igt_describe("Check-FWM-preempt-using-GFX-UMQ");
> + igt_subtest_with_dynamic("fwm-preempt-test-with-IP-GFX-UMQ") {
> + if (userq_arr_cap[AMD_IP_GFX]) {
> + igt_dynamic_f("fwm-preempt-test-with-gfx-umq")
> + amdgpu_fwm_preempt_test(device);
> + }
> + }
> #endif
> igt_fixture {
> --
> 2.43.0
>
>