<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Calibri;font-size:10pt;color:#008000;margin:5pt;font-style:normal;font-weight:normal;text-decoration:none;" align="Left">
[Public]<br>
</p>
<br>
<div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
The FWM preemption is tested using the algorithm below:<br>
1) create 2 queues<br>
2) submit dma write job with unsatisfied fence on both the queues<br>
3) if the dma write passes then the test has failed<br>
<br>
v2: reduce timeout to 2ms<br>
<br>
Cc: Prosyak, Vitaly &lt;Vitaly.Prosyak@amd.com&gt;<br>
Cc: Jesse.Zhang &lt;Jesse.zhang@amd.com&gt;<br>
Cc: Sunil Khatri &lt;sunil.khatri@amd.com&gt;<br>
Signed-off-by: Yogesh Mohan Marimuthu &lt;yogesh.mohanmarimuthu@amd.com&gt;<br>
Reviewed-by: Sunil Khatri &lt;sunil.khatri@amd.com&gt;<br>
---<br>
lib/amdgpu/amd_PM4.h | 17 ++++<br>
tests/amdgpu/amd_basic.c | 178 +++++++++++++++++++++++++++++++++++++++<br>
2 files changed, 195 insertions(+)<br>
<br>
diff --git a/lib/amdgpu/amd_PM4.h b/lib/amdgpu/amd_PM4.h index 8f59b4223..d6a8446f4 100644<br>
--- a/lib/amdgpu/amd_PM4.h<br>
+++ b/lib/amdgpu/amd_PM4.h<br>
@@ -195,6 +195,23 @@<br>
#define PACKET3_INDIRECT_BUFFER 0x3F<br>
#define PACKET3_PROTECTED_FENCE_SIGNAL 0xd0<br>
<br>
+#define PACKET3_FENCE_WAIT_MULTI 0xd1<br>
+#define FENCE_WAIT_MULTI_PREEMPTABLE(x) ((x) << 1)<br>
+ /* 0 - no<br>
+ * 1 - yes<br>
+ */<br>
+#define FENCE_WAIT_MULTI_CACHE_POLICY(x) ((x) << 2)<br>
+ /* 0 - LRU<br>
+ * 1 - Stream<br>
+ * 2 - noa<br>
+ * 3 - Bypass<br>
+ */<br>
+#define FENCE_WAIT_MULTI_ENGINE(x) ((x) << 8)<br>
+ /* 0 - me<br>
+ * 1 - pfp<br>
+ */<br>
+#define FENCE_WAIT_MULTI_POLL_INTERVAL(x) ((x) << 16)<br>
+<br>
#define PACKET3_WRITE_DATA 0x37<br>
#define WRITE_DATA_DST_SEL(x) ((x) << 8)<br>
/* 0 - register<br>
diff --git a/tests/amdgpu/amd_basic.c b/tests/amdgpu/amd_basic.c index 914d27909..075057960 100644<br>
--- a/tests/amdgpu/amd_basic.c<br>
+++ b/tests/amdgpu/amd_basic.c<br>
@@ -689,6 +689,176 @@ amdgpu_sync_dependency_test(amdgpu_device_handle device_handle, bool user_queue)<br>
free(ring_context);<br>
}<br>
<br>
+static void<br>
+amdgpu_fwm_preempt_test(amdgpu_device_handle device_handle) {<br>
+ struct amdgpu_ring_context *ring_context_1;<br>
+ struct amdgpu_ring_context *ring_context_2;<br>
+ struct amdgpu_cmd_base *cmd_base_1 = get_cmd_base();<br>
+ struct amdgpu_cmd_base *cmd_base_2 = get_cmd_base();<br>
+ const struct amdgpu_ip_block_version *ip_block = get_ip_block(device_handle, AMD_IP_GFX);<br>
+ int r;<br>
+<br>
+ ring_context_1 = calloc(1, sizeof(struct amdgpu_ring_context));<br>
+ igt_assert(ring_context_1);<br>
+ amdgpu_user_queue_create(device_handle, ring_context_1,<br>
+ ip_block->type);<br>
+<br>
+ ring_context_2 = calloc(1, sizeof(struct amdgpu_ring_context));<br>
+ igt_assert(ring_context_2);<br>
+ amdgpu_user_queue_create(device_handle, ring_context_2,<br>
+ ip_block->type);<br>
+<br>
+ /* allocate bo1 for dma, bo2 for fence and bo3 for ib for render context 1 */<br>
+ ring_context_1->write_length = 1024;<br>
+ r = amdgpu_bo_alloc_and_map_sync(device_handle , ring_context_1->write_length, 4096,<br>
+ AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_CREATE_CPU_GTT_USWC,<br>
+ AMDGPU_VM_MTYPE_UC, &ring_context_1->bo,<br>
+ (void **)&ring_context_1->bo_cpu, &ring_context_1->bo_mc,<br>
+ &ring_context_1->va_handle,<br>
+ ring_context_1->timeline_syncobj_handle,<br>
+ ++ring_context_1->point, true);<br>
+ igt_assert_eq(r, 0);<br>
+ memset((void *)ring_context_1->bo_cpu, 0,<br>
+ ring_context_1->write_length);<br>
+<br>
+ r = amdgpu_bo_alloc_and_map_sync(device_handle, 4096, 4096, AMDGPU_GEM_DOMAIN_GTT,<br>
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC, AMDGPU_VM_MTYPE_UC,<br>
+ &ring_context_1->bo2, (void **)&ring_context_1->bo2_cpu,<br>
+ &ring_context_1->bo_mc2, &ring_context_1->va_handle2,<br>
+ ring_context_1->timeline_syncobj_handle,<br>
+ ++ring_context_1->point, true);<br>
+ igt_assert_eq(r, 0);<br>
+ memset((void *)ring_context_1->bo2_cpu, 0, 4096);<br>
+<br>
+ r = amdgpu_bo_alloc_and_map_sync(device_handle, 8192, 4096, AMDGPU_GEM_DOMAIN_GTT,<br>
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC, AMDGPU_VM_MTYPE_UC,<br>
+ &ring_context_1->bo3, (void **)&ring_context_1->bo3_cpu,<br>
+ &ring_context_1->bo_mc3, &ring_context_1->va_handle3,<br>
+ ring_context_1->timeline_syncobj_handle,<br>
+ ++ring_context_1->point, true);<br>
+ igt_assert_eq(r, 0);<br>
+ memset((void *)ring_context_1->bo3_cpu, 0, 4096);<br>
+<br>
+ /* allocate bo1 for dma, bo2 for fence and bo3 for ib for render context 2 */<br>
+ ring_context_2->write_length = 1024;<br>
+ r = amdgpu_bo_alloc_and_map_sync(device_handle , ring_context_2->write_length, 4096,<br>
+ AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_CREATE_CPU_GTT_USWC,<br>
+ AMDGPU_VM_MTYPE_UC, &ring_context_2->bo,<br>
+ (void **)&ring_context_2->bo_cpu, &ring_context_2->bo_mc,<br>
+ &ring_context_2->va_handle,<br>
+ ring_context_2->timeline_syncobj_handle,<br>
+ ++ring_context_2->point, true);<br>
+ igt_assert_eq(r, 0);<br>
+ memset((void *)ring_context_2->bo_cpu, 0,<br>
+ ring_context_2->write_length);<br>
+<br>
+ r = amdgpu_bo_alloc_and_map_sync(device_handle, 4096, 4096, AMDGPU_GEM_DOMAIN_GTT,<br>
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC, AMDGPU_VM_MTYPE_UC,<br>
+ &ring_context_2->bo2, (void **)&ring_context_2->bo2_cpu,<br>
+ &ring_context_2->bo_mc2, &ring_context_2->va_handle2,<br>
+ ring_context_2->timeline_syncobj_handle,<br>
+ ++ring_context_2->point, true);<br>
+ igt_assert_eq(r, 0);<br>
+ memset((void *)ring_context_2->bo2_cpu, 0, 4096);<br>
+<br>
+ r = amdgpu_bo_alloc_and_map_sync(device_handle, 8192, 4096, AMDGPU_GEM_DOMAIN_GTT,<br>
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC, AMDGPU_VM_MTYPE_UC,<br>
+ &ring_context_2->bo3, (void **)&ring_context_2->bo3_cpu,<br>
+ &ring_context_2->bo_mc3, &ring_context_2->va_handle3,<br>
+ ring_context_2->timeline_syncobj_handle,<br>
+ ++ring_context_2->point, true);<br>
+ igt_assert_eq(r, 0);<br>
+ memset((void *)ring_context_2->bo3_cpu, 0, 4096);<br>
+<br>
+<br>
+ /* wait for gtt mapping to complete */<br>
+ r = amdgpu_timeline_syncobj_wait(device_handle, ring_context_1->timeline_syncobj_handle,<br>
+ ring_context_1->point);<br>
+ igt_assert_eq(r, 0);<br>
+ r = amdgpu_timeline_syncobj_wait(device_handle, ring_context_2->timeline_syncobj_handle,<br>
+ ring_context_2->point);<br>
+ igt_assert_eq(r, 0);<br>
+<br>
+ /* assign cmd buffer for ring context 1 */<br>
+ cmd_base_1->attach_buf(cmd_base_1, (void<br>
+ *)ring_context_1->bo3_cpu, 8192);<br>
+<br>
+ /* create the ib for ring context 1 */<br>
+ cmd_base_1->emit(cmd_base_1, PACKET3(PACKET3_FENCE_WAIT_MULTI, 4 * 1));<br>
+ cmd_base_1->emit(cmd_base_1, FENCE_WAIT_MULTI_ENGINE(1) | FENCE_WAIT_MULTI_PREEMPTABLE(1) |<br>
+ FENCE_WAIT_MULTI_CACHE_POLICY(3) | FENCE_WAIT_MULTI_POLL_INTERVAL(4));<br>
+ cmd_base_1->emit(cmd_base_1, ring_context_1->bo_mc2);<br>
+ cmd_base_1->emit(cmd_base_1, ring_context_1->bo_mc2 >> 32);<br>
+ cmd_base_1->emit(cmd_base_1, 10); // random incorrect fence value<br>
+ cmd_base_1->emit(cmd_base_1, 0);<br>
+<br>
+ cmd_base_1->emit(cmd_base_1, PACKET3(PACKET3_WRITE_DATA, 3));<br>
+ cmd_base_1->emit(cmd_base_1, WRITE_DATA_DST_SEL(5) | WR_CONFIRM |<br>
+ WRITE_DATA_CACHE_POLICY(3));<br>
+ cmd_base_1->emit(cmd_base_1, 0xfffffffc & ring_context_1->bo_mc);<br>
+ cmd_base_1->emit(cmd_base_1, (0xffffffff00000000 & ring_context_1->bo_mc) >> 32);<br>
+ cmd_base_1->emit(cmd_base_1, 0xdead0000);<br>
Can we use the existing function write_linear to replace this?</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
<br>
</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
Regards<br>
Jesse</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
<br>
</div>
<div class="elementToProof" style="margin: 0px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
The gfx_ring_write_linear() function does not add packets to the existing cmd_base; it adds them to ring_context->pm4 and does *pm4_dw = i.</div>
<div class="elementToProof" style="margin: 0px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
To use gfx_ring_write_linear(), I would have to do the following, which does not look good IMHO.</div>
<div class="elementToProof" style="margin: 0px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
<br>
</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
ring_context_1->pm4 = ring_context_1->bo3_cpu + cmd_base_1->cdw;</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
uint32_t ret_pm4_dw;</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
gfx_ring_write_linear(...);</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
cmd_base_1->cdw += ret_pm4_dw;</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
<br>
</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
Thank you,</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
Yogesh<br>
<br>
+<br>
+ ring_context_1->pm4_dw = cmd_base_1->cdw;<br>
+ amdgpu_user_queue_submit(device_handle, ring_context_1, ip_block->type,<br>
+ ring_context_1->bo_mc3, true);<br>
+<br>
+ /* if fwm packet got skipped by firmware then 0xdead0000 will be written */<br>
+ usleep(1000 * 2);<br>
+ igt_assert_eq_u32(*ring_context_1->bo_cpu, 0);<br>
+<br>
+ /* assign cmd buffer for ring context 2 */<br>
+ cmd_base_2->attach_buf(cmd_base_2, (void<br>
+ *)ring_context_2->bo3_cpu, 8192);<br>
+<br>
+ /* create the ib for ring context 2 */<br>
+ cmd_base_2->emit(cmd_base_2, PACKET3(PACKET3_FENCE_WAIT_MULTI, 4 * 1));<br>
+ cmd_base_2->emit(cmd_base_2, FENCE_WAIT_MULTI_ENGINE(1) | FENCE_WAIT_MULTI_PREEMPTABLE(1) |<br>
+ FENCE_WAIT_MULTI_CACHE_POLICY(3) | FENCE_WAIT_MULTI_POLL_INTERVAL(4));<br>
+ cmd_base_2->emit(cmd_base_2, ring_context_2->bo_mc2);<br>
+ cmd_base_2->emit(cmd_base_2, ring_context_2->bo_mc2 >> 32);<br>
+ cmd_base_2->emit(cmd_base_2, 10); // random incorrect fence value<br>
+ cmd_base_2->emit(cmd_base_2, 0);<br>
+<br>
+ cmd_base_2->emit(cmd_base_2, PACKET3(PACKET3_WRITE_DATA, 3));<br>
+ cmd_base_2->emit(cmd_base_2, WRITE_DATA_DST_SEL(5) | WR_CONFIRM |<br>
+ WRITE_DATA_CACHE_POLICY(3));<br>
+ cmd_base_2->emit(cmd_base_2, 0xfffffffc & ring_context_2->bo_mc);<br>
+ cmd_base_2->emit(cmd_base_2, (0xffffffff00000000 & ring_context_2->bo_mc) >> 32);<br>
+ cmd_base_2->emit(cmd_base_2, 0xdead0000);<br>
+<br>
+ ring_context_2->pm4_dw = cmd_base_2->cdw;<br>
+ amdgpu_user_queue_submit(device_handle, ring_context_2, ip_block->type,<br>
+ ring_context_2->bo_mc3, true);<br>
+<br>
+ /* if fwm packet got skipped by firmware then 0xdead0000 will be written */<br>
+ usleep(1000 * 2);<br>
+ igt_assert_eq_u32(*ring_context_1->bo_cpu, 0);<br>
+ igt_assert_eq_u32(*ring_context_2->bo_cpu, 0);<br>
+<br>
+ /* set the correct fence value to finish executing the ib */<br>
+ *ring_context_1->bo2_cpu = 10;<br>
+ *ring_context_2->bo2_cpu = 10;<br>
+ usleep(1000 * 2);<br>
+ igt_assert_eq_u32(*ring_context_1->bo_cpu, 0xdead0000);<br>
+ igt_assert_eq_u32(*ring_context_2->bo_cpu, 0xdead0000);<br>
+<br>
+ amdgpu_user_queue_destroy(device_handle, ring_context_1, ip_block->type);<br>
+ amdgpu_user_queue_destroy(device_handle, ring_context_2,<br>
+ ip_block->type);<br>
+<br>
+ amdgpu_bo_unmap_and_free(ring_context_1->bo, ring_context_1->va_handle,<br>
+ ring_context_1->bo_mc, ring_context_1->write_length);<br>
+ amdgpu_bo_unmap_and_free(ring_context_1->bo2, ring_context_1->va_handle2,<br>
+ ring_context_1->bo_mc2, 4096);<br>
+ amdgpu_bo_unmap_and_free(ring_context_1->bo3, ring_context_1->va_handle3,<br>
+ ring_context_1->bo_mc3, 8192);<br>
+ amdgpu_bo_unmap_and_free(ring_context_2->bo, ring_context_2->va_handle,<br>
+ ring_context_2->bo_mc, ring_context_2->write_length);<br>
+ amdgpu_bo_unmap_and_free(ring_context_2->bo2, ring_context_2->va_handle2,<br>
+ ring_context_2->bo_mc2, 4096);<br>
+ amdgpu_bo_unmap_and_free(ring_context_2->bo3, ring_context_2->va_handle3,<br>
+ ring_context_2->bo_mc3, 8192);<br>
+ free_cmd_base(cmd_base_1);<br>
+ free_cmd_base(cmd_base_2);<br>
+ free(ring_context_1);<br>
+ free(ring_context_2);<br>
+}<br>
+<br>
igt_main<br>
{<br>
amdgpu_device_handle device;<br>
@@ -815,6 +985,14 @@ igt_main<br>
amdgpu_sync_dependency_test(device, true);<br>
}<br>
}<br>
+<br>
+ igt_describe("Check-FWM-preempt-using-GFX-UMQ");<br>
+ igt_subtest_with_dynamic("fwm-prempt-test-with-IP-GFX-UMQ") {<br>
+ if (userq_arr_cap[AMD_IP_GFX]) {<br>
+ igt_dynamic_f("fwm-preempt-test-with-gfx-umq")<br>
+ amdgpu_fwm_preempt_test(device);<br>
+ }<br>
+ }<br>
#endif<br>
<br>
igt_fixture {<br>
--<br>
2.43.0<br>
</div>
</div>
</body>
</html>