[PATCH i-g-t 1/2] lib/amdgpu: Add SDMA support for user queues
Jesse.Zhang
Jesse.Zhang at amd.com
Mon Jun 30 09:23:41 UTC 2025
This commit adds support for submitting SDMA work to user queues.
The changes include:
1. Added SDMA-specific packet definitions in amd_sdma.h for indirect buffer
operations and protected fence signaling.
2. Modified amdgpu_user_queue_submit() to handle SDMA-specific requirements:
- Added NOP-packet padding ahead of the indirect buffer packet (NOP count
computed as (2 - wptr) & 7) to satisfy the 8-DWORD alignment requirement
- Implemented SDMA-specific indirect buffer submission format
- Added proper handling of CSA (Context Save Area) addresses
- Included SDMA-specific fence signaling
- Adjusted wptr handling for SDMA queues: the wptr value is shifted left by 2
before it is written to the doorbell
These changes ensure that the synchronization and alignment requirements of
SDMA operations are met, while maintaining backward compatibility with the
existing queue types.
Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
---
lib/amdgpu/amd_sdma.h | 4 ++++
lib/amdgpu/amd_userq.c | 50 +++++++++++++++++++++++++++++-------------
2 files changed, 39 insertions(+), 15 deletions(-)
diff --git a/lib/amdgpu/amd_sdma.h b/lib/amdgpu/amd_sdma.h
index 3e568e47a..2a9339cfb 100644
--- a/lib/amdgpu/amd_sdma.h
+++ b/lib/amdgpu/amd_sdma.h
@@ -103,5 +103,9 @@
# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31)
#define SDMA_NOP 0x0
+#define SDMA_OP_INDIRECT 0x4
+#define SDMA_OP_PROTECTED_FENCE 0x5
+#define SDMA6_SUB_OP_PROTECTED_FENCE 0x1
+#define SDMA7_SUB_OP_PROTECTED_FENCE 0x3
#endif
diff --git a/lib/amdgpu/amd_userq.c b/lib/amdgpu/amd_userq.c
index f8e1a4b45..10e82a5a4 100644
--- a/lib/amdgpu/amd_userq.c
+++ b/lib/amdgpu/amd_userq.c
@@ -6,6 +6,7 @@
#include "amd_userq.h"
#include "amd_memory.h"
#include "amd_PM4.h"
+#include "amd_sdma.h"
#include "ioctl_wrappers.h"
#ifdef AMDGPU_USERQ_ENABLED
@@ -136,22 +137,39 @@ void amdgpu_user_queue_submit(amdgpu_device_handle device, struct amdgpu_ring_co
uint64_t timeout = ring_context->time_out ? ring_context->time_out : INT64_MAX;
amdgpu_pkt_begin();
- /* Prepare the Indirect IB to submit the IB to user queue */
- amdgpu_pkt_add_dw(PACKET3(PACKET3_INDIRECT_BUFFER, 2));
- amdgpu_pkt_add_dw(lower_32_bits(mc_address));
- amdgpu_pkt_add_dw(upper_32_bits(mc_address));
-
- if (ip_type == AMD_IP_GFX)
- amdgpu_pkt_add_dw(control | S_3F3_INHERIT_VMID_MQD_GFX(1));
- else
- amdgpu_pkt_add_dw(control | S_3F3_VALID_COMPUTE(1)
- | S_3F3_INHERIT_VMID_MQD_COMPUTE(1));
-
- amdgpu_pkt_add_dw(PACKET3(PACKET3_PROTECTED_FENCE_SIGNAL, 0));
-
- /* empty dword is needed for fence signal pm4 */
- amdgpu_pkt_add_dw(0);
+ if (ip_type == AMD_IP_DMA) {
+ /* For SDMA, we need to align the IB to 8 DW boundary */
+ unsigned int nop_count = (2 - lower_32_bits(*ring_context->wptr_cpu)) & 7;
+ for (unsigned int i = 0; i < nop_count; i++)
+ amdgpu_pkt_add_dw(SDMA_PKT_HEADER_OP(SDMA_NOP));
+ amdgpu_pkt_add_dw(SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT));
+ amdgpu_pkt_add_dw(lower_32_bits(mc_address) & 0xffffffe0); // 32-byte aligned
+ amdgpu_pkt_add_dw(upper_32_bits(mc_address));
+ amdgpu_pkt_add_dw(control); // IB length in DWORDS
+ amdgpu_pkt_add_dw(lower_32_bits(ring_context->csa.mc_addr)); // CSA MC address low
+ amdgpu_pkt_add_dw(upper_32_bits(ring_context->csa.mc_addr)); // CSA MC address high
+ if (ring_context->hw_ip_info.hw_ip_version_major <= 6)
+ amdgpu_pkt_add_dw(SDMA_PACKET(SDMA_OP_PROTECTED_FENCE, SDMA6_SUB_OP_PROTECTED_FENCE, 0));
+ else
+ amdgpu_pkt_add_dw(SDMA_PACKET(SDMA_OP_PROTECTED_FENCE, SDMA7_SUB_OP_PROTECTED_FENCE, 0));
+ } else {
+ /* Prepare the Indirect IB to submit the IB to user queue */
+ amdgpu_pkt_add_dw(PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ amdgpu_pkt_add_dw(lower_32_bits(mc_address));
+ amdgpu_pkt_add_dw(upper_32_bits(mc_address));
+
+ if (ip_type == AMD_IP_GFX)
+ amdgpu_pkt_add_dw(control | S_3F3_INHERIT_VMID_MQD_GFX(1));
+ else
+ amdgpu_pkt_add_dw(control | S_3F3_VALID_COMPUTE(1)
+ | S_3F3_INHERIT_VMID_MQD_COMPUTE(1));
+
+ amdgpu_pkt_add_dw(PACKET3(PACKET3_PROTECTED_FENCE_SIGNAL, 0));
+
+ /* empty dword is needed for fence signal pm4 */
+ amdgpu_pkt_add_dw(0);
+ }
#if DETECT_CC_GCC && (DETECT_ARCH_X86 || DETECT_ARCH_X86_64)
asm volatile ("mfence" : : : "memory");
#endif
@@ -163,6 +181,8 @@ void amdgpu_user_queue_submit(amdgpu_device_handle device, struct amdgpu_ring_co
asm volatile ("mfence" : : : "memory");
#endif
+ if (ip_type == AMD_IP_DMA)
+ *ring_context->wptr_cpu = *ring_context->wptr_cpu <<2;
/* Update the door bell */
ring_context->doorbell_cpu[DOORBELL_INDEX] = *ring_context->wptr_cpu;
--
2.49.0
More information about the igt-dev
mailing list