[PATCH 1/3] lib/amdgpu: add support for gang cs
vitaly.prosyak at amd.com
Thu Jan 25 03:44:11 UTC 2024
From: Vitaly Prosyak <vitaly.prosyak at amd.com>
When gang command submission is used, we need to add fields
for the second buffer and the second pm4 packet.
Add an ASIC-dependent implementation of WAIT_REG_MEM, which is used to
poll a location in register or memory space until a condition against a
reference value is satisfied.
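
As an illustration, here is a minimal, self-contained sketch of the dword
sequence the new gfx_ring_wait_reg_mem() helper appends. The PACKET3 and
WAIT_REG_MEM macros below are local stand-ins written for this example
rather than the definitions from lib/amdgpu, and the buffer address and
reference value are made-up numbers:

#include <stdint.h>
#include <stdio.h>

#define PACKET3(op, n)            ((3u << 30) | (((op) & 0xff) << 8) | \
                                   (((n) & 0x3fff) << 16))
#define PACKET3_WAIT_REG_MEM      0x3c
#define WAIT_REG_MEM_FUNCTION(x)  ((x) << 0)  /* 3 = equal to reference    */
#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)  /* 1 = poll memory, 0 = reg  */
#define WAIT_REG_MEM_ENGINE(x)    ((x) << 8)  /* 0 = micro engine (me)     */

int main(void)
{
	uint32_t pm4[16];
	uint32_t i = 0;
	uint64_t bo_mc = 0x100000000ull;  /* example GPU VA of the buffer */
	uint32_t reference = 0xdeadbeaf;  /* value the CP waits for       */

	pm4[i++] = PACKET3(PACKET3_WAIT_REG_MEM, 5);
	pm4[i++] = WAIT_REG_MEM_MEM_SPACE(1) |  /* memory */
		   WAIT_REG_MEM_FUNCTION(3) |   /* == */
		   WAIT_REG_MEM_ENGINE(0);      /* me */
	pm4[i++] = (uint32_t)bo_mc;             /* poll address, low 32 bits  */
	pm4[i++] = (uint32_t)(bo_mc >> 32);     /* poll address, high 32 bits */
	pm4[i++] = reference;                   /* reference value            */
	pm4[i++] = 0xffffffff;                  /* AND mask before compare    */
	pm4[i++] = 0x00000004;                  /* poll interval              */

	for (uint32_t j = 0; j < i; j++)
		printf("pm4[%u] = 0x%08x\n", j, pm4[j]);
	return 0;
}

In the test library the same seven dwords are written into
ring_context->pm4 starting at *pm4_dw, and *pm4_dw is advanced past them.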
Cc: Jesse Zhang <jesse.zhang at amd.com>
Cc: Alex Deucher <alexander.deucher at amd.com>
Cc: Christian Koenig <christian.koenig at amd.com>
Signed-off-by: Yogesh Mohan Marimuthu <yogesh.mohanmarimuthu at amd.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
Acked-by: Christian Koenig <christian.koenig at amd.com>
---
lib/amdgpu/amd_ip_blocks.c | 35 +++++++++++++++++++++++++++++++++++
lib/amdgpu/amd_ip_blocks.h | 20 ++++++++++++++++----
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/lib/amdgpu/amd_ip_blocks.c b/lib/amdgpu/amd_ip_blocks.c
index a7ccfa38b..79ce7b5a8 100644
--- a/lib/amdgpu/amd_ip_blocks.c
+++ b/lib/amdgpu/amd_ip_blocks.c
@@ -288,6 +288,39 @@ gfx_ring_copy_linear(const struct amdgpu_ip_funcs *func,
return 0;
}
+static int
+gfx_ring_wait_reg_mem(const struct amdgpu_ip_funcs *func,
+ const struct amdgpu_ring_context *ring_context,
+ uint32_t *pm4_dw)
+{
+ uint32_t i;
+
+ i = *pm4_dw;
+ ring_context->pm4[i++] = PACKET3(PACKET3_WAIT_REG_MEM, 5);
+ ring_context->pm4[i++] = (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+ WAIT_REG_MEM_FUNCTION(3) | /* == */
+ WAIT_REG_MEM_ENGINE(0)); /* me */
+ ring_context->pm4[i++] = lower_32_bits(ring_context->bo_mc);
+ ring_context->pm4[i++] = upper_32_bits(ring_context->bo_mc);
+ ring_context->pm4[i++] = func->deadbeaf; /* reference value */
+ ring_context->pm4[i++] = 0xffffffff; /* and mask */
+ ring_context->pm4[i++] = 0x00000004; /* poll interval */
+ *pm4_dw = i;
+
+ return 0;
+}
+
+static int
+sdma_ring_wait_reg_mem(const struct amdgpu_ip_funcs *func,
+ const struct amdgpu_ring_context *ring_context,
+ uint32_t *pm4_dw)
+{
+ int r;
+
+ r = gfx_ring_wait_reg_mem(func, ring_context, pm4_dw);
+ return r;
+}
+
/* we may combine these two functions later */
static int
x_compare(const struct amdgpu_ip_funcs *func,
@@ -336,6 +369,7 @@ static struct amdgpu_ip_funcs gfx_v8_x_ip_funcs = {
.compare = x_compare,
.compare_pattern = x_compare_pattern,
.get_reg_offset = gfx_v8_0_get_reg_offset,
+ .wait_reg_mem = gfx_ring_wait_reg_mem,
};
static struct amdgpu_ip_funcs sdma_v3_x_ip_funcs = {
@@ -351,6 +385,7 @@ static struct amdgpu_ip_funcs sdma_v3_x_ip_funcs = {
.compare = x_compare,
.compare_pattern = x_compare_pattern,
.get_reg_offset = gfx_v8_0_get_reg_offset,
+ .wait_reg_mem = sdma_ring_wait_reg_mem,
};
struct amdgpu_ip_block_version gfx_v8_x_ip_block = {
diff --git a/lib/amdgpu/amd_ip_blocks.h b/lib/amdgpu/amd_ip_blocks.h
index aef433e7f..4cad30d1e 100644
--- a/lib/amdgpu/amd_ip_blocks.h
+++ b/lib/amdgpu/amd_ip_blocks.h
@@ -31,22 +31,31 @@ struct amdgpu_ring_context {
int res_cnt; /* num of bo in amdgpu_bo_handle resources[2] */
uint32_t write_length; /* length of data */
+ uint32_t write_length2; /* length of data for second packet */
uint32_t *pm4; /* data of the packet */
uint32_t pm4_size; /* max allocated packet size */
bool secure; /* secure or not */
- uint64_t bo_mc; /* result from amdgpu_bo_alloc_and_map */
- uint64_t bo_mc2; /* result from amdgpu_bo_alloc_and_map */
+ uint64_t bo_mc; /* GPU address of first buffer */
+ uint64_t bo_mc2; /* GPU address of pm4 packet */
+ uint64_t bo_mc3; /* GPU address of second buffer */
+ uint64_t bo_mc4; /* GPU address of second pm4 packet */
uint32_t pm4_dw; /* actual size of pm4 */
+ uint32_t pm4_dw2; /* actual size of second pm4 */
- volatile uint32_t *bo_cpu;
- volatile uint32_t *bo2_cpu;
+ volatile uint32_t *bo_cpu; /* cpu address of mapped GPU buf */
+ volatile uint32_t *bo2_cpu; /* cpu address of mapped pm4 */
+ volatile uint32_t *bo3_cpu; /* cpu address of mapped GPU second buf */
+ volatile uint32_t *bo4_cpu; /* cpu address of mapped second pm4 */
uint32_t bo_cpu_origin;
amdgpu_bo_handle bo;
amdgpu_bo_handle bo2;
+ amdgpu_bo_handle bo3;
+ amdgpu_bo_handle bo4;
+
amdgpu_bo_handle boa_vram[2];
amdgpu_bo_handle boa_gtt[2];
@@ -56,6 +65,8 @@ struct amdgpu_ring_context {
amdgpu_bo_handle resources[4]; /* amdgpu_bo_alloc_and_map */
amdgpu_va_handle va_handle; /* amdgpu_bo_alloc_and_map */
amdgpu_va_handle va_handle2; /* amdgpu_bo_alloc_and_map */
+ amdgpu_va_handle va_handle3; /* amdgpu_bo_alloc_and_map */
+ amdgpu_va_handle va_handle4; /* amdgpu_bo_alloc_and_map */
struct amdgpu_cs_ib_info ib_info; /* amdgpu_bo_list_create */
struct amdgpu_cs_request ibs_request; /* amdgpu_cs_query_fence_status */
@@ -76,6 +87,7 @@ struct amdgpu_ip_funcs {
int (*compare)(const struct amdgpu_ip_funcs *func, const struct amdgpu_ring_context *context, int div);
int (*compare_pattern)(const struct amdgpu_ip_funcs *func, const struct amdgpu_ring_context *context, int div);
int (*get_reg_offset)(enum general_reg reg);
+ int (*wait_reg_mem)(const struct amdgpu_ip_funcs *func, const struct amdgpu_ring_context *context, uint32_t *pm4_dw);
};
--
2.25.1