[igt-dev] [PATCH 3/3] lib/amdgpu: add shaders for gfx11
vitaly.prosyak at amd.com
vitaly.prosyak at amd.com
Thu Sep 14 01:17:32 UTC 2023
From: Jesse Zhang <jesse.zhang at amd.com>
Add the buffer-clear and buffer-copy (memcpy) compute shaders for gfx11, and enable the gfx dispatch test on gfx11.
Cc: Luben Tuikov <luben.tuikov at amd.com>
Cc: Alex Deucher <alexander.deucher at amd.com>
Cc: Christian Koenig <christian.koenig at amd.com>
v2: added disassembly comments (Vitaly)
Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
---
lib/amdgpu/amd_dispatch.c | 21 ++++----
lib/amdgpu/amd_dispatch_helpers.c | 30 +++++++++--
lib/amdgpu/amd_shaders.c | 88 ++++++++++++++++++++++++++++++-
3 files changed, 122 insertions(+), 17 deletions(-)
diff --git a/lib/amdgpu/amd_dispatch.c b/lib/amdgpu/amd_dispatch.c
index 075f897ac..f17240f5c 100644
--- a/lib/amdgpu/amd_dispatch.c
+++ b/lib/amdgpu/amd_dispatch.c
@@ -254,6 +254,8 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
base_cmd->emit(base_cmd,0x74fac);
else if (version == 10)
base_cmd->emit(base_cmd,0x1104bfac);
+ else if (version == 11)
+ base_cmd->emit(base_cmd,0x1003dfac);
/* Writes the UAV constant data to the SGPRs. */
base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
@@ -265,6 +267,8 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
base_cmd->emit(base_cmd, 0x74fac);
else if (version == 10)
base_cmd->emit(base_cmd, 0x1104bfac);
+ else if (version == 11)
+ base_cmd->emit(base_cmd, 0x1003dfac);
/* clear mmCOMPUTE_RESOURCE_LIMITS */
base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
@@ -506,21 +510,18 @@ amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
igt_assert_eq(r, 0);
if (!info.available_rings)
- printf("SKIP ... as there's no ring for ip %d\n", ip_type);
+ igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
version = info.hw_ip_version_major;
- if (version != 9 && version != 10) {
- printf("SKIP ... unsupported gfx version %d\n", version);
+ if (version != 9 && version != 10 /*&& version != 11*/) {
+ igt_info("SKIP ... unsupported gfx version %d\n", version);
return;
}
- //TODO IGT
- //if (version < 9)
- // version = 9;
for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
amdgpu_memcpy_dispatch_test(device_handle, ip_type,
ring_id, version, 0);
amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type,
- ring_id, version, AMDGPU_CTX_NO_RESET);
+ ring_id, version, AMDGPU_CTX_UNKNOWN_RESET);
amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id,
version, 0);
@@ -536,11 +537,11 @@ void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_ty
r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
igt_assert_eq(r, 0);
if (!info.available_rings)
- printf("SKIP ... as there's no graphics ring\n");
+ igt_info("SKIP ... as there's no graphics ring\n");
version = info.hw_ip_version_major;
- if (version != 9 && version != 10) {
- printf("SKIP ... unsupported gfx version %d\n", version);
+ if (version != 9 && version != 10 && version != 11) {
+ igt_info("SKIP ... unsupported gfx version %d\n", version);
return;
}
if (version < 9)
diff --git a/lib/amdgpu/amd_dispatch_helpers.c b/lib/amdgpu/amd_dispatch_helpers.c
index 8f06d841d..11ce8284a 100644
--- a/lib/amdgpu/amd_dispatch_helpers.c
+++ b/lib/amdgpu/amd_dispatch_helpers.c
@@ -118,8 +118,16 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
{0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
{0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};
+ static uint32_t bufferclear_cs_shader_registers_gfx11[][2] = {
+ {0x2e12, 0x600C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x600C0041 },
+ {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
+ {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
+ {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
+ {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
+ };
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9);
+ static const uint32_t bufferclear_cs_shader_registers_num_gfx11 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx11);
int offset_prev = base->cdw;
int j;
@@ -130,11 +138,23 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
base->emit(base, shader_addr >> 8);
base->emit(base, shader_addr >> 40);
/* write sh regs */
- for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
- base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
- /* - Gfx9ShRegBase */
- base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
- base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
+ if (version == 11) {
+ for (j = 0; j < bufferclear_cs_shader_registers_num_gfx11; j++) {
+ base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
+ /* - Gfx11ShRegBase */
+ base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
+ if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
+ bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
+
+ base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
+ }
+ } else {
+ for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
+ base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
+ /* - Gfx9ShRegBase */
+ base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
+ base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
+ }
}
if (version == 10) {
/* mmCOMPUTE_PGM_RSRC3 */
diff --git a/lib/amdgpu/amd_shaders.c b/lib/amdgpu/amd_shaders.c
index 7672f67c0..cbea12827 100644
--- a/lib/amdgpu/amd_shaders.c
+++ b/lib/amdgpu/amd_shaders.c
@@ -150,10 +150,9 @@ int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, uint32_t family_id)
shader = &memcpy_cs_hang_slow_rv;
break;
case AMDGPU_FAMILY_NV:
+ default:
shader = &memcpy_cs_hang_slow_nv;
break;
- default:
- return -1;
}
memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
@@ -275,6 +274,85 @@ int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
};
+ /**
+ * shader main
+ * asic(GFX11)
+ * type(CS)
+ * s_version UC_VERSION_GFX11 | UC_VERSION_W64_BIT // 000000000000: B0802006
+ * s_set_inst_prefetch_distance 0x0003 // 000000000004: BF840003
+ * v_and_b32 v0, lit(0x000003ff), v0 // 000000000008: 360000FF 000003FF
+ * v_mov_b32 v1, s5 // 000000000010: 7E020205
+ * v_mov_b32 v2, s6 // 000000000014: 7E040206
+ * v_mov_b32 v3, s7 // 000000000018: 7E060207
+ * s_delay_alu instid0(VALU_DEP_4) // 00000000001C: BF870004
+ * v_lshl_add_u32 v4, s8, 6, v0 // 000000000020: D6460004 04010C08
+ * v_mov_b32 v0, s4 // 000000000028: 7E000204
+ * buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen // 00000000002C: E01C0000 80800004
+ * s_sendmsg sendmsg(MSG_DEALLOC_VGPRS, 0, 0) // 000000000034: BFB60003
+ * s_endpgm // 000000000038: BFB00000
+ */
+ static const uint32_t bufferclear_cs_shader_gfx11[] = {
+ 0xB0802006, 0xBF840003, 0x360000FF, 0x000003FF,
+ 0x7E020205, 0x7E040206, 0x7E060207, 0xBF870004,
+ 0xD6460004, 0x04010C08, 0x7E000204, 0xE01C0000,
+ 0x80800004, 0xBFB60003, 0xBFB00000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000
+ };
+
+ /**
+ * shader main
+ * asic(GFX11)
+ * type(CS)
+ * s_version UC_VERSION_GFX11 | UC_VERSION_W64_BIT // 000000000000: B0802006
+ * s_set_inst_prefetch_distance 0x0003 // 000000000004: BF840003
+ * v_and_b32 v0, lit(0x000003ff), v0 // 000000000008: 360000FF 000003FF
+ * s_delay_alu instid0(VALU_DEP_1) // 000000000010: BF870001
+ * v_lshl_add_u32 v1, s8, 6, v0 // 000000000014: D6460001 04010C08
+ * buffer_load_format_xyzw v[2:5], v1, s[0:3], 0 idxen // 00000000001C: E00C0000 80800201
+ * s_waitcnt vmcnt(0) // 000000000024: BF8903F7
+ * buffer_store_format_xyzw v[2:5], v1, s[4:7], 0 idxen // 000000000028: E01C0000 80810201
+ * s_sendmsg sendmsg(MSG_DEALLOC_VGPRS, 0, 0) // 000000000030: BFB60003
+ * s_endpgm // 000000000034: BFB00000
+ * end
+ */
+ static const uint32_t buffercopy_cs_shader_gfx11[] = {
+ 0xB0802006, 0xBF840003, 0x360000FF, 0x000003FF,
+ 0xBF870001, 0xD6460001, 0x04010C08, 0xE00C0000,
+ 0x80800201, 0xBF8903F7, 0xE01C0000, 0x80810201,
+ 0xBFB60003, 0xBFB00000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
+ 0xBF9F0000, 0xBF9F0000
+ };
+
uint32_t shader_size;
const uint32_t *shader;
@@ -286,6 +364,9 @@ int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
} else if (version == 10) {
shader = bufferclear_cs_shader_gfx10;
shader_size = sizeof(bufferclear_cs_shader_gfx10);
+ } else if (version == 11) {
+ shader = bufferclear_cs_shader_gfx11;
+ shader_size = sizeof(bufferclear_cs_shader_gfx11);
}
break;
case CS_BUFFERCOPY:
@@ -295,6 +376,9 @@ int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
} else if (version == 10) {
shader = buffercopy_cs_shader_gfx10;
shader_size = sizeof(buffercopy_cs_shader_gfx10);
+ } else if (version == 11) {
+ shader = buffercopy_cs_shader_gfx11;
+ shader_size = sizeof(buffercopy_cs_shader_gfx11);
}
break;
case CS_HANG:
--
2.25.1
More information about the igt-dev mailing list