[igt-dev] [PATCH 3/3] lib/amdgpu: add shaders for gfx11
Luben Tuikov
luben.tuikov at amd.com
Thu Sep 14 01:58:58 UTC 2023
On 2023-09-13 21:17, vitaly.prosyak at amd.com wrote:
> From: Jesse Zhang <jesse.zhang at amd.com>
>
> add memcpy shader for gfx11
>
> Cc: Luben Tuikov <luben.tuikov at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
>
> v2: added disassembly comments (Vitaly)
Thanks for adding the disassembly comments!
Reviewed-by: Luben Tuikov <luben.tuikov at amd.com>
Regards,
Luben
>
> Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
> Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
> ---
> lib/amdgpu/amd_dispatch.c | 21 ++++----
> lib/amdgpu/amd_dispatch_helpers.c | 30 +++++++++--
> lib/amdgpu/amd_shaders.c | 88 ++++++++++++++++++++++++++++++-
> 3 files changed, 122 insertions(+), 17 deletions(-)
>
> diff --git a/lib/amdgpu/amd_dispatch.c b/lib/amdgpu/amd_dispatch.c
> index 075f897ac..f17240f5c 100644
> --- a/lib/amdgpu/amd_dispatch.c
> +++ b/lib/amdgpu/amd_dispatch.c
> @@ -254,6 +254,8 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
> base_cmd->emit(base_cmd,0x74fac);
> else if (version == 10)
> base_cmd->emit(base_cmd,0x1104bfac);
> + else if (version == 11)
> + base_cmd->emit(base_cmd,0x1003dfac);
>
> /* Writes the UAV constant data to the SGPRs. */
> base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
> @@ -265,6 +267,8 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
> base_cmd->emit(base_cmd, 0x74fac);
> else if (version == 10)
> base_cmd->emit(base_cmd, 0x1104bfac);
> + else if (version == 11)
> + base_cmd->emit(base_cmd, 0x1003dfac);
>
> /* clear mmCOMPUTE_RESOURCE_LIMITS */
> base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> @@ -506,21 +510,18 @@ amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
> r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
> igt_assert_eq(r, 0);
> if (!info.available_rings)
> - printf("SKIP ... as there's no ring for ip %d\n", ip_type);
> + igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
>
> version = info.hw_ip_version_major;
> - if (version != 9 && version != 10) {
> - printf("SKIP ... unsupported gfx version %d\n", version);
> + if (version != 9 && version != 10 /*&& version != 11*/) {
> + igt_info("SKIP ... unsupported gfx version %d\n", version);
> return;
> }
> - //TODO IGT
> - //if (version < 9)
> - // version = 9;
> for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
> amdgpu_memcpy_dispatch_test(device_handle, ip_type,
> ring_id, version, 0);
> amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type,
> - ring_id, version, AMDGPU_CTX_NO_RESET);
> + ring_id, version, AMDGPU_CTX_UNKNOWN_RESET);
>
> amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id,
> version, 0);
> @@ -536,11 +537,11 @@ void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_ty
> r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
> igt_assert_eq(r, 0);
> if (!info.available_rings)
> - printf("SKIP ... as there's no graphics ring\n");
> + igt_info("SKIP ... as there's no graphics ring\n");
>
> version = info.hw_ip_version_major;
> - if (version != 9 && version != 10) {
> - printf("SKIP ... unsupported gfx version %d\n", version);
> + if (version != 9 && version != 10 && version != 11) {
> + igt_info("SKIP ... unsupported gfx version %d\n", version);
> return;
> }
> if (version < 9)
> diff --git a/lib/amdgpu/amd_dispatch_helpers.c b/lib/amdgpu/amd_dispatch_helpers.c
> index 8f06d841d..11ce8284a 100644
> --- a/lib/amdgpu/amd_dispatch_helpers.c
> +++ b/lib/amdgpu/amd_dispatch_helpers.c
> @@ -118,8 +118,16 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
> {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
> {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
> };
> + static uint32_t bufferclear_cs_shader_registers_gfx11[][2] = {
> + {0x2e12, 0x600C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x600C0041 },
> + {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
> + {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
> + {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
> + {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
> + };
>
> static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9);
> + static const uint32_t bufferclear_cs_shader_registers_num_gfx11 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx11);
> int offset_prev = base->cdw;
> int j;
>
> @@ -130,11 +138,23 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
> base->emit(base, shader_addr >> 8);
> base->emit(base, shader_addr >> 40);
> /* write sh regs */
> - for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
> - base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> - /* - Gfx9ShRegBase */
> - base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
> - base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
> + if (version == 11) {
> + for (j = 0; j < bufferclear_cs_shader_registers_num_gfx11; j++) {
> + base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> + /* - Gfx11ShRegBase */
> + base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
> + if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
> + bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
> +
> + base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
> + }
> + } else {
> + for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
> + base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> + /* - Gfx9ShRegBase */
> + base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
> + base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
> + }
> }
> if (version == 10) {
> /* mmCOMPUTE_PGM_RSRC3 */
> diff --git a/lib/amdgpu/amd_shaders.c b/lib/amdgpu/amd_shaders.c
> index 7672f67c0..cbea12827 100644
> --- a/lib/amdgpu/amd_shaders.c
> +++ b/lib/amdgpu/amd_shaders.c
> @@ -150,10 +150,9 @@ int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, uint32_t family_id)
> shader = &memcpy_cs_hang_slow_rv;
> break;
> case AMDGPU_FAMILY_NV:
> + default:
> shader = &memcpy_cs_hang_slow_nv;
> break;
> - default:
> - return -1;
> }
>
> memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
> @@ -275,6 +274,85 @@ int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
> 0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
> };
>
> + /**
> + * shader main
> + * asic(GFX11)
> + * type(CS)
> + * s_version UC_VERSION_GFX11 | UC_VERSION_W64_BIT // 000000000000: B0802006
> + * s_set_inst_prefetch_distance 0x0003 // 000000000004: BF840003
> + * v_and_b32 v0, lit(0x000003ff), v0 // 000000000008: 360000FF 000003FF
> + * v_mov_b32 v1, s5 // 000000000010: 7E020205
> + * v_mov_b32 v2, s6 // 000000000014: 7E040206
> + * v_mov_b32 v3, s7 // 000000000018: 7E060207
> + * s_delay_alu instid0(VALU_DEP_4) // 00000000001C: BF870004
> + * v_lshl_add_u32 v4, s8, 6, v0 // 000000000020: D6460004 04010C08
> + * v_mov_b32 v0, s4 // 000000000028: 7E000204
> + * buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen // 00000000002C: E01C0000 80800004
> + * s_sendmsg sendmsg(MSG_DEALLOC_VGPRS, 0, 0) // 000000000034: BFB60003
> + * s_endpgm // 000000000038: BFB00000
> + */
> + static const uint32_t bufferclear_cs_shader_gfx11[] = {
> + 0xB0802006, 0xBF840003, 0x360000FF, 0x000003FF,
> + 0x7E020205, 0x7E040206, 0x7E060207, 0xBF870004,
> + 0xD6460004, 0x04010C08, 0x7E000204, 0xE01C0000,
> + 0x80800004, 0xBFB60003, 0xBFB00000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000
> + };
> +
> + /**
> + * shader main
> + * asic(GFX11)
> + * type(CS)
> + * s_version UC_VERSION_GFX11 | UC_VERSION_W64_BIT // 000000000000: B0802006
> + * s_set_inst_prefetch_distance 0x0003 // 000000000004: BF840003
> + * v_and_b32 v0, lit(0x000003ff), v0 // 000000000008: 360000FF 000003FF
> + * s_delay_alu instid0(VALU_DEP_1) // 000000000010: BF870001
> + * v_lshl_add_u32 v1, s8, 6, v0 // 000000000014: D6460001 04010C08
> + * buffer_load_format_xyzw v[2:5], v1, s[0:3], 0 idxen // 00000000001C: E00C0000 80800201
> + * s_waitcnt vmcnt(0) // 000000000024: BF8903F7
> + * buffer_store_format_xyzw v[2:5], v1, s[4:7], 0 idxen // 000000000028: E01C0000 80810201
> + * s_sendmsg sendmsg(MSG_DEALLOC_VGPRS, 0, 0) // 000000000030: BFB60003
> + * s_endpgm // 000000000034: BFB00000
> + * end
> + */
> + static const uint32_t buffercopy_cs_shader_gfx11[] = {
> + 0xB0802006, 0xBF840003, 0x360000FF, 0x000003FF,
> + 0xBF870001, 0xD6460001, 0x04010C08, 0xE00C0000,
> + 0x80800201, 0xBF8903F7, 0xE01C0000, 0x80810201,
> + 0xBFB60003, 0xBFB00000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> + 0xBF9F0000, 0xBF9F0000
> + };
> +
> uint32_t shader_size;
> const uint32_t *shader;
>
> @@ -286,6 +364,9 @@ int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
> } else if (version == 10) {
> shader = bufferclear_cs_shader_gfx10;
> shader_size = sizeof(bufferclear_cs_shader_gfx10);
> + } else if (version == 11) {
> + shader = bufferclear_cs_shader_gfx11;
> + shader_size = sizeof(bufferclear_cs_shader_gfx11);
> }
> break;
> case CS_BUFFERCOPY:
> @@ -295,6 +376,9 @@ int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
> } else if (version == 10) {
> shader = buffercopy_cs_shader_gfx10;
> shader_size = sizeof(buffercopy_cs_shader_gfx10);
> + } else if (version == 11) {
> + shader = buffercopy_cs_shader_gfx11;
> + shader_size = sizeof(buffercopy_cs_shader_gfx11);
> }
> break;
> case CS_HANG:
More information about the igt-dev mailing list