[igt-dev] [PATCH 3/3] lib/amdgpu: add shaders for gfx11

Luben Tuikov luben.tuikov at amd.com
Thu Sep 14 01:58:58 UTC 2023


On 2023-09-13 21:17, vitaly.prosyak at amd.com wrote:
> From: Jesse Zhang <jesse.zhang at amd.com>
> 
> Add the buffer-clear and buffer-copy (memcpy) compute shaders for gfx11.
> 
> Cc: Luben Tuikov <luben.tuikov at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> 
> v2: Added disassembly comments (Vitaly)

Thanks for adding the disassembly comments!

Reviewed-by: Luben Tuikov <luben.tuikov at amd.com>

Regards,
Luben

> 
> Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
> Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
> ---
>  lib/amdgpu/amd_dispatch.c         | 21 ++++----
>  lib/amdgpu/amd_dispatch_helpers.c | 30 +++++++++--
>  lib/amdgpu/amd_shaders.c          | 88 ++++++++++++++++++++++++++++++-
>  3 files changed, 122 insertions(+), 17 deletions(-)
> 
> diff --git a/lib/amdgpu/amd_dispatch.c b/lib/amdgpu/amd_dispatch.c
> index 075f897ac..f17240f5c 100644
> --- a/lib/amdgpu/amd_dispatch.c
> +++ b/lib/amdgpu/amd_dispatch.c
> @@ -254,6 +254,8 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
>  		base_cmd->emit(base_cmd,0x74fac);
>  	else if (version == 10)
>  		base_cmd->emit(base_cmd,0x1104bfac);
> +	else if (version == 11)
> +		base_cmd->emit(base_cmd,0x1003dfac);
>  
>  	/* Writes the UAV constant data to the SGPRs. */
>  	base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
> @@ -265,6 +267,8 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
>  		base_cmd->emit(base_cmd, 0x74fac);
>  	else if (version == 10)
>  		base_cmd->emit(base_cmd, 0x1104bfac);
> +	else if (version == 11)
> +		base_cmd->emit(base_cmd, 0x1003dfac);
>  
>  	/* clear mmCOMPUTE_RESOURCE_LIMITS */
>  	base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> @@ -506,21 +510,18 @@ amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
>  	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
>  	igt_assert_eq(r, 0);
>  	if (!info.available_rings)
> -		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
> +		igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
>  
>  	version = info.hw_ip_version_major;
> -	if (version != 9 && version != 10) {
> -		printf("SKIP ... unsupported gfx version %d\n", version);
> +	if (version != 9 && version != 10 /*&& version != 11*/) {
> +		igt_info("SKIP ... unsupported gfx version %d\n", version);
>  		return;
>  	}
> -	//TODO IGT
> -	//if (version < 9)
> -	//	version = 9;
>  	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
>  		amdgpu_memcpy_dispatch_test(device_handle, ip_type,
>  					    ring_id,  version, 0);
>  		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type,
> -						      ring_id, version, AMDGPU_CTX_NO_RESET);
> +						      ring_id, version, AMDGPU_CTX_UNKNOWN_RESET);
>  
>  		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id,
>  					    version, 0);
> @@ -536,11 +537,11 @@ void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_ty
>  	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
>  	igt_assert_eq(r, 0);
>  	if (!info.available_rings)
> -		printf("SKIP ... as there's no graphics ring\n");
> +		igt_info("SKIP ... as there's no graphics ring\n");
>  
>  	version = info.hw_ip_version_major;
> -	if (version != 9 && version != 10) {
> -		printf("SKIP ... unsupported gfx version %d\n", version);
> +	if (version != 9 && version != 10 && version != 11) {
> +		igt_info("SKIP ... unsupported gfx version %d\n", version);
>  		return;
>  	}
>  	if (version < 9)
> diff --git a/lib/amdgpu/amd_dispatch_helpers.c b/lib/amdgpu/amd_dispatch_helpers.c
> index 8f06d841d..11ce8284a 100644
> --- a/lib/amdgpu/amd_dispatch_helpers.c
> +++ b/lib/amdgpu/amd_dispatch_helpers.c
> @@ -118,8 +118,16 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
>  		{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y,	0x00000001 },
>  		{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z,	0x00000001 }
>  	};
> +	static uint32_t bufferclear_cs_shader_registers_gfx11[][2] = {
> +		{0x2e12, 0x600C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x600C0041 },
> +		{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
> +		{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
> +		{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
> +		{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
> +	};
>  
>  	static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9);
> +	static const uint32_t bufferclear_cs_shader_registers_num_gfx11 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx11);
>  	int offset_prev = base->cdw;
>  	int j;
>  
> @@ -130,11 +138,23 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
>  	base->emit(base, shader_addr >> 8);
>  	base->emit(base, shader_addr >> 40);
>  	/* write sh regs */
> -	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
> -		base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> -		/* - Gfx9ShRegBase */
> -		base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
> -		base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
> +	if (version == 11) {
> +		for (j = 0; j < bufferclear_cs_shader_registers_num_gfx11; j++) {
> +			base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> +			/* - Gfx11ShRegBase */
> +			base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
> +			if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
> +				bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
> +
> +			base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
> +		}
> +	} else {
> +		for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
> +			base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> +			/* - Gfx9ShRegBase */
> +			base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
> +			base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
> +		}
>  	}
>  	if (version == 10) {
>  		/* mmCOMPUTE_PGM_RSRC3 */
> diff --git a/lib/amdgpu/amd_shaders.c b/lib/amdgpu/amd_shaders.c
> index 7672f67c0..cbea12827 100644
> --- a/lib/amdgpu/amd_shaders.c
> +++ b/lib/amdgpu/amd_shaders.c
> @@ -150,10 +150,9 @@ int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, uint32_t family_id)
>  			shader = &memcpy_cs_hang_slow_rv;
>  			break;
>  		case AMDGPU_FAMILY_NV:
> +		default:
>  			shader = &memcpy_cs_hang_slow_nv;
>  			break;
> -		default:
> -			return -1;
>  	}
>  
>  	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
> @@ -275,6 +274,85 @@ int  amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
>  		0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
>  	};
>  
> +	/**
> +	 * shader main
> +	 * asic(GFX11)
> +	 * type(CS)
> +	 * s_version     UC_VERSION_GFX11 | UC_VERSION_W64_BIT   // 000000000000: B0802006
> +	 * s_set_inst_prefetch_distance  0x0003                  // 000000000004: BF840003
> +	 * v_and_b32     v0, lit(0x000003ff), v0                 // 000000000008: 360000FF 000003FF
> +	 * v_mov_b32     v1, s5                                  // 000000000010: 7E020205
> +	 * v_mov_b32     v2, s6                                  // 000000000014: 7E040206
> +	 * v_mov_b32     v3, s7                                  // 000000000018: 7E060207
> +	 * s_delay_alu   instid0(VALU_DEP_4)                     // 00000000001C: BF870004
> +	 * v_lshl_add_u32  v4, s8, 6, v0                         // 000000000020: D6460004 04010C08
> +	 * v_mov_b32     v0, s4                                  // 000000000028: 7E000204
> +	 * buffer_store_format_xyzw  v[0:3], v4, s[0:3], 0 idxen // 00000000002C: E01C0000 80800004
> +	 * s_sendmsg     sendmsg(MSG_DEALLOC_VGPRS, 0, 0)        // 000000000034: BFB60003
> +	 * s_endpgm                                              // 000000000038: BFB00000
> +	 */
> +	static const uint32_t bufferclear_cs_shader_gfx11[] = {
> +		0xB0802006, 0xBF840003, 0x360000FF, 0x000003FF,
> +		0x7E020205, 0x7E040206, 0x7E060207, 0xBF870004,
> +		0xD6460004, 0x04010C08, 0x7E000204, 0xE01C0000,
> +		0x80800004, 0xBFB60003, 0xBFB00000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000
> +	};
> +
> +	/**
> +	 * shader main
> +	 * asic(GFX11)
> +	 * type(CS)
> +	 * s_version     UC_VERSION_GFX11 | UC_VERSION_W64_BIT   // 000000000000: B0802006
> +	 * s_set_inst_prefetch_distance  0x0003                  // 000000000004: BF840003
> +	 * v_and_b32     v0, lit(0x000003ff), v0                 // 000000000008: 360000FF 000003FF
> +	 * s_delay_alu   instid0(VALU_DEP_1)                     // 000000000010: BF870001
> +	 * v_lshl_add_u32  v1, s8, 6, v0                         // 000000000014: D6460001 04010C08
> +	 * buffer_load_format_xyzw  v[2:5], v1, s[0:3], 0 idxen  // 00000000001C: E00C0000 80800201
> +	 * s_waitcnt     vmcnt(0)                                // 000000000024: BF8903F7
> +	 * buffer_store_format_xyzw  v[2:5], v1, s[4:7], 0 idxen // 000000000028: E01C0000 80810201
> +	 * s_sendmsg     sendmsg(MSG_DEALLOC_VGPRS, 0, 0)        // 000000000030: BFB60003
> +	 * s_endpgm                                              // 000000000034: BFB00000
> +	 * end
> +	 */
> +	static const uint32_t buffercopy_cs_shader_gfx11[] = {
> +		0xB0802006, 0xBF840003, 0x360000FF, 0x000003FF,
> +		0xBF870001, 0xD6460001, 0x04010C08, 0xE00C0000,
> +		0x80800201, 0xBF8903F7, 0xE01C0000, 0x80810201,
> +		0xBFB60003, 0xBFB00000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000, 0xBF9F0000, 0xBF9F0000,
> +		0xBF9F0000, 0xBF9F0000
> +	};
> +
>  	uint32_t shader_size;
>  	const uint32_t *shader;
>  
> @@ -286,6 +364,9 @@ int  amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
>  			} else if (version == 10) {
>  				shader = bufferclear_cs_shader_gfx10;
>  				shader_size = sizeof(bufferclear_cs_shader_gfx10);
> +			} else if (version == 11) {
> +				shader = bufferclear_cs_shader_gfx11;
> +				shader_size = sizeof(bufferclear_cs_shader_gfx11);
>  			}
>  			break;
>  		case CS_BUFFERCOPY:
> @@ -295,6 +376,9 @@ int  amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
>  			} else if (version == 10) {
>  				shader = buffercopy_cs_shader_gfx10;
>  				shader_size = sizeof(buffercopy_cs_shader_gfx10);
> +			} else if (version == 11) {
> +				shader = buffercopy_cs_shader_gfx11;
> +				shader_size = sizeof(buffercopy_cs_shader_gfx11);
>  			}
>  			break;
>  		case CS_HANG:



More information about the igt-dev mailing list