[PATCH i-g-t v2] tests/amdgpu: add amd dispatch subtest

vitaly prosyak vprosyak at amd.com
Wed Jul 3 03:13:53 UTC 2024


The change looks good to me.

Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>

On 2024-07-02 23:09, Jesse Zhang wrote:
> Add more cases to trigger a GPU reset:
> 1. Use invalid user data to trigger a GPU reset.
> 2. Use an invalid shader program address to trigger a GPU reset.
> 3. Use invalid shader settings to trigger a GPU reset.
>
> V2: Rename cases and map them for easier maintenance. (Vitaly)
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
> Signed-off-by: Jiadong Zhu <Jiadong.Zhu at amd.com>
> ---
>  lib/amdgpu/amd_dispatch.c         | 35 +++++++++++++++++----------
>  lib/amdgpu/amd_dispatch.h         |  5 ++--
>  lib/amdgpu/amd_dispatch_helpers.c | 39 ++++++++++++++++++++++++-------
>  lib/amdgpu/amd_dispatch_helpers.h |  9 ++++++-
>  tests/amdgpu/amd_dispatch.c       | 31 +++++++++++++++++++++---
>  5 files changed, 93 insertions(+), 26 deletions(-)
>
> diff --git a/lib/amdgpu/amd_dispatch.c b/lib/amdgpu/amd_dispatch.c
> index df7d56ea7..12ef7874c 100644
> --- a/lib/amdgpu/amd_dispatch.c
> +++ b/lib/amdgpu/amd_dispatch.c
> @@ -70,7 +70,7 @@ amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
>  	amdgpu_dispatch_write_cumask(base_cmd, version);
>  
>  	/* Writes shader state to HW */
> -	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
> +	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, 0);
>  
>  	/* Write constant data */
>  	/* Writes the UAV constant data to the SGPRs. */
> @@ -162,7 +162,7 @@ amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
>  void
>  amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
>  			    uint32_t ip_type, uint32_t ring, uint32_t version,
> -			    int hang)
> +			    enum shader_error_type hang)
>  {
>  	amdgpu_context_handle context_handle;
>  	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
> @@ -202,7 +202,7 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
>  	igt_assert_eq(r, 0);
>  	memset(ptr_shader, 0, bo_shader_size);
>  
> -	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
> +	cs_type = hang == BACKEND_SE_GC_SHADER_INVALID_SHADER ? CS_HANG : CS_BUFFERCOPY;
>  	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version);
>  	igt_assert_eq(r, 0);
>  
> @@ -217,22 +217,28 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
>  					&bo_dst, (void **)&ptr_dst,
>  					&mc_address_dst, &va_dst);
>  	igt_assert_eq(r, 0);
> -
>  	///TODO helper function for this bloc
>  	amdgpu_dispatch_init(ip_type, base_cmd,  version);
>  	/*  Issue commands to set cu mask used in current dispatch */
>  	amdgpu_dispatch_write_cumask(base_cmd, version);
> +
> +	if (hang == BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR)
> +		mc_address_shader = 0;
>  	/* Writes shader state to HW */
> -	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
> +	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, hang);
>  	memset(ptr_src, 0x55, bo_dst_size);
>  
>  	/* Write constant data */
>  	/* Writes the texture resource constants data to the SGPRs */
>  	base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
>  	base_cmd->emit(base_cmd, 0x240);
> -	base_cmd->emit(base_cmd, mc_address_src);
> -
> -	base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
> +	if (hang == BACKEND_SE_GC_SHADER_INVALID_USER_DATA) {
> +		base_cmd->emit(base_cmd, mc_address_src);
> +		base_cmd->emit(base_cmd, 0);
> +	} else {
> +		base_cmd->emit(base_cmd, mc_address_src);
> +		base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
> +	}
>  
>  	base_cmd->emit(base_cmd, 0x400);
>  	if (version == 9)
> @@ -247,8 +253,13 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
>  	/* Writes the UAV constant data to the SGPRs. */
>  	base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
>  	base_cmd->emit(base_cmd, 0x244);
> -	base_cmd->emit(base_cmd, mc_address_dst);
> -	base_cmd->emit(base_cmd, (mc_address_dst >> 32) | 0x100000);
> +	if (hang == BACKEND_SE_GC_SHADER_INVALID_USER_DATA) {
> +		base_cmd->emit(base_cmd, mc_address_src);
> +		base_cmd->emit(base_cmd, 0);
> +	} else {
> +		base_cmd->emit(base_cmd, mc_address_src);
> +		base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
> +	}
>  	base_cmd->emit(base_cmd, 0x400);
>  	if (version == 9)
>  		base_cmd->emit(base_cmd, 0x74fac);
> @@ -401,7 +412,7 @@ amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
>  	amdgpu_dispatch_write_cumask(base_cmd, version);
>  
>  	/* Writes shader state to HW */
> -	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
> +	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, 0);
>  
>  	/* Write constant data */
>  	/* Writes the texture resource constants data to the SGPRs */
> @@ -536,7 +547,7 @@ amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
>  	}
>  }
>  
> -void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, int hang)
> +void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, enum shader_error_type hang)
>  {
>  	int r;
>  	struct drm_amdgpu_info_hw_ip info;
> diff --git a/lib/amdgpu/amd_dispatch.h b/lib/amdgpu/amd_dispatch.h
> index 4df8b1355..5f05ee693 100644
> --- a/lib/amdgpu/amd_dispatch.h
> +++ b/lib/amdgpu/amd_dispatch.h
> @@ -25,15 +25,16 @@
>  #define AMD_DISPATCH_H
>  
>  #include <amdgpu.h>
> +#include "amd_dispatch_helpers.h"
>  
>  void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle,
> -			      uint32_t ip_type, int hang);
> +			      uint32_t ip_type, enum shader_error_type hang);
>  
>  void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
>  					uint32_t ip_type,
>  					uint32_t ring,
>  					uint32_t version,
> -					int hang);
> +					enum shader_error_type hang);
>  
>  void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
>  				      uint32_t ip_type);
> diff --git a/lib/amdgpu/amd_dispatch_helpers.c b/lib/amdgpu/amd_dispatch_helpers.c
> index b0a5f550e..6b2e8f39f 100644
> --- a/lib/amdgpu/amd_dispatch_helpers.c
> +++ b/lib/amdgpu/amd_dispatch_helpers.c
> @@ -114,7 +114,7 @@ int amdgpu_dispatch_write_cumask(struct amdgpu_cmd_base * base, uint32_t version
>   }
>  
>  
> -int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr, uint32_t version)
> +int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr, uint32_t version, enum shader_error_type hang)
>  {
>  	static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
>  		{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	0x000C0041 },
> @@ -123,6 +123,7 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
>  		{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y,	0x00000001 },
>  		{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z,	0x00000001 }
>  	};
> +
>  	static uint32_t bufferclear_cs_shader_registers_gfx11[][2] = {
>  		{0x2e12, 0x600C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x600C0041 },
>  		{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
> @@ -131,6 +132,14 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
>  		{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
>  	};
>  
> +	static uint32_t bufferclear_cs_shader_invalid_registers[][2] = {
> +		{0x2e12, 0xffffffff},	//{ mmCOMPUTE_PGM_RSRC1,	  0x600C0041 },
> +		{0x2e13, 0xffffffff},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
> +		{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
> +		{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
> +		{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
> +	};
> +
>  	static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9);
>  	static const uint32_t bufferclear_cs_shader_registers_num_gfx11 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx11);
>  	int offset_prev = base->cdw;
> @@ -146,19 +155,33 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
>  	if ((version == 11) || (version == 12)) {
>  		for (j = 0; j < bufferclear_cs_shader_registers_num_gfx11; j++) {
>  			base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> -			/* - Gfx11ShRegBase */
> -			base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
> -			if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
> -				bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
> +			if (hang == BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING) {
> +				/* - Gfx11ShRegBase */
> +				base->emit(base,bufferclear_cs_shader_invalid_registers[j][0] - 0x2c00);
> +				if (bufferclear_cs_shader_invalid_registers[j][0] ==0x2E12)
> +					bufferclear_cs_shader_invalid_registers[j][1] &= ~(1<<29);
> +
> +				base->emit(base,bufferclear_cs_shader_invalid_registers[j][1]);
> +			} else {
> +				/* - Gfx11ShRegBase */
> +				base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
> +				if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
> +					bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
>  
> -			base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
> +				base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
> +			}
>  		}
>  	} else {
>  		for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
>  			base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
>  			/* - Gfx9ShRegBase */
> -			base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
> -			base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
> +			if (hang == BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING) {
> +				base->emit(base, bufferclear_cs_shader_invalid_registers[j][0] - 0x2c00);
> +				base->emit(base, bufferclear_cs_shader_invalid_registers[j][1]);
> +			} else {
> +				base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
> +				base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
> +			}
>  		}
>  	}
>  	if (version == 10) {
> diff --git a/lib/amdgpu/amd_dispatch_helpers.h b/lib/amdgpu/amd_dispatch_helpers.h
> index a129e8e07..7ae88cd78 100644
> --- a/lib/amdgpu/amd_dispatch_helpers.h
> +++ b/lib/amdgpu/amd_dispatch_helpers.h
> @@ -25,6 +25,13 @@
>  #define AMD_DISPATCH_HELPERS_H
>  
>  #include <amdgpu.h>
> +enum  shader_error_type {
> +	BACKEND_SE_GC_SHADER_EXECSUCESS,
> +	BACKEND_SE_GC_SHADER_INVALID_SHADER,
> +	BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR,    /* COMPUTE_PGM */
> +	BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING, /* COMPUTE_PGM_RSRC */
> +	BACKEND_SE_GC_SHADER_INVALID_USER_DATA /* COMPUTE_USER_DATA */
> +};
>  
>  struct amdgpu_cmd_base;
>  
> @@ -32,6 +39,6 @@ int amdgpu_dispatch_init( uint32_t ip_type,struct amdgpu_cmd_base *base_cmd, uin
>  
>  int amdgpu_dispatch_write_cumask(struct amdgpu_cmd_base *base_cmd, uint32_t version);
>  
> -int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base *base_cmd, uint64_t shader_addr, uint32_t version);
> +int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base *base_cmd, uint64_t shader_addr, uint32_t version, enum shader_error_type);
>  
>  #endif
> diff --git a/tests/amdgpu/amd_dispatch.c b/tests/amdgpu/amd_dispatch.c
> index 323284306..00564903f 100644
> --- a/tests/amdgpu/amd_dispatch.c
> +++ b/tests/amdgpu/amd_dispatch.c
> @@ -10,6 +10,7 @@
>  #include <sys/sysmacros.h>
>  #include "lib/amdgpu/amd_memory.h"
>  #include "lib/amdgpu/amd_command_submission.h"
> +#include "lib/amdgpu/amd_dispatch_helpers.h"
>  #include "lib/amdgpu/amd_dispatch.h"
>  
>  static void
> @@ -31,9 +32,9 @@ amdgpu_dispatch_hang_gfx(amdgpu_device_handle device_handle)
>  }
>  
>  static void
> -amdgpu_dispatch_hang_compute(amdgpu_device_handle device_handle)
> +amdgpu_dispatch_hang_compute(amdgpu_device_handle device_handle, enum shader_error_type error)
>  {
> -	amdgpu_gfx_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, 1);
> +	amdgpu_gfx_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, error);
>  }
>  
>  static void
> @@ -125,7 +126,31 @@ igt_main
>  	igt_subtest_with_dynamic("amdgpu-dispatch-hang-test-compute-with-IP-COMPUTE") {
>  		if (arr_cap[AMD_IP_COMPUTE]) {
>  			igt_dynamic_f("amdgpu-dispatch-hang-test-compute")
> -			amdgpu_dispatch_hang_compute(device);
> +			amdgpu_dispatch_hang_compute(device, BACKEND_SE_GC_SHADER_INVALID_SHADER);
> +		}
> +	}
> +
> +	igt_describe("Test GPU reset using a invalid shader program address to hang the job on compute ring");
> +	igt_subtest_with_dynamic("amdgpu-dispatch-invalid-program-addr-test-compute-with-IP-COMPUTE") {
> +		if (arr_cap[AMD_IP_COMPUTE]) {
> +			igt_dynamic_f("amdgpu-dispatch-invalid-program-addr-test-compute")
> +			amdgpu_dispatch_hang_compute(device, BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR);
> +		}
> +	}
> +
> +	igt_describe("Test GPU reset using a invalid shader program setting to hang the job on compute ring");
> +	igt_subtest_with_dynamic("amdgpu-dispatch-invalid-setting-test-compute-with-IP-COMPUTE") {
> +		if (arr_cap[AMD_IP_COMPUTE]) {
> +			igt_dynamic_f("amdgpu-dispatch-invalid-setting-test-compute")
> +			amdgpu_dispatch_hang_compute(device, BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING);
> +		}
> +	}
> +
> +	igt_describe("Test GPU reset using a invalid shader user data to hang the job on compute ring");
> +	igt_subtest_with_dynamic("amdgpu-dispatch-invalid-user-data-test-compute-with-IP-COMPUTE") {
> +		if (arr_cap[AMD_IP_COMPUTE]) {
> +			igt_dynamic_f("amdgpu-dispatch-invalid-user-data-test-compute")
> +			amdgpu_dispatch_hang_compute(device, BACKEND_SE_GC_SHADER_INVALID_USER_DATA);
>  		}
>  	}
>  


More information about the igt-dev mailing list