[PATCH i-g-t v2] tests/amdgpu: add amd dispatch subtest
vitaly prosyak
vprosyak at amd.com
Wed Jul 3 03:13:53 UTC 2024
The change looks good to me.
Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
On 2024-07-02 23:09, Jesse Zhang wrote:
> Add more cases to trigger a GPU reset:
> 1. Use invalid user data to trigger a GPU reset.
> 2. Use an invalid shader program address to trigger a GPU reset.
> 3. Use invalid shader settings to trigger a GPU reset.
>
> V2: Rename cases and map them for easier maintenance. (Vitaly)
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
> Signed-off-by: Jiadong Zhu <Jiadong.Zhu at amd.com>
> ---
> lib/amdgpu/amd_dispatch.c | 35 +++++++++++++++++----------
> lib/amdgpu/amd_dispatch.h | 5 ++--
> lib/amdgpu/amd_dispatch_helpers.c | 39 ++++++++++++++++++++++++-------
> lib/amdgpu/amd_dispatch_helpers.h | 9 ++++++-
> tests/amdgpu/amd_dispatch.c | 31 +++++++++++++++++++++---
> 5 files changed, 93 insertions(+), 26 deletions(-)
>
> diff --git a/lib/amdgpu/amd_dispatch.c b/lib/amdgpu/amd_dispatch.c
> index df7d56ea7..12ef7874c 100644
> --- a/lib/amdgpu/amd_dispatch.c
> +++ b/lib/amdgpu/amd_dispatch.c
> @@ -70,7 +70,7 @@ amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
> amdgpu_dispatch_write_cumask(base_cmd, version);
>
> /* Writes shader state to HW */
> - amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
> + amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, 0);
>
> /* Write constant data */
> /* Writes the UAV constant data to the SGPRs. */
> @@ -162,7 +162,7 @@ amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
> void
> amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
> uint32_t ip_type, uint32_t ring, uint32_t version,
> - int hang)
> + enum shader_error_type hang)
> {
> amdgpu_context_handle context_handle;
> amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
> @@ -202,7 +202,7 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
> igt_assert_eq(r, 0);
> memset(ptr_shader, 0, bo_shader_size);
>
> - cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
> + cs_type = hang == BACKEND_SE_GC_SHADER_INVALID_SHADER ? CS_HANG : CS_BUFFERCOPY;
> r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version);
> igt_assert_eq(r, 0);
>
> @@ -217,22 +217,28 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
> &bo_dst, (void **)&ptr_dst,
> &mc_address_dst, &va_dst);
> igt_assert_eq(r, 0);
> -
> ///TODO helper function for this bloc
> amdgpu_dispatch_init(ip_type, base_cmd, version);
> /* Issue commands to set cu mask used in current dispatch */
> amdgpu_dispatch_write_cumask(base_cmd, version);
> +
> + if (hang == BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR)
> + mc_address_shader = 0;
> /* Writes shader state to HW */
> - amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
> + amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, hang);
> memset(ptr_src, 0x55, bo_dst_size);
>
> /* Write constant data */
> /* Writes the texture resource constants data to the SGPRs */
> base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
> base_cmd->emit(base_cmd, 0x240);
> - base_cmd->emit(base_cmd, mc_address_src);
> -
> - base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
> + if (hang == BACKEND_SE_GC_SHADER_INVALID_USER_DATA) {
> + base_cmd->emit(base_cmd, mc_address_src);
> + base_cmd->emit(base_cmd, 0);
> + } else {
> + base_cmd->emit(base_cmd, mc_address_src);
> + base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
> + }
>
> base_cmd->emit(base_cmd, 0x400);
> if (version == 9)
> @@ -247,8 +253,13 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
> /* Writes the UAV constant data to the SGPRs. */
> base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
> base_cmd->emit(base_cmd, 0x244);
> - base_cmd->emit(base_cmd, mc_address_dst);
> - base_cmd->emit(base_cmd, (mc_address_dst >> 32) | 0x100000);
> + if (hang == BACKEND_SE_GC_SHADER_INVALID_USER_DATA) {
> + base_cmd->emit(base_cmd, mc_address_src);
> + base_cmd->emit(base_cmd, 0);
> + } else {
> + base_cmd->emit(base_cmd, mc_address_src);
> + base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
> + }
> base_cmd->emit(base_cmd, 0x400);
> if (version == 9)
> base_cmd->emit(base_cmd, 0x74fac);
> @@ -401,7 +412,7 @@ amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
> amdgpu_dispatch_write_cumask(base_cmd, version);
>
> /* Writes shader state to HW */
> - amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
> + amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, 0);
>
> /* Write constant data */
> /* Writes the texture resource constants data to the SGPRs */
> @@ -536,7 +547,7 @@ amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
> }
> }
>
> -void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, int hang)
> +void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, enum shader_error_type hang)
> {
> int r;
> struct drm_amdgpu_info_hw_ip info;
> diff --git a/lib/amdgpu/amd_dispatch.h b/lib/amdgpu/amd_dispatch.h
> index 4df8b1355..5f05ee693 100644
> --- a/lib/amdgpu/amd_dispatch.h
> +++ b/lib/amdgpu/amd_dispatch.h
> @@ -25,15 +25,16 @@
> #define AMD_DISPATCH_H
>
> #include <amdgpu.h>
> +#include "amd_dispatch_helpers.h"
>
> void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle,
> - uint32_t ip_type, int hang);
> + uint32_t ip_type, enum shader_error_type hang);
>
> void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
> uint32_t ip_type,
> uint32_t ring,
> uint32_t version,
> - int hang);
> + enum shader_error_type hang);
>
> void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
> uint32_t ip_type);
> diff --git a/lib/amdgpu/amd_dispatch_helpers.c b/lib/amdgpu/amd_dispatch_helpers.c
> index b0a5f550e..6b2e8f39f 100644
> --- a/lib/amdgpu/amd_dispatch_helpers.c
> +++ b/lib/amdgpu/amd_dispatch_helpers.c
> @@ -114,7 +114,7 @@ int amdgpu_dispatch_write_cumask(struct amdgpu_cmd_base * base, uint32_t version
> }
>
>
> -int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr, uint32_t version)
> +int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr, uint32_t version, enum shader_error_type hang)
> {
> static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
> {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 },
> @@ -123,6 +123,7 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
> {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
> {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
> };
> +
> static uint32_t bufferclear_cs_shader_registers_gfx11[][2] = {
> {0x2e12, 0x600C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x600C0041 },
> {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
> @@ -131,6 +132,14 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
> {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
> };
>
> + static uint32_t bufferclear_cs_shader_invalid_registers[][2] = {
> + {0x2e12, 0xffffffff}, //{ mmCOMPUTE_PGM_RSRC1, 0x600C0041 },
> + {0x2e13, 0xffffffff}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
> + {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
> + {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
> + {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
> + };
> +
> static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9);
> static const uint32_t bufferclear_cs_shader_registers_num_gfx11 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx11);
> int offset_prev = base->cdw;
> @@ -146,19 +155,33 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
> if ((version == 11) || (version == 12)) {
> for (j = 0; j < bufferclear_cs_shader_registers_num_gfx11; j++) {
> base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> - /* - Gfx11ShRegBase */
> - base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
> - if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
> - bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
> + if (hang == BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING) {
> + /* - Gfx11ShRegBase */
> + base->emit(base,bufferclear_cs_shader_invalid_registers[j][0] - 0x2c00);
> + if (bufferclear_cs_shader_invalid_registers[j][0] ==0x2E12)
> + bufferclear_cs_shader_invalid_registers[j][1] &= ~(1<<29);
> +
> + base->emit(base,bufferclear_cs_shader_invalid_registers[j][1]);
> + } else {
> + /* - Gfx11ShRegBase */
> + base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
> + if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
> + bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
>
> - base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
> + base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
> + }
> }
> } else {
> for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
> base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
> /* - Gfx9ShRegBase */
> - base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
> - base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
> + if (hang == BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING) {
> + base->emit(base, bufferclear_cs_shader_invalid_registers[j][0] - 0x2c00);
> + base->emit(base, bufferclear_cs_shader_invalid_registers[j][1]);
> + } else {
> + base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
> + base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
> + }
> }
> }
> if (version == 10) {
> diff --git a/lib/amdgpu/amd_dispatch_helpers.h b/lib/amdgpu/amd_dispatch_helpers.h
> index a129e8e07..7ae88cd78 100644
> --- a/lib/amdgpu/amd_dispatch_helpers.h
> +++ b/lib/amdgpu/amd_dispatch_helpers.h
> @@ -25,6 +25,13 @@
> #define AMD_DISPATCH_HELPERS_H
>
> #include <amdgpu.h>
> +enum shader_error_type {
> + BACKEND_SE_GC_SHADER_EXECSUCESS, /* first enumerator (0): no error is injected by the memcpy dispatch path */
> + BACKEND_SE_GC_SHADER_INVALID_SHADER, /* memcpy test loads the CS_HANG shader instead of CS_BUFFERCOPY */
> + BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR, /* COMPUTE_PGM: memcpy test forces the shader address to 0 */
> + BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING, /* COMPUTE_PGM_RSRC: write2hw emits the bufferclear_cs_shader_invalid_registers table */
> + BACKEND_SE_GC_SHADER_INVALID_USER_DATA /* COMPUTE_USER_DATA: memcpy test emits 0 as the high address dword */
> +};
>
> struct amdgpu_cmd_base;
>
> @@ -32,6 +39,6 @@ int amdgpu_dispatch_init( uint32_t ip_type,struct amdgpu_cmd_base *base_cmd, uin
>
> int amdgpu_dispatch_write_cumask(struct amdgpu_cmd_base *base_cmd, uint32_t version);
>
> -int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base *base_cmd, uint64_t shader_addr, uint32_t version);
> +int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base *base_cmd, uint64_t shader_addr, uint32_t version, enum shader_error_type);
>
> #endif
> diff --git a/tests/amdgpu/amd_dispatch.c b/tests/amdgpu/amd_dispatch.c
> index 323284306..00564903f 100644
> --- a/tests/amdgpu/amd_dispatch.c
> +++ b/tests/amdgpu/amd_dispatch.c
> @@ -10,6 +10,7 @@
> #include <sys/sysmacros.h>
> #include "lib/amdgpu/amd_memory.h"
> #include "lib/amdgpu/amd_command_submission.h"
> +#include "lib/amdgpu/amd_dispatch_helpers.h"
> #include "lib/amdgpu/amd_dispatch.h"
>
> static void
> @@ -31,9 +32,9 @@ amdgpu_dispatch_hang_gfx(amdgpu_device_handle device_handle)
> }
>
> static void
> -amdgpu_dispatch_hang_compute(amdgpu_device_handle device_handle)
> +amdgpu_dispatch_hang_compute(amdgpu_device_handle device_handle, enum shader_error_type error)
> {
> - amdgpu_gfx_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, 1);
> + amdgpu_gfx_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, error);
> }
>
> static void
> @@ -125,7 +126,31 @@ igt_main
> igt_subtest_with_dynamic("amdgpu-dispatch-hang-test-compute-with-IP-COMPUTE") {
> if (arr_cap[AMD_IP_COMPUTE]) {
> igt_dynamic_f("amdgpu-dispatch-hang-test-compute")
> - amdgpu_dispatch_hang_compute(device);
> + amdgpu_dispatch_hang_compute(device, BACKEND_SE_GC_SHADER_INVALID_SHADER);
> + }
> + }
> +
> + igt_describe("Test GPU reset using a invalid shader program address to hang the job on compute ring");
> + igt_subtest_with_dynamic("amdgpu-dispatch-invalid-program-addr-test-compute-with-IP-COMPUTE") {
> + if (arr_cap[AMD_IP_COMPUTE]) {
> + igt_dynamic_f("amdgpu-dispatch-invalid-program-addr-test-compute")
> + amdgpu_dispatch_hang_compute(device, BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR);
> + }
> + }
> +
> + igt_describe("Test GPU reset using a invalid shader program setting to hang the job on compute ring");
> + igt_subtest_with_dynamic("amdgpu-dispatch-invalid-setting-test-compute-with-IP-COMPUTE") {
> + if (arr_cap[AMD_IP_COMPUTE]) {
> + igt_dynamic_f("amdgpu-dispatch-invalid-setting-test-compute")
> + amdgpu_dispatch_hang_compute(device, BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING);
> + }
> + }
> +
> + igt_describe("Test GPU reset using a invalid shader user data to hang the job on compute ring");
> + igt_subtest_with_dynamic("amdgpu-dispatch-invalid-user-data-test-compute-with-IP-COMPUTE") {
> + if (arr_cap[AMD_IP_COMPUTE]) {
> + igt_dynamic_f("amdgpu-dispatch-invalid-user-data-test-compute")
> + amdgpu_dispatch_hang_compute(device, BACKEND_SE_GC_SHADER_INVALID_USER_DATA);
> }
> }
>
More information about the igt-dev mailing list