[PATCH i-g-t] tests/amdgpu: add amd dispatch subtest
Jesse Zhang
jesse.zhang at amd.com
Tue Jul 2 10:58:44 UTC 2024
Add more cases to trigger a GPU reset:
1. Use invalid user data to trigger a GPU reset.
2. Use an invalid shader program address to trigger a GPU reset.
3. Use invalid shader settings to trigger a GPU reset.
Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
Signed-off-by: Jiadong Zhu <Jiadong.Zhu at amd.com>
---
lib/amdgpu/amd_dispatch.c | 35 +++++++++++++++++----------
lib/amdgpu/amd_dispatch.h | 5 ++--
lib/amdgpu/amd_dispatch_helpers.c | 39 ++++++++++++++++++++++++-------
lib/amdgpu/amd_dispatch_helpers.h | 9 ++++++-
tests/amdgpu/amd_dispatch.c | 31 +++++++++++++++++++++---
5 files changed, 93 insertions(+), 26 deletions(-)
diff --git a/lib/amdgpu/amd_dispatch.c b/lib/amdgpu/amd_dispatch.c
index df7d56ea7..47d32dd40 100644
--- a/lib/amdgpu/amd_dispatch.c
+++ b/lib/amdgpu/amd_dispatch.c
@@ -70,7 +70,7 @@ amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
amdgpu_dispatch_write_cumask(base_cmd, version);
/* Writes shader state to HW */
- amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
+ amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, 0);
/* Write constant data */
/* Writes the UAV constant data to the SGPRs. */
@@ -162,7 +162,7 @@ amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
void
amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
uint32_t ip_type, uint32_t ring, uint32_t version,
- int hang)
+ enum shader_error_type hang)
{
amdgpu_context_handle context_handle;
amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
@@ -202,7 +202,7 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
igt_assert_eq(r, 0);
memset(ptr_shader, 0, bo_shader_size);
- cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
+ cs_type = hang == SHADER_INVALID_SHADER ? CS_HANG : CS_BUFFERCOPY;
r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version);
igt_assert_eq(r, 0);
@@ -217,22 +217,28 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
&bo_dst, (void **)&ptr_dst,
&mc_address_dst, &va_dst);
igt_assert_eq(r, 0);
-
///TODO helper function for this bloc
amdgpu_dispatch_init(ip_type, base_cmd, version);
/* Issue commands to set cu mask used in current dispatch */
amdgpu_dispatch_write_cumask(base_cmd, version);
+
+ if (hang == SHADER_INVALID_PROGRAM_ADDR)
+ mc_address_shader = 0;
/* Writes shader state to HW */
- amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
+ amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, hang);
memset(ptr_src, 0x55, bo_dst_size);
/* Write constant data */
/* Writes the texture resource constants data to the SGPRs */
base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
base_cmd->emit(base_cmd, 0x240);
- base_cmd->emit(base_cmd, mc_address_src);
-
- base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
+ if (hang == SHADER_INVALID_USER_DATA) {
+ base_cmd->emit(base_cmd, mc_address_src);
+ base_cmd->emit(base_cmd, 0);
+ } else {
+ base_cmd->emit(base_cmd, mc_address_src);
+ base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
+ }
base_cmd->emit(base_cmd, 0x400);
if (version == 9)
@@ -247,8 +253,13 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
/* Writes the UAV constant data to the SGPRs. */
base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
base_cmd->emit(base_cmd, 0x244);
- base_cmd->emit(base_cmd, mc_address_dst);
- base_cmd->emit(base_cmd, (mc_address_dst >> 32) | 0x100000);
+ if (hang == SHADER_INVALID_USER_DATA) {
+ base_cmd->emit(base_cmd, mc_address_src);
+ base_cmd->emit(base_cmd, 0); /* fault injection: high dword zeroed instead of (addr >> 32) | 0x100000 */
+ } else {
+ base_cmd->emit(base_cmd, mc_address_dst); /* UAV is the copy destination, as before this patch */
+ base_cmd->emit(base_cmd, (mc_address_dst >> 32) | 0x100000);
+ }
base_cmd->emit(base_cmd, 0x400);
if (version == 9)
base_cmd->emit(base_cmd, 0x74fac);
@@ -401,7 +412,7 @@ amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
amdgpu_dispatch_write_cumask(base_cmd, version);
/* Writes shader state to HW */
- amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
+ amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, 0);
/* Write constant data */
/* Writes the texture resource constants data to the SGPRs */
@@ -536,7 +547,7 @@ amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
}
}
-void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, int hang)
+void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, enum shader_error_type hang)
{
int r;
struct drm_amdgpu_info_hw_ip info;
diff --git a/lib/amdgpu/amd_dispatch.h b/lib/amdgpu/amd_dispatch.h
index 4df8b1355..5f05ee693 100644
--- a/lib/amdgpu/amd_dispatch.h
+++ b/lib/amdgpu/amd_dispatch.h
@@ -25,15 +25,16 @@
#define AMD_DISPATCH_H
#include <amdgpu.h>
+#include "amd_dispatch_helpers.h"
void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle,
- uint32_t ip_type, int hang);
+ uint32_t ip_type, enum shader_error_type hang);
void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
uint32_t ip_type,
uint32_t ring,
uint32_t version,
- int hang);
+ enum shader_error_type hang);
void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
uint32_t ip_type);
diff --git a/lib/amdgpu/amd_dispatch_helpers.c b/lib/amdgpu/amd_dispatch_helpers.c
index b0a5f550e..e255fb2ab 100644
--- a/lib/amdgpu/amd_dispatch_helpers.c
+++ b/lib/amdgpu/amd_dispatch_helpers.c
@@ -114,7 +114,7 @@ int amdgpu_dispatch_write_cumask(struct amdgpu_cmd_base * base, uint32_t version
}
-int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr, uint32_t version)
+int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr, uint32_t version, enum shader_error_type hang)
{
static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
{0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 },
@@ -123,6 +123,7 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
{0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
{0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};
+
static uint32_t bufferclear_cs_shader_registers_gfx11[][2] = {
{0x2e12, 0x600C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x600C0041 },
{0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
@@ -131,6 +132,14 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
{0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};
+ static uint32_t bufferclear_cs_shader_invalid_registers[][2] = {
+ {0x2e12, 0xffffffff}, //{ mmCOMPUTE_PGM_RSRC1, 0xffffffff } deliberately invalid
+ {0x2e13, 0xffffffff}, //{ mmCOMPUTE_PGM_RSRC2, 0xffffffff } deliberately invalid
+ {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
+ {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
+ {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
+ };
+
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9);
static const uint32_t bufferclear_cs_shader_registers_num_gfx11 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx11);
int offset_prev = base->cdw;
@@ -146,19 +155,33 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
if ((version == 11) || (version == 12)) {
for (j = 0; j < bufferclear_cs_shader_registers_num_gfx11; j++) {
base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
- /* - Gfx11ShRegBase */
- base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
- if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
- bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
+ if (hang == SHADER_INVALID_PROGRAM_SETTING) {
+ /* - Gfx11ShRegBase */
+ base->emit(base,bufferclear_cs_shader_invalid_registers[j][0] - 0x2c00);
+ if (bufferclear_cs_shader_invalid_registers[j][0] ==0x2E12)
+ bufferclear_cs_shader_invalid_registers[j][1] &= ~(1<<29);
+
+ base->emit(base,bufferclear_cs_shader_invalid_registers[j][1]);
+ } else {
+ /* - Gfx11ShRegBase */
+ base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
+ if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
+ bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
- base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
+ base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
+ }
}
} else {
for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
/* - Gfx9ShRegBase */
- base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
- base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
+ if (hang == SHADER_INVALID_PROGRAM_SETTING) {
+ base->emit(base, bufferclear_cs_shader_invalid_registers[j][0] - 0x2c00);
+ base->emit(base, bufferclear_cs_shader_invalid_registers[j][1]);
+ } else {
+ base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
+ base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
+ }
}
}
if (version == 10) {
diff --git a/lib/amdgpu/amd_dispatch_helpers.h b/lib/amdgpu/amd_dispatch_helpers.h
index a129e8e07..e01afb6fb 100644
--- a/lib/amdgpu/amd_dispatch_helpers.h
+++ b/lib/amdgpu/amd_dispatch_helpers.h
@@ -25,6 +25,13 @@
#define AMD_DISPATCH_HELPERS_H
#include <amdgpu.h>
+enum shader_error_type {
+ SHADER_EXECSUCESS, /* value 0: no fault injected; callers that pass 0 get the normal path */
+ SHADER_INVALID_SHADER, /* load the CS_HANG shader instead of CS_BUFFERCOPY */
+ SHADER_INVALID_PROGRAM_ADDR, /* COMPUTE_PGM: shader address forced to 0 */
+ SHADER_INVALID_PROGRAM_SETTING, /* COMPUTE_PGM_RSRC: RSRC1/RSRC2 taken from the 0xffffffff table */
+ SHADER_INVALID_USER_DATA /* COMPUTE_USER_DATA: high dword of the buffer constants written as 0 */
+};
struct amdgpu_cmd_base;
@@ -32,6 +39,6 @@ int amdgpu_dispatch_init( uint32_t ip_type,struct amdgpu_cmd_base *base_cmd, uin
int amdgpu_dispatch_write_cumask(struct amdgpu_cmd_base *base_cmd, uint32_t version);
-int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base *base_cmd, uint64_t shader_addr, uint32_t version);
+int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base *base_cmd, uint64_t shader_addr, uint32_t version, enum shader_error_type);
#endif
diff --git a/tests/amdgpu/amd_dispatch.c b/tests/amdgpu/amd_dispatch.c
index 323284306..26e11cc84 100644
--- a/tests/amdgpu/amd_dispatch.c
+++ b/tests/amdgpu/amd_dispatch.c
@@ -10,6 +10,7 @@
#include <sys/sysmacros.h>
#include "lib/amdgpu/amd_memory.h"
#include "lib/amdgpu/amd_command_submission.h"
+#include "lib/amdgpu/amd_dispatch_helpers.h"
#include "lib/amdgpu/amd_dispatch.h"
static void
@@ -31,9 +32,9 @@ amdgpu_dispatch_hang_gfx(amdgpu_device_handle device_handle)
}
static void
-amdgpu_dispatch_hang_compute(amdgpu_device_handle device_handle)
+amdgpu_dispatch_hang_compute(amdgpu_device_handle device_handle, enum shader_error_type error)
{
- amdgpu_gfx_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, 1);
+ amdgpu_gfx_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, error);
}
static void
@@ -125,7 +126,31 @@ igt_main
igt_subtest_with_dynamic("amdgpu-dispatch-hang-test-compute-with-IP-COMPUTE") {
if (arr_cap[AMD_IP_COMPUTE]) {
igt_dynamic_f("amdgpu-dispatch-hang-test-compute")
- amdgpu_dispatch_hang_compute(device);
+ amdgpu_dispatch_hang_compute(device, SHADER_INVALID_SHADER);
+ }
+ }
+
+ igt_describe("Test GPU reset using a invalid shader program address to hang the job on compute ring");
+ igt_subtest_with_dynamic("amdgpu-dispatch-invalid-program-addr-test-compute-with-IP-COMPUTE") {
+ if (arr_cap[AMD_IP_COMPUTE]) {
+ igt_dynamic_f("amdgpu-dispatch-invalid-program-addr-test-compute")
+ amdgpu_dispatch_hang_compute(device, SHADER_INVALID_PROGRAM_ADDR);
+ }
+ }
+
+ igt_describe("Test GPU reset using a invalid shader program setting to hang the job on compute ring");
+ igt_subtest_with_dynamic("amdgpu-dispatch-invalid-setting-test-compute-with-IP-COMPUTE") {
+ if (arr_cap[AMD_IP_COMPUTE]) {
+ igt_dynamic_f("amdgpu-dispatch-invalid-setting-test-compute")
+ amdgpu_dispatch_hang_compute(device, SHADER_INVALID_PROGRAM_SETTING);
+ }
+ }
+
+ igt_describe("Test GPU reset using a invalid shader user data to hang the job on compute ring");
+ igt_subtest_with_dynamic("amdgpu-dispatch-invalid-user-data-test-compute-with-IP-COMPUTE") {
+ if (arr_cap[AMD_IP_COMPUTE]) {
+ igt_dynamic_f("amdgpu-dispatch-invalid-user-data-test-compute")
+ amdgpu_dispatch_hang_compute(device, SHADER_INVALID_USER_DATA);
}
}
--
2.25.1
More information about the igt-dev
mailing list