[PATCH i-g-t v2] tests/amdgpu: add KGQ test in queue reset
Jesse.zhang@amd.com
jesse.zhang at amd.com
Fri Aug 9 02:42:51 UTC 2024
Extend the queue reset test to also cover the kernel graphics queue (KGQ), in addition to the existing compute queue tests.
V2:
Improve the selection of the rings used for testing (Vitaly)
Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
Cc: Alex Deucher <alexander.deucher at amd.com>
Cc: Christian Koenig <christian.koenig at amd.com>
Cc: Vitaly Prosyak <vitaly.prosyak at amd.com>
Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
---
lib/amdgpu/amd_ip_blocks.h | 7 ++
tests/amdgpu/amd_queue_reset.c | 145 ++++++++++++++-------------------
2 files changed, 69 insertions(+), 83 deletions(-)
diff --git a/lib/amdgpu/amd_ip_blocks.h b/lib/amdgpu/amd_ip_blocks.h
index 7fd883608..1b39d9945 100644
--- a/lib/amdgpu/amd_ip_blocks.h
+++ b/lib/amdgpu/amd_ip_blocks.h
@@ -45,6 +45,13 @@ enum cmd_error_type {
BACKEND_SE_GC_SHADER_INVALID_USER_DATA /* COMPUTE_USER_DATA */
};
+struct dynamic_test{
+ enum cmd_error_type test;
+ const char *name;
+ const char *describe;
+};
+#define for_each_test(t, T) for(typeof(*T) *t = T; t->name; t++)
+
/* aux struct to hold misc parameters for convenience to maintain */
struct amdgpu_ring_context {
diff --git a/tests/amdgpu/amd_queue_reset.c b/tests/amdgpu/amd_queue_reset.c
index 9ea0a1f24..4ae69d6c2 100644
--- a/tests/amdgpu/amd_queue_reset.c
+++ b/tests/amdgpu/amd_queue_reset.c
@@ -846,25 +846,36 @@ free_contexts(amdgpu_device_handle device, amdgpu_context_handle *p_contexts,
}
}
-/* TODO add logic to iterate for all */
static bool
-get_next_rings(unsigned int ring_begin, unsigned int available_rings,
- unsigned int *next_ring, unsigned int *next_next_ring)
+get_next_rings(unsigned int ring_begin, struct drm_amdgpu_info_hw_ip info[],
+ unsigned int *good_job_ring, unsigned int *bad_job_ring, unsigned int order)
{
- bool ret = false;
unsigned int ring_id;
- for (ring_id = ring_begin; (1 << ring_id) & available_rings; ring_id++) {
- *next_ring = ring_id;
- *next_next_ring = ring_id + 1;
+ /* Check that the good job ring is available. By default the good job runs on a compute ring. */
+ for (ring_id = ring_begin; (1 << ring_id) & info[0].available_rings; ring_id++) {
+ if ((1 << *good_job_ring) & info[0].available_rings) {
+ *good_job_ring = ring_id;
+ /* check bad job ring is available */
+ for (ring_id = ring_begin; (1 << ring_id) & info[order].available_rings; ring_id++) {
+ /* If order is 0, the bad job also runs on a compute ring, so it
+ * should skip the good ring and use the next ring for the bad job.
+ */
+ if (!order)
+ *bad_job_ring = *good_job_ring + 1;
+ else
+ *bad_job_ring = ring_id;
+ if ((1 << *bad_job_ring) & info[order].available_rings) {
+ return true;
+ }
+ }
- if ((*next_ring & available_rings) && (*next_next_ring & available_rings)) {
- ret = true;
- break;
}
}
- return ret;
+
+ return false;
}
+
igt_main
{
char cmdline[2048];
@@ -878,7 +889,7 @@ igt_main
posix_spawn_file_actions_t action;
amdgpu_device_handle device;
struct amdgpu_gpu_info gpu_info = {0};
- struct drm_amdgpu_info_hw_ip info = {0};
+ struct drm_amdgpu_info_hw_ip info[2] = {0};
int fd = -1;
int fd_shm = -1;
struct shmbuf *sh_mem = NULL;
@@ -888,7 +899,7 @@ igt_main
unsigned int ring_id_good = 0;
unsigned int ring_id_bad = 1;
- enum amd_ip_block_type ip_test = AMD_IP_COMPUTE;
+ enum amd_ip_block_type ip_tests[2] = {AMD_IP_COMPUTE/*keep first*/, AMD_IP_GFX};
enum amd_ip_block_type ip_background = AMD_IP_COMPUTE;
amdgpu_context_handle *arr_context_handle = NULL;
@@ -897,14 +908,27 @@ igt_main
* which are shared between child processes ( test/monitor/main and
* separate for background
*/
- unsigned int arr_err[] = {
- CMD_STREAM_EXEC_INVALID_PACKET_LENGTH,
- CMD_STREAM_EXEC_INVALID_OPCODE,
- //CMD_STREAM_TRANS_BAD_MEM_ADDRESS_BY_SYNC,TODO not job timeout, debug why for n31
- //CMD_STREAM_TRANS_BAD_REG_ADDRESS, TODO amdgpu: device lost from bus! for n31
- BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR,
- BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING,
- BACKEND_SE_GC_SHADER_INVALID_USER_DATA
+ struct dynamic_test arr_err[] = {
+ {CMD_STREAM_EXEC_INVALID_PACKET_LENGTH, "CMD_STREAM_EXEC_INVALID_PACKET_LENGTH",
+ "Stressful-and-multiple-cs-of-bad and good length-operations-using-multiple-processes"},
+ {CMD_STREAM_EXEC_INVALID_OPCODE, "CMD_STREAM_EXEC_INVALID_OPCODE",
+ "Stressful-and-multiple-cs-of-bad and good opcode-operations-using-multiple-processes"},
+ //TODO not job timeout, debug why for n31.
+ //{CMD_STREAM_TRANS_BAD_MEM_ADDRESS_BY_SYNC,"CMD_STREAM_TRANS_BAD_MEM_ADDRESS_BY_SYNC",
+ // "Stressful-and-multiple-cs-of-bad and good mem-sync-operations-using-multiple-processes"},
+ //TODO amdgpu: device lost from bus! for n31
+ //{CMD_STREAM_TRANS_BAD_REG_ADDRESS,"CMD_STREAM_TRANS_BAD_REG_ADDRESS",
+ // "Stressful-and-multiple-cs-of-bad and good reg-operations-using-multiple-processes"},
+ {BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR, "BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR",
+ "Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes"},
+ //TODO KGQ cannot recover by queue reset, it may need a fw bugfix on navi31
+ //{BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING,"BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING",
+ // "Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes"},
+ {BACKEND_SE_GC_SHADER_INVALID_USER_DATA, "BACKEND_SE_GC_SHADER_INVALID_USER_DATA",
+ "Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes"},
+ {BACKEND_SE_GC_SHADER_INVALID_SHADER, "BACKEND_SE_GC_SHADER_INVALID_SHADER",
+ "Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes"},
+ {}
};
int const_num_of_tests;
@@ -921,7 +945,7 @@ igt_main
if (is_run_subtest_parameter_found(argc, argv))
const_num_of_tests = 1;
else
- const_num_of_tests = ARRAY_SIZE(arr_err);
+ const_num_of_tests = sizeof(arr_err)/sizeof(struct dynamic_test) * ARRAY_SIZE(ip_tests);
if (!is_background_parameter_found(argc, argv)) {
add_background_parameter(&argc, argv);
@@ -943,8 +967,11 @@ igt_main
r = amdgpu_query_gpu_info(device, &gpu_info);
igt_assert_eq(r, 0);
- r = amdgpu_query_hw_ip_info(device, ip_test, 0, &info);
- igt_assert_eq(r, 0);
+ for (int i = 0; i < ARRAY_SIZE(ip_tests); i++) {
+ r = amdgpu_query_hw_ip_info(device, ip_tests[i], 0, &info[i]);
+ igt_assert_eq(r, 0);
+ }
+
r = setup_amdgpu_ip_blocks(major, minor, &gpu_info, device);
igt_assert_eq(r, 0);
@@ -959,68 +986,20 @@ igt_main
igt_require(sh_mem != NULL);
run_all(device, arr_context_handle,
- process, sh_mem, const_num_of_tests, info.hw_ip_version_major,
+ process, sh_mem, const_num_of_tests, info[0].hw_ip_version_major,
&monitor_child, &test_child);
}
- igt_describe("Stressful-and-multiple-cs-of-bad and good length-operations-using-multiple-processes");
- igt_subtest("amdgpu-compute-CMD_STREAM_EXEC_INVALID_PACKET_LENGTH") {
- if (arr_cap[ip_test] && get_next_rings(ring_id_good, info.available_rings, &ring_id_good, &ring_id_bad)) {
- set_next_test_to_run(sh_mem, CMD_STREAM_EXEC_INVALID_PACKET_LENGTH,
- ip_background, ip_test, ring_id_good, ring_id_bad);
- }
- }
-
- igt_describe("Stressful-and-multiple-cs-of-bad and good opcode-operations-using-multiple-processes");
- igt_subtest("amdgpu-compute-CMD_STREAM_EXEC_INVALID_OPCODE") {
- if (arr_cap[ip_test] && get_next_rings(ring_id_good, info.available_rings, &ring_id_good, &ring_id_bad)) {
- set_next_test_to_run(sh_mem, CMD_STREAM_EXEC_INVALID_OPCODE,
- ip_background, ip_test, ring_id_good, ring_id_bad);
- }
- }
-
- /* TODO not job timeout, debug why for nv32
- *igt_describe("Stressful-and-multiple-cs-of-bad and good mem-sync-operations-using-multiple-processes");
- *igt_subtest_with_dynamic("amdgpu-compute-CMD_STREAM_TRANS_BAD_MEM_ADDRESS_BY_SYNC") {
- * if (arr_cap[ip_test] && get_next_rings(ring_id_good, info.available_rings, &ring_id_good, &ring_id_bad)) {
- * igt_dynamic_f("amdgpu-compute-CMD_STREAM_TRANS_BAD_MEM_ADDRESS_BY_SYNC")
- * set_next_test_to_run(sh_mem, CMD_STREAM_TRANS_BAD_MEM_ADDRESS_BY_SYNC,
- * ip_background, ip_test, ring_id_good, ring_id_bad);
- * }
- */
-
- /* TODO amdgpu: device lost from bus! for nv32
- *igt_describe("Stressful-and-multiple-cs-of-bad and good reg-operations-using-multiple-processes");
- *igt_subtest_with_dynamic("amdgpu-compute-CMD_STREAM_TRANS_BAD_REG_ADDRESS") {
- * if (arr_cap[ip_test] && get_next_rings(ring_id_good, info.available_rings, &ring_id_good, &ring_id_bad)) {
- * igt_dynamic_f("amdgpu-compute-CMD_STREAM_TRANS_BAD_MEM_ADDRESS")
- * set_next_test_to_run(sh_mem, CMD_STREAM_TRANS_BAD_REG_ADDRESS,
- * ip_background, ip_test, ring_id_good, ring_id_bad);
- * }
- */
-
- //amdgpu_ring_soft_recovery
- igt_describe("Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes");
- igt_subtest("Handful-by-soft-recovery-amdgpu-compute-BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR") {
- if (arr_cap[ip_test] && get_next_rings(ring_id_good, info.available_rings, &ring_id_good, &ring_id_bad)) {
- set_next_test_to_run(sh_mem, BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR,
- ip_background, ip_test, ring_id_good, ring_id_bad);
- }
- }
-
- igt_describe("Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes");
- igt_subtest("amdgpu-compute-BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING") {
- if (arr_cap[ip_test] && get_next_rings(ring_id_good, info.available_rings, &ring_id_good, &ring_id_bad)) {
- set_next_test_to_run(sh_mem, BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING,
- ip_background, ip_test, ring_id_good, ring_id_bad);
- }
- }
-
- igt_describe("Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes");
- igt_subtest("amdgpu-compute-BACKEND_SE_GC_SHADER_INVALID_USER_DATA") {
- if (arr_cap[ip_test] && get_next_rings(ring_id_good, info.available_rings, &ring_id_good, &ring_id_bad)) {
- set_next_test_to_run(sh_mem, BACKEND_SE_GC_SHADER_INVALID_USER_DATA,
- ip_background, ip_test, ring_id_good, ring_id_bad);
+ for (int i = 0; i < ARRAY_SIZE(ip_tests); i++) {
+ for (struct dynamic_test *it = &arr_err[0]; it->name; it++) {
+ igt_describe("Stressful-and-multiple-cs-of-bad and good length-operations-using-multiple-processes");
+ igt_subtest_with_dynamic_f("amdgpu-%s-%s", ip_tests[i] == AMD_IP_COMPUTE ? "compute":"gfx", it->name) {
+ if (arr_cap[ip_tests[i]] && get_next_rings(ring_id_good, info, &ring_id_good, &ring_id_bad, i)) {
+ igt_dynamic_f("amdgpu-%s", it->name);
+ set_next_test_to_run(sh_mem, it->test,
+ ip_background, ip_tests[i], ring_id_good, ring_id_bad);
+ }
+ }
}
}
--
2.25.1
More information about the igt-dev
mailing list