[PATCH i-g-t v4] tests/amd_queue_reset: add sdma test in queue reset
Jesse.zhang@amd.com
jesse.zhang at amd.com
Mon Sep 2 09:48:55 UTC 2024
To enhance queue reset testing, add an SDMA IP test.
v4: 1. add an SDMA support flag,
2. add a function for calculating the number of tests,
3. remove !strstr(it->name, "CMD") (Vitaly),
4. temporarily ignore the "memory page has hardware error" (EHWPOISON) return code
Cc: Vitaly Prosyak <vitaly.prosyak at amd.com>
Cc: Alex Deucher <alexander.deucher at amd.com>
Cc: Christian Koenig <christian.koenig at amd.com>
Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
---
lib/amdgpu/amd_command_submission.c | 2 +-
lib/amdgpu/amd_ip_blocks.h | 1 +
tests/amdgpu/amd_queue_reset.c | 43 +++++++++++++++++++++--------
3 files changed, 34 insertions(+), 12 deletions(-)
diff --git a/lib/amdgpu/amd_command_submission.c b/lib/amdgpu/amd_command_submission.c
index a0c72fb47..025e8bb7a 100644
--- a/lib/amdgpu/amd_command_submission.c
+++ b/lib/amdgpu/amd_command_submission.c
@@ -77,7 +77,7 @@ int amdgpu_test_exec_cs_helper(amdgpu_device_handle device, unsigned int ip_type
if (expect_failure)
igt_info("amdgpu_cs_submit %d PID %d\n", r, getpid());
else {
- if (r != -ECANCELED && r != -ENODATA) /* we allow ECANCELED or ENODATA for good jobs temporally */
+ if (r != -ECANCELED && r != -ENODATA && r != -EHWPOISON) /* we allow ECANCELED, ENODATA or -EHWPOISON for good jobs temporally */
igt_assert_eq(r, 0);
}
diff --git a/lib/amdgpu/amd_ip_blocks.h b/lib/amdgpu/amd_ip_blocks.h
index 3e729f4c0..6a8f97d24 100644
--- a/lib/amdgpu/amd_ip_blocks.h
+++ b/lib/amdgpu/amd_ip_blocks.h
@@ -62,6 +62,7 @@ struct dynamic_test{
const char *name;
const char *describe;
struct asic_id_filter exclude_filter[_MAX_NUM_ASIC_ID_EXCLUDE_FILTER];
+ bool support_sdma;
};
#define for_each_test(t, T) for(typeof(*T) *t = T; t->name; t++)
diff --git a/tests/amdgpu/amd_queue_reset.c b/tests/amdgpu/amd_queue_reset.c
index 537f653f9..b257ec3c0 100644
--- a/tests/amdgpu/amd_queue_reset.c
+++ b/tests/amdgpu/amd_queue_reset.c
@@ -1022,6 +1022,23 @@ reset_rings_numbers(unsigned int *ring_id_good, unsigned int *ring_id_bad,
*ring_id_job_bad = 0;
}
+static int
+get_num_of_tests(struct dynamic_test *arr_err, enum amd_ip_block_type *ip_tests, int num_ip)
+{
+ int i, cnt=0;
+
+ for (i = 0; i < num_ip; i++) {
+ for (struct dynamic_test *it = arr_err; it->name; it++) {
+ if(*ip_tests == AMD_IP_DMA && (!it->support_sdma))
+ continue;
+ cnt++;
+ }
+ ip_tests++;
+ }
+
+ return cnt;
+}
+
igt_main
{
char cmdline[2048];
@@ -1035,7 +1052,6 @@ igt_main
posix_spawn_file_actions_t action;
amdgpu_device_handle device;
struct amdgpu_gpu_info gpu_info = {0};
- struct drm_amdgpu_info_hw_ip info[2] = {0};
int fd = -1;
int fd_shm = -1;
struct shmbuf *sh_mem = NULL;
@@ -1047,8 +1063,9 @@ igt_main
unsigned int ring_id_job_good;
unsigned int ring_id_job_bad;
- enum amd_ip_block_type ip_tests[2] = {AMD_IP_COMPUTE/*keep first*/, AMD_IP_GFX};
+ enum amd_ip_block_type ip_tests[] = {AMD_IP_COMPUTE/*keep first*/, AMD_IP_GFX, AMD_IP_DMA};
enum amd_ip_block_type ip_background = AMD_IP_COMPUTE;
+ struct drm_amdgpu_info_hw_ip info[ARRAY_SIZE(ip_tests)] = {0};
amdgpu_context_handle *arr_context_handle = NULL;
@@ -1059,10 +1076,10 @@ igt_main
struct dynamic_test arr_err[] = {
{CMD_STREAM_EXEC_INVALID_PACKET_LENGTH, "CMD_STREAM_EXEC_INVALID_PACKET_LENGTH",
"Stressful-and-multiple-cs-of-bad and good length-operations-using-multiple-processes",
- { {FAMILY_UNKNOWN, 0x1, 0x10 }, {FAMILY_AI, 0x32, 0x3C }, {FAMILY_AI, 0x3C, 0xFF } } },
+ { {FAMILY_UNKNOWN, 0x1, 0x10 }, {FAMILY_AI, 0x32, 0x3C }, {FAMILY_AI, 0x3C, 0xFF } }, true },
{CMD_STREAM_EXEC_INVALID_OPCODE, "CMD_STREAM_EXEC_INVALID_OPCODE",
"Stressful-and-multiple-cs-of-bad and good opcode-operations-using-multiple-processes",
- { {FAMILY_UNKNOWN, -1, -1 }, {FAMILY_UNKNOWN, -1, -1 }, {FAMILY_UNKNOWN, -1, -1 } } },
+ { {FAMILY_UNKNOWN, -1, -1 }, {FAMILY_UNKNOWN, -1, -1 }, {FAMILY_UNKNOWN, -1, -1 } }, true },
//TODO not job timeout, debug why for n31.
//{CMD_STREAM_TRANS_BAD_MEM_ADDRESS_BY_SYNC,"CMD_STREAM_TRANS_BAD_MEM_ADDRESS_BY_SYNC",
// "Stressful-and-multiple-cs-of-bad and good mem-sync-operations-using-multiple-processes"},
@@ -1071,16 +1088,16 @@ igt_main
// "Stressful-and-multiple-cs-of-bad and good reg-operations-using-multiple-processes"},
{BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR, "BACKEND_SE_GC_SHADER_INVALID_PROGRAM_ADDR",
"Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes",
- { {FAMILY_UNKNOWN, 0x1, 0x10 }, {FAMILY_AI, 0x32, 0x3C }, {FAMILY_AI, 0x3C, 0xFF } } },
+ { {FAMILY_UNKNOWN, 0x1, 0x10 }, {FAMILY_AI, 0x32, 0x3C }, {FAMILY_AI, 0x3C, 0xFF } }, false },
//TODO KGQ cannot recover by queue reset, it maybe need a fw bugfix on naiv31
//{BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING,"BACKEND_SE_GC_SHADER_INVALID_PROGRAM_SETTING",
// "Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes"},
{BACKEND_SE_GC_SHADER_INVALID_USER_DATA, "BACKEND_SE_GC_SHADER_INVALID_USER_DATA",
"Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes",
- { {FAMILY_UNKNOWN, -1, -1 }, {FAMILY_AI, 0x32, 0x3C }, {FAMILY_AI, 0x3C, 0xFF } } },
+ { {FAMILY_UNKNOWN, -1, -1 }, {FAMILY_AI, 0x32, 0x3C }, {FAMILY_AI, 0x3C, 0xFF } }, false },
{BACKEND_SE_GC_SHADER_INVALID_SHADER, "BACKEND_SE_GC_SHADER_INVALID_SHADER",
"Stressful-and-multiple-cs-of-bad and good shader-operations-using-multiple-processes",
- { {FAMILY_UNKNOWN, 0x1, 0x10 }, {FAMILY_AI, 0x32, 0x3C }, {FAMILY_AI, 0x3C, 0xFF } } },
+ { {FAMILY_UNKNOWN, 0x1, 0x10 }, {FAMILY_AI, 0x32, 0x3C }, {FAMILY_AI, 0x3C, 0xFF } }, false },
{}
};
@@ -1098,8 +1115,7 @@ igt_main
if (is_run_subtest_parameter_found(argc, argv))
const_num_of_tests = 1;
else
- const_num_of_tests = (sizeof(arr_err)/sizeof(struct dynamic_test) - 1) * ARRAY_SIZE(ip_tests);
-
+ const_num_of_tests = get_num_of_tests(&arr_err[0], &ip_tests[0], ARRAY_SIZE(ip_tests));
fd = drm_open_driver(DRIVER_AMDGPU);
err = amdgpu_device_initialize(fd, &major, &minor, &device);
@@ -1139,16 +1155,21 @@ igt_main
process, sh_mem, const_num_of_tests, info[0].hw_ip_version_major,
&monitor_child, &test_child);
}
+
for (int i = 0; i < ARRAY_SIZE(ip_tests); i++) {
reset_rings_numbers(&ring_id_good, &ring_id_bad, &ring_id_job_good, &ring_id_job_bad);
for (struct dynamic_test *it = &arr_err[0]; it->name; it++) {
+ if(ip_tests[i] == AMD_IP_DMA && (!it->support_sdma))
+ continue;
igt_describe("Stressful-and-multiple-cs-of-bad-and-good-length-operations-using-multiple-processes");
- igt_subtest_with_dynamic_f("amdgpu-%s-%s", ip_tests[i] == AMD_IP_COMPUTE ? "COMPUTE":"GFX", it->name) {
+ igt_subtest_with_dynamic_f("amdgpu-%s-%s", ip_tests[i] == AMD_IP_COMPUTE ? "COMPUTE":
+ ip_tests[i] == AMD_IP_GFX ? "GFX":"SDMA", it->name) {
if (arr_cap[ip_tests[i]] && is_sub_test_queue_reset_enable(&gpu_info, it->exclude_filter, it) &&
get_next_rings(&ring_id_good, &ring_id_bad, info[0].available_rings,
info[i].available_rings, ip_background != ip_tests[i], &ring_id_job_good, &ring_id_job_bad)) {
igt_dynamic_f("amdgpu-%s-ring-good-%d-bad-%d-%s", it->name, ring_id_job_good, ring_id_job_bad,
- ip_tests[i] == AMD_IP_COMPUTE ? "COMPUTE":"GFX")
+ ip_tests[i] == AMD_IP_COMPUTE ? "COMPUTE":
+ ip_tests[i] == AMD_IP_GFX? "GFX":"SDMA")
set_next_test_to_run(sh_mem, it->test, ip_background, ip_tests[i], ring_id_job_good, ring_id_job_bad);
} else {
set_next_test_to_skip(sh_mem);
--
2.25.1
More information about the igt-dev
mailing list