[PATCH libdrm 1/4] tests/amdgpu: add dispatch hang test
Flora Cui
flora.cui at amd.com
Wed Nov 13 04:58:09 UTC 2019
Add compute and gfx dispatch hang tests for gfx9. Both tests are registered in the deadlock suite and are deactivated by default.
Signed-off-by: Flora Cui <flora.cui at amd.com>
---
tests/amdgpu/amdgpu_test.c | 12 +++++++
tests/amdgpu/amdgpu_test.h | 1 +
tests/amdgpu/basic_tests.c | 67 ++++++++++++++++++++++++++++-------
tests/amdgpu/deadlock_tests.c | 14 ++++++++
4 files changed, 81 insertions(+), 13 deletions(-)
diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c
index 94bc3056..3ac9d8d2 100644
--- a/tests/amdgpu/amdgpu_test.c
+++ b/tests/amdgpu/amdgpu_test.c
@@ -460,6 +460,18 @@ static void amdgpu_disable_suites()
"illegal mem access test (set amdgpu.vm_fault_stop=2)", CU_FALSE))
fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
+ /* Only run on GFX9 so far; deactivated unconditionally because the family check below is commented out. */
+ //if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV)
+ if (amdgpu_set_test_active(DEADLOCK_TESTS_STR,
+ "gfx ring bad dispatch test (set amdgpu.lockup_timeout=50)", CU_FALSE))
+ fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
+
+	/* Only run on GFX9 so far; deactivated unconditionally. The name must equal the string registered in deadlock_tests[]. */
+	//if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV)
+	if (amdgpu_set_test_active(DEADLOCK_TESTS_STR,
+			"compute ring bad dispatch test (set amdgpu.lockup_timeout=50,50)", CU_FALSE))
+		fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
+
if (amdgpu_set_test_active(BO_TESTS_STR, "Metadata", CU_FALSE))
fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h
index 0cb6ee98..2b01bf41 100644
--- a/tests/amdgpu/amdgpu_test.h
+++ b/tests/amdgpu/amdgpu_test.h
@@ -241,6 +241,7 @@ CU_BOOL suite_syncobj_timeline_tests_enable(void);
*/
extern CU_TestInfo syncobj_timeline_tests[];
+void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type); /* defined in basic_tests.c, called from deadlock_tests.c */
/**
* Helper functions
diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index a57dcbb4..71c9220d 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -311,7 +311,8 @@ static uint32_t shader_bin[] = {
enum cs_type {
CS_BUFFERCLEAR,
- CS_BUFFERCOPY
+ CS_BUFFERCOPY,
+ CS_HANG
};
static const uint32_t bufferclear_cs_shader_gfx9[] = {
@@ -473,6 +474,14 @@ static const uint32_t cached_cmd_gfx9[] = {
0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};
+/* Shader binary loaded for CS_HANG by the dispatch hang test; read-only and private to this file like the other shader tables. */
+static const uint32_t memcpy_ps_hang[] = {
+	0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 0xC0800300,
+	0xC8080000, 0xC80C0100, 0xC8090001, 0xC80D0101, 0xBF8C007F,
+	0xF0800F00, 0x00010002, 0xBEFE040C, 0xBF8C0F70, 0xBF800000,
+	0xBF800000, 0xF800180F, 0x03020100, 0xBF810000
+};
+
int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
unsigned alignment, unsigned heap, uint64_t alloc_flags,
uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
@@ -2189,6 +2198,10 @@ static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
shader = buffercopy_cs_shader_gfx9;
shader_size = sizeof(buffercopy_cs_shader_gfx9);
break;
+ case CS_HANG:
+ shader = memcpy_ps_hang;
+ shader_size = sizeof(memcpy_ps_hang);
+ break;
default:
return -1;
break;
@@ -2409,7 +2422,8 @@ static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
uint32_t ip_type,
- uint32_t ring)
+ uint32_t ring,
+ int hang)
{
amdgpu_context_handle context_handle;
amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
@@ -2425,7 +2439,8 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
int bo_cmd_size = 4096;
struct amdgpu_cs_request ibs_request = {0};
struct amdgpu_cs_ib_info ib_info= {0};
- uint32_t expired;
+ uint32_t expired, hang_state, hangs;
+ enum cs_type cs_type;
amdgpu_bo_list_handle bo_list;
struct amdgpu_cs_fence fence_status = {0};
@@ -2446,7 +2461,8 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
CU_ASSERT_EQUAL(r, 0);
memset(ptr_shader, 0, bo_shader_size);
- r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY );
+ cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
+ r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
CU_ASSERT_EQUAL(r, 0);
r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
@@ -2532,14 +2548,21 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
r = amdgpu_cs_query_fence_status(&fence_status,
AMDGPU_TIMEOUT_INFINITE,
0, &expired);
- CU_ASSERT_EQUAL(r, 0);
- CU_ASSERT_EQUAL(expired, true);
- /* verify if memcpy test result meets with expected */
- i = 0;
- while(i < bo_dst_size) {
- CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
- i++;
+ if (!hang) {
+ CU_ASSERT_EQUAL(r, 0);
+ CU_ASSERT_EQUAL(expired, true);
+
+ /* verify if memcpy test result meets with expected */
+ i = 0;
+ while(i < bo_dst_size) {
+ CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
+ i++;
+ }
+ } else {
+ r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
+ CU_ASSERT_EQUAL(r, 0);
+ CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
}
r = amdgpu_bo_list_destroy(bo_list);
@@ -2573,7 +2596,7 @@ static void amdgpu_compute_dispatch_test(void)
for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
- amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
+ amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
}
}
@@ -2590,7 +2613,25 @@ static void amdgpu_gfx_dispatch_test(void)
for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
- amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
+ amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
+ }
+}
+
+/* For each available ring of ip_type: run a normal memcpy dispatch, one with hang=1, then a normal one again. */
+void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
+{
+	struct drm_amdgpu_info_hw_ip info = {0}; /* zeroed: CU_ASSERT does not return, so a failed query must not leave garbage here */
+	uint32_t ring_id;
+	int r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
+
+	CU_ASSERT_EQUAL(r, 0);
+	if (!info.available_rings)
+		printf("SKIP ... as there's no ring for ip %u\n", ip_type);
+
+	for (ring_id = 0; ring_id < 32 && ((1u << ring_id) & info.available_rings); ring_id++) { /* unsigned, bounded shift */
+		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
+		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
+		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}
diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c
index 7d028829..61342d1a 100644
--- a/tests/amdgpu/deadlock_tests.c
+++ b/tests/amdgpu/deadlock_tests.c
@@ -114,6 +114,8 @@ static void amdgpu_deadlock_compute(void);
static void amdgpu_illegal_reg_access();
static void amdgpu_illegal_mem_access();
static void amdgpu_deadlock_sdma(void);
+static void amdgpu_dispatch_hang_gfx(void);
+static void amdgpu_dispatch_hang_compute(void);
CU_BOOL suite_deadlock_tests_enable(void)
{
@@ -188,6 +190,8 @@ CU_TestInfo deadlock_tests[] = {
{ "sdma ring block test (set amdgpu.lockup_timeout=50)", amdgpu_deadlock_sdma },
{ "illegal reg access test", amdgpu_illegal_reg_access },
{ "illegal mem access test (set amdgpu.vm_fault_stop=2)", amdgpu_illegal_mem_access },
+ { "gfx ring bad dispatch test (set amdgpu.lockup_timeout=50)", amdgpu_dispatch_hang_gfx },
+ { "compute ring bad dispatch test (set amdgpu.lockup_timeout=50,50)", amdgpu_dispatch_hang_compute },
CU_TEST_INFO_NULL,
};
@@ -488,3 +492,13 @@ static void amdgpu_illegal_mem_access()
{
bad_access_helper(0);
}
+
+static void amdgpu_dispatch_hang_gfx(void)
+{
+ amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_GFX); /* entry point registered in deadlock_tests[]; forwards to the shared helper with the GFX IP type */
+}
+
+static void amdgpu_dispatch_hang_compute(void)
+{
+ amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_COMPUTE); /* entry point registered in deadlock_tests[]; forwards to the shared helper with the COMPUTE IP type */
+}
--
2.17.1
More information about the dri-devel
mailing list