[PATCH i-g-t] tests/amdgpu: add amd dispatch subtest

Jesse Zhang jesse.zhang at amd.com
Tue Jul 2 10:58:44 UTC 2024


Add more cases that trigger a GPU reset:
1. Use invalid user data to trigger a GPU reset.
2. Use an invalid shader program address to trigger a GPU reset.
3. Use invalid shader settings to trigger a GPU reset.

Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
Signed-off-by: Jiadong Zhu <Jiadong.Zhu at amd.com>
---
 lib/amdgpu/amd_dispatch.c         | 35 +++++++++++++++++----------
 lib/amdgpu/amd_dispatch.h         |  5 ++--
 lib/amdgpu/amd_dispatch_helpers.c | 39 ++++++++++++++++++++++++-------
 lib/amdgpu/amd_dispatch_helpers.h |  9 ++++++-
 tests/amdgpu/amd_dispatch.c       | 31 +++++++++++++++++++++---
 5 files changed, 93 insertions(+), 26 deletions(-)

diff --git a/lib/amdgpu/amd_dispatch.c b/lib/amdgpu/amd_dispatch.c
index df7d56ea7..47d32dd40 100644
--- a/lib/amdgpu/amd_dispatch.c
+++ b/lib/amdgpu/amd_dispatch.c
@@ -70,7 +70,7 @@ amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
 	amdgpu_dispatch_write_cumask(base_cmd, version);
 
 	/* Writes shader state to HW */
-	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
+	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, 0);
 
 	/* Write constant data */
 	/* Writes the UAV constant data to the SGPRs. */
@@ -162,7 +162,7 @@ amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
 void
 amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 			    uint32_t ip_type, uint32_t ring, uint32_t version,
-			    int hang)
+			    enum shader_error_type hang)
 {
 	amdgpu_context_handle context_handle;
 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
@@ -202,7 +202,7 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 	igt_assert_eq(r, 0);
 	memset(ptr_shader, 0, bo_shader_size);
 
-	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
+	cs_type = hang == SHADER_INVALID_SHADER ? CS_HANG : CS_BUFFERCOPY;
 	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version);
 	igt_assert_eq(r, 0);
 
@@ -217,22 +217,28 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 					&bo_dst, (void **)&ptr_dst,
 					&mc_address_dst, &va_dst);
 	igt_assert_eq(r, 0);
-
 	///TODO helper function for this bloc
 	amdgpu_dispatch_init(ip_type, base_cmd,  version);
 	/*  Issue commands to set cu mask used in current dispatch */
 	amdgpu_dispatch_write_cumask(base_cmd, version);
+
+	if (hang == SHADER_INVALID_PROGRAM_ADDR)
+		mc_address_shader = 0;
 	/* Writes shader state to HW */
-	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
+	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, hang);
 	memset(ptr_src, 0x55, bo_dst_size);
 
 	/* Write constant data */
 	/* Writes the texture resource constants data to the SGPRs */
 	base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
 	base_cmd->emit(base_cmd, 0x240);
-	base_cmd->emit(base_cmd, mc_address_src);
-
-	base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
+	if (hang == SHADER_INVALID_USER_DATA) {
+		base_cmd->emit(base_cmd, mc_address_src);
+		base_cmd->emit(base_cmd, 0);
+	} else {
+		base_cmd->emit(base_cmd, mc_address_src);
+		base_cmd->emit(base_cmd, (mc_address_src >> 32) | 0x100000);
+	}
 
 	base_cmd->emit(base_cmd, 0x400);
 	if (version == 9)
@@ -247,8 +253,13 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 	/* Writes the UAV constant data to the SGPRs. */
 	base_cmd->emit(base_cmd, PACKET3_COMPUTE(PKT3_SET_SH_REG, 4));
 	base_cmd->emit(base_cmd, 0x244);
-	base_cmd->emit(base_cmd, mc_address_dst);
-	base_cmd->emit(base_cmd, (mc_address_dst >> 32) | 0x100000);
+	if (hang == SHADER_INVALID_USER_DATA) {
+		base_cmd->emit(base_cmd, mc_address_src);
+		base_cmd->emit(base_cmd, 0); /* fault injection: second word is 0 instead of (addr >> 32) | 0x100000 */
+	} else {
+		base_cmd->emit(base_cmd, mc_address_dst); /* UAV slot 0x244 carries the destination address, as before this patch */
+		base_cmd->emit(base_cmd, (mc_address_dst >> 32) | 0x100000);
+	}
 	base_cmd->emit(base_cmd, 0x400);
 	if (version == 9)
 		base_cmd->emit(base_cmd, 0x74fac);
@@ -401,7 +412,7 @@ amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
 	amdgpu_dispatch_write_cumask(base_cmd, version);
 
 	/* Writes shader state to HW */
-	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version);
+	amdgpu_dispatch_write2hw(base_cmd, mc_address_shader, version, 0);
 
 	/* Write constant data */
 	/* Writes the texture resource constants data to the SGPRs */
@@ -536,7 +547,7 @@ amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
 	}
 }
 
-void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, int hang)
+void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, enum shader_error_type hang)
 {
 	int r;
 	struct drm_amdgpu_info_hw_ip info;
diff --git a/lib/amdgpu/amd_dispatch.h b/lib/amdgpu/amd_dispatch.h
index 4df8b1355..5f05ee693 100644
--- a/lib/amdgpu/amd_dispatch.h
+++ b/lib/amdgpu/amd_dispatch.h
@@ -25,15 +25,16 @@
 #define AMD_DISPATCH_H
 
 #include <amdgpu.h>
+#include "amd_dispatch_helpers.h"
 
 void amdgpu_gfx_dispatch_test(amdgpu_device_handle device_handle,
-			      uint32_t ip_type, int hang);
+			      uint32_t ip_type, enum shader_error_type hang);
 
 void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 					uint32_t ip_type,
 					uint32_t ring,
 					uint32_t version,
-					int hang);
+					enum shader_error_type hang);
 
 void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle,
 				      uint32_t ip_type);
diff --git a/lib/amdgpu/amd_dispatch_helpers.c b/lib/amdgpu/amd_dispatch_helpers.c
index b0a5f550e..e255fb2ab 100644
--- a/lib/amdgpu/amd_dispatch_helpers.c
+++ b/lib/amdgpu/amd_dispatch_helpers.c
@@ -114,7 +114,7 @@ int amdgpu_dispatch_write_cumask(struct amdgpu_cmd_base * base, uint32_t version
  }
 
 
-int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr, uint32_t version)
+int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr, uint32_t version, enum shader_error_type hang)
 {
 	static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
 		{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	0x000C0041 },
@@ -123,6 +123,7 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
 		{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y,	0x00000001 },
 		{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z,	0x00000001 }
 	};
+
 	static uint32_t bufferclear_cs_shader_registers_gfx11[][2] = {
 		{0x2e12, 0x600C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x600C0041 },
 		{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
@@ -131,6 +132,14 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
 		{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
 	};
 
+	static uint32_t bufferclear_cs_shader_invalid_registers[][2] = {
+		{0x2e12, 0xffffffff},	//{ mmCOMPUTE_PGM_RSRC1,	  0xffffffff } - invalid on purpose; the gfx11 table uses 0x600C0041
+		{0x2e13, 0xffffffff},	//{ mmCOMPUTE_PGM_RSRC2,	  0xffffffff } - invalid on purpose; the gfx11 table uses 0x00000090
+		{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
+		{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
+		{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
+	};
+
 	static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx9);
 	static const uint32_t bufferclear_cs_shader_registers_num_gfx11 = ARRAY_SIZE(bufferclear_cs_shader_registers_gfx11);
 	int offset_prev = base->cdw;
@@ -146,19 +155,33 @@ int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base * base, uint64_t shader_addr
 	if ((version == 11) || (version == 12)) {
 		for (j = 0; j < bufferclear_cs_shader_registers_num_gfx11; j++) {
 			base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
-			/* - Gfx11ShRegBase */
-			base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
-			if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
-				bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
+			if (hang == SHADER_INVALID_PROGRAM_SETTING) {
+				/* - Gfx11ShRegBase */
+				base->emit(base,bufferclear_cs_shader_invalid_registers[j][0] - 0x2c00);
+				if (bufferclear_cs_shader_invalid_registers[j][0] ==0x2E12)
+					bufferclear_cs_shader_invalid_registers[j][1] &= ~(1<<29);
+
+				base->emit(base,bufferclear_cs_shader_invalid_registers[j][1]);
+			} else {
+				/* - Gfx11ShRegBase */
+				base->emit(base,bufferclear_cs_shader_registers_gfx11[j][0] - 0x2c00);
+				if (bufferclear_cs_shader_registers_gfx11[j][0] ==0x2E12)
+					bufferclear_cs_shader_registers_gfx11[j][1] &= ~(1<<29);
 
-			base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
+				base->emit(base,bufferclear_cs_shader_registers_gfx11[j][1]);
+			}
 		}
 	} else {
 		for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
 			base->emit(base, PACKET3_COMPUTE(PKT3_SET_SH_REG, 1));
 			/* - Gfx9ShRegBase */
-			base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
-			base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
+			if (hang == SHADER_INVALID_PROGRAM_SETTING) {
+				base->emit(base, bufferclear_cs_shader_invalid_registers[j][0] - 0x2c00);
+				base->emit(base, bufferclear_cs_shader_invalid_registers[j][1]);
+			} else {
+				base->emit(base,bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00);
+				base->emit(base,bufferclear_cs_shader_registers_gfx9[j][1]);
+			}
 		}
 	}
 	if (version == 10) {
diff --git a/lib/amdgpu/amd_dispatch_helpers.h b/lib/amdgpu/amd_dispatch_helpers.h
index a129e8e07..e01afb6fb 100644
--- a/lib/amdgpu/amd_dispatch_helpers.h
+++ b/lib/amdgpu/amd_dispatch_helpers.h
@@ -25,6 +25,13 @@
 #define AMD_DISPATCH_HELPERS_H
 
 #include <amdgpu.h>
+enum  shader_error_type {
+	SHADER_EXECSUCESS,              /* no fault injected; equals 0, the value non-hang callers pass */
+	SHADER_INVALID_SHADER,          /* memcpy test loads the CS_HANG shader instead of CS_BUFFERCOPY */
+	SHADER_INVALID_PROGRAM_ADDR,    /* COMPUTE_PGM */
+	SHADER_INVALID_PROGRAM_SETTING, /* COMPUTE_PGM_RSRC */
+	SHADER_INVALID_USER_DATA /* COMPUTE_USER_DATA */
+};
 
 struct amdgpu_cmd_base;
 
@@ -32,6 +39,6 @@ int amdgpu_dispatch_init( uint32_t ip_type,struct amdgpu_cmd_base *base_cmd, uin
 
 int amdgpu_dispatch_write_cumask(struct amdgpu_cmd_base *base_cmd, uint32_t version);
 
-int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base *base_cmd, uint64_t shader_addr, uint32_t version);
+int amdgpu_dispatch_write2hw(struct amdgpu_cmd_base *base_cmd, uint64_t shader_addr, uint32_t version, enum shader_error_type);
 
 #endif
diff --git a/tests/amdgpu/amd_dispatch.c b/tests/amdgpu/amd_dispatch.c
index 323284306..26e11cc84 100644
--- a/tests/amdgpu/amd_dispatch.c
+++ b/tests/amdgpu/amd_dispatch.c
@@ -10,6 +10,7 @@
 #include <sys/sysmacros.h>
 #include "lib/amdgpu/amd_memory.h"
 #include "lib/amdgpu/amd_command_submission.h"
+#include "lib/amdgpu/amd_dispatch_helpers.h"
 #include "lib/amdgpu/amd_dispatch.h"
 
 static void
@@ -31,9 +32,9 @@ amdgpu_dispatch_hang_gfx(amdgpu_device_handle device_handle)
 }
 
 static void
-amdgpu_dispatch_hang_compute(amdgpu_device_handle device_handle)
+amdgpu_dispatch_hang_compute(amdgpu_device_handle device_handle, enum shader_error_type error)
 {
-	amdgpu_gfx_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, 1);
+	amdgpu_gfx_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, error);
 }
 
 static void
@@ -125,7 +126,31 @@ igt_main
 	igt_subtest_with_dynamic("amdgpu-dispatch-hang-test-compute-with-IP-COMPUTE") {
 		if (arr_cap[AMD_IP_COMPUTE]) {
 			igt_dynamic_f("amdgpu-dispatch-hang-test-compute")
-			amdgpu_dispatch_hang_compute(device);
+			amdgpu_dispatch_hang_compute(device, SHADER_INVALID_SHADER);
+		}
+	}
+
+	igt_describe("Test GPU reset using a invalid shader program address to hang the job on compute ring");
+	igt_subtest_with_dynamic("amdgpu-dispatch-invalid-program-addr-test-compute-with-IP-COMPUTE") {
+		if (arr_cap[AMD_IP_COMPUTE]) {
+			igt_dynamic_f("amdgpu-dispatch-invalid-program-addr-test-compute")
+			amdgpu_dispatch_hang_compute(device, SHADER_INVALID_PROGRAM_ADDR);
+		}
+	}
+
+	igt_describe("Test GPU reset using a invalid shader program setting to hang the job on compute ring");
+	igt_subtest_with_dynamic("amdgpu-dispatch-invalid-setting-test-compute-with-IP-COMPUTE") {
+		if (arr_cap[AMD_IP_COMPUTE]) {
+			igt_dynamic_f("amdgpu-dispatch-invalid-setting-test-compute")
+			amdgpu_dispatch_hang_compute(device, SHADER_INVALID_PROGRAM_SETTING);
+		}
+	}
+
+	igt_describe("Test GPU reset using a invalid shader user data to hang the job on compute ring");
+	igt_subtest_with_dynamic("amdgpu-dispatch-invalid-user-data-test-compute-with-IP-COMPUTE") {
+		if (arr_cap[AMD_IP_COMPUTE]) {
+			igt_dynamic_f("amdgpu-dispatch-invalid-user-data-test-compute")
+			amdgpu_dispatch_hang_compute(device, SHADER_INVALID_USER_DATA);
 		}
 	}
 
-- 
2.25.1



More information about the igt-dev mailing list