[igt-dev] [PATCH 2/7] lib/amdgpu: added binary shaders with disassembled annotations

vitaly.prosyak at amd.com vitaly.prosyak at amd.com
Thu Aug 11 00:29:15 UTC 2022


From: Vitaly Prosyak <vitaly.prosyak at amd.com>

 UMR was used to disassemble the binary shaders.
 Add the shader type declarations enum cs_type
 and enum ps_type.

Signed-off-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
---
 lib/amdgpu/amd_shaders.c         | 338 ++++++++++++++++++++++++++++++-
 lib/amdgpu/amd_shaders.h         |  14 +-
 lib/amdgpu/amd_shared_dispatch.h |  42 ++++
 3 files changed, 387 insertions(+), 7 deletions(-)
 create mode 100644 lib/amdgpu/amd_shared_dispatch.h

diff --git a/lib/amdgpu/amd_shaders.c b/lib/amdgpu/amd_shaders.c
index 31722744f..5505e504e 100644
--- a/lib/amdgpu/amd_shaders.c
+++ b/lib/amdgpu/amd_shaders.c
@@ -22,9 +22,9 @@
  *
  *
  */
-
+#include <amdgpu.h>
 #include "amd_shaders.h"
-
+#include <amdgpu_drm.h>
 
 #define CODE_OFFSET 512
 #define DATA_OFFSET 1024
@@ -33,19 +33,345 @@
 		      ((num & 0x0000ff00) << 8) | \
 		      ((num & 0x00ff0000) >> 8) | \
 		      ((num & 0x000000ff) << 24))
-
-
-static  uint32_t shader_bin[] = {
+/**
+ *
+ * s_mov_b32 s2, 0
+ * s_cmp_gt_u32 s2, 0x98967f
+ * ;;
+ * s_cbranch_scc1 4
+ * s_add_i32 s2, s2, 1
+ * s_cmp_gt_u32 s2, 0x98967f
+ * ;;
+ * s_cbranch_scc0 65532
+ * s_mov_b32 s3, 0xf000
+ * ;;
+ * s_mov_b32 s2, -1
+ * v_mov_b32_e32 v0, 42
+ * buffer_store_dword v0, off, s[0:3], 0
+ * ;;
+ * s_endpgm
+ */
+static  const
+uint32_t shader_bin[] = {
 	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
 	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
 	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
 	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
 };
 
-const uint32_t * get_shader_bin(uint32_t *size_bytes, uint32_t *code_offset, uint32_t *data_offset)
+const uint32_t *
+get_shader_bin(uint32_t *size_bytes, uint32_t *code_offset, uint32_t *data_offset)
 {
 	*size_bytes = sizeof(shader_bin);
 	*code_offset =  CODE_OFFSET;
 	*data_offset = DATA_OFFSET;
 	return shader_bin;
 }
+
+struct amdgpu_test_shader {
+	uint32_t *shader;	/* shader words laid out as header, body, foot */
+	uint32_t header_length;	/* uint32_t words at the start, copied once */
+	uint32_t body_length;	/* uint32_t words after the header, copied repeatedly */
+	uint32_t foot_length;	/* uint32_t words after the body, copied once at the end */
+};
+
+int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, uint32_t family_id)
+{
+	/**
+	 * v_sub_f32_e32 v0, s8, v134
+	 * buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen
+	 * ;;
+	 * s_waitcnt vmcnt(0)
+	 * buffer_store_format_xyzw v[1:4], v0, s[4:7], 0 idxen
+	 * ;;
+	 * s_endpgm
+	 */
+	unsigned int memcpy_cs_hang_slow_ai_codes[] = {
+	    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
+	    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
+	};
+
+	struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
+		memcpy_cs_hang_slow_ai_codes,
+		4,	/* header_length */
+		3,	/* body_length */
+		1	/* foot_length */
+	};
+	/**
+	 * s_lshl_b32 s0, s12, 6
+	 * v_add_u32_e32 v0, vcc, s0, v0
+	 * buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen
+	 * ;;
+	 * s_waitcnt vmcnt(0)
+	 * buffer_store_format_xyzw v[1:4], v0, s[8:11], 0 idxen
+	 * ;;
+	 * s_endpgm
+	 */
+	unsigned int memcpy_cs_hang_slow_rv_codes[] = {
+	    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
+	    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
+	};
+
+	struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
+		memcpy_cs_hang_slow_rv_codes,
+		4,	/* header_length */
+		3,	/* body_length */
+		1	/* foot_length */
+	};
+	/**
+	 * v_interp_mov_f32_e32 v209, p10, attr0.x
+	 * v_sub_f32_e32 v0, s8, v134
+	 * buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen
+	 * ;;
+	 * s_waitcnt vmcnt(0)
+	 * buffer_store_format_xyzw v[1:4], v0, s[4:7], 0 idxen
+	 * ;;
+	 * s_endpgm
+	 */
+	unsigned int memcpy_cs_hang_slow_nv_codes[] = {
+	    0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
+	    0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000
+	};
+
+	struct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
+	        memcpy_cs_hang_slow_nv_codes,
+	        4,	/* header_length */
+	        3,	/* body_length */
+	        1	/* foot_length */
+	};
+	struct amdgpu_test_shader *shader;
+	int i, loop = 0x10000;	/* loop: number of body copies written to ptr */
+	/* Select the shader descriptor for family_id; other families get -1. */
+	switch (family_id) {
+		case AMDGPU_FAMILY_AI:
+			shader = &memcpy_cs_hang_slow_ai;
+			break;
+		case AMDGPU_FAMILY_RV:
+			shader = &memcpy_cs_hang_slow_rv;
+			break;
+		case AMDGPU_FAMILY_NV:
+			shader = &memcpy_cs_hang_slow_nv;
+			break;
+		default:
+			return -1;
+	}
+	/* Output: header, loop x body, foot; ptr needs room for all of them. */
+	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
+
+	for (i = 0; i < loop; i++)
+		memcpy(ptr + shader->header_length + shader->body_length * i,
+			shader->shader + shader->header_length,
+			shader->body_length * sizeof(uint32_t));
+
+	memcpy(ptr + shader->header_length + shader->body_length * loop,
+		shader->shader + shader->header_length + shader->body_length,
+		shader->foot_length * sizeof(uint32_t));
+
+	return 0;
+}
+
+/* Copy the cs_type shader for GFX version 9 or 10 into ptr; 0 on success, else -1. */
+int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version)
+{
+	/**
+	 * v_and_b32_e32 v0, 0x3ff, v0
+	 * ;;
+	 * ...
+	 * v_sub_f32_e32 v0, s8, v134
+	 * v_mov_b32_e32 v1, 0
+	 * v_mov_b32_e32 v2, s4
+	 * v_mov_b32_e32 v3, s5
+	 * v_mov_b32_e32 v4, s6
+	 * v_mov_b32_e32 v5, s7
+	 * buffer_store_format_xyzw v[2:5], v0, s[0:3], 0 idxen
+	 * ;;
+	 * s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+	 * s_endpgm
+	 */
+	static const uint32_t bufferclear_cs_shader_gfx9[] = {
+	    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
+	    0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
+	    0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
+	    0xbf810000
+	};
+
+	/**
+	 * v_and_b32_e32 v0, 0x3ff, v0
+	 * ;;
+	 * ...
+	 * v_sub_f32_e32 v0, s8, v134
+	 * v_mov_b32_e32 v1, 0
+	 * buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen
+	 * ;;
+	 * s_waitcnt vmcnt(0)
+	 * buffer_store_format_xyzw v[2:5], v0, s[4:7], 0 idxen
+	 * ;;
+	 * s_endpgm
+	 */
+	static const uint32_t buffercopy_cs_shader_gfx9[] = {
+	    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
+	    0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
+	    0xe01c2000, 0x80010200, 0xbf810000
+	};
+
+	/**
+	 * ...
+	 * s_bcnt0_i32_b32 exec_lo, exec_lo
+	 * ...
+	 * ...
+	 * s_dcache_inv
+	 * ;;
+	 * ...
+	 * ...
+	 * ...
+	 * s_waitcnt lgkmcnt(0)
+	 * image_sample v[0:3], v2, s[4:11], s[0:3] dmask:0xf
+	 * ;;
+	 * s_not_b32 exec_lo, s12
+	 * s_waitcnt vmcnt(0)
+	 * s_nop 0
+	 * s_nop 0
+	 * ...
+	 * v_add_f32_e32 v129, v0, v0
+	 * s_endpgm
+	 */
+	static const uint32_t memcpy_ps_hang[] = {
+			0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
+			0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
+			0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
+			0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
+			0xF800180F, 0x03020100, 0xBF810000
+	};
+
+	/**
+	 * v_interp_mov_f32_e32 v209, invalid_param_4, attr0.x
+	 * v_sub_f32_e32 v0, s8, v134
+	 * v_mov_b32_e32 v0, s4
+	 * v_mov_b32_e32 v1, s5
+	 * v_mov_b32_e32 v2, s6
+	 * v_mov_b32_e32 v3, s7
+	 * buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
+	 * ;;
+	 * s_endpgm
+	 */
+	static const uint32_t bufferclear_cs_shader_gfx10[] = {
+		0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
+		0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
+		0xBF810000
+	};
+
+	/**
+	 * v_interp_mov_f32_e32 v209, p20, attr0.x
+	 * v_sub_f32_e32 v0, s8, v134
+	 * buffer_load_format_xyzw v[2:5], v1, s[0:3], 0 idxen
+	 * ;;
+	 * s_waitcnt vmcnt(0)
+	 * buffer_store_format_xyzw v[2:5], v1, s[4:7], 0 idxen
+	 * ;;
+	 * s_endpgm
+	 */
+	static const uint32_t buffercopy_cs_shader_gfx10[] = {
+		0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
+		0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
+	};
+
+	uint32_t shader_size = 0;
+	const uint32_t *shader = NULL;	/* stays NULL if version is not 9 or 10 */
+
+	switch (cs_type) {
+		case CS_BUFFERCLEAR:
+			if (version == 9) {
+				shader = bufferclear_cs_shader_gfx9;
+				shader_size = sizeof(bufferclear_cs_shader_gfx9);
+			} else if (version == 10) {
+				shader = bufferclear_cs_shader_gfx10;
+				shader_size = sizeof(bufferclear_cs_shader_gfx10);
+			}
+			break;
+		case CS_BUFFERCOPY:
+			if (version == 9) {
+				shader = buffercopy_cs_shader_gfx9;
+				shader_size = sizeof(buffercopy_cs_shader_gfx9);
+			} else if (version == 10) {
+				shader = buffercopy_cs_shader_gfx10;
+				shader_size = sizeof(buffercopy_cs_shader_gfx10);
+			}
+			break;
+		case CS_HANG:
+			shader = memcpy_ps_hang;
+			shader_size = sizeof(memcpy_ps_hang);
+			break;
+		default:
+			return -1;
+	}
+	if (!shader)	/* no shader selected: never memcpy from an unset pointer */
+		return -1;
+	memcpy(ptr, shader, shader_size);
+	return 0;
+}
+
+/* Write the slow-hang pixel shader to ptr: header, 0x40000 body copies, foot. */
+int
+amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
+{
+	/**
+	 * s_mov_b32 m0, s12
+	 * s_mov_b64 s[14:15], exec
+	 * s_wqm_b64 exec, exec
+	 * v_interp_p1_f32_e32 v2, v0, attr0.x
+	 * v_interp_p2_f32_e32 v2, v1, attr0.x
+	 * v_interp_p1_f32_e32 v3, v0, attr0.y
+	 * v_interp_p2_f32_e32 v3, v1, attr0.y
+	 * image_sample v[0:3], v2, s[0:7], s[8:11] dmask:0xf
+	 * ;;
+	 * s_mov_b64 exec, s[14:15]
+	 * s_waitcnt vmcnt(0)
+	 * s_nop 0
+	 * s_nop 0
+	 * s_nop 0
+	 * s_nop 0
+	 * exp mrt0 v0, v1, v2, v3 done vm
+	 * ;;
+	 * s_endpgm
+	 */
+	unsigned int memcpy_ps_hang_slow_ai_codes[] = {
+		0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
+		0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
+		0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
+		0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
+		0x03020100, 0xbf810000
+	};
+
+	struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
+		memcpy_ps_hang_slow_ai_codes,
+		7,	/* header_length */
+		2,	/* body_length */
+		9	/* foot_length */
+	};
+
+	struct amdgpu_test_shader *shader;
+	int i, loop = 0x40000;	/* loop: number of body copies written to ptr */
+
+	switch (family) {
+		case AMDGPU_FAMILY_AI:
+		case AMDGPU_FAMILY_RV:
+		case AMDGPU_FAMILY_NV: /* TODO check for correctness */
+			shader = &memcpy_ps_hang_slow_ai;
+			break;
+		default:
+			return -1;	/* no shader for this family */
+	}
+
+	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
+
+	for (i = 0; i < loop; i++)
+		memcpy(ptr + shader->header_length + shader->body_length * i,
+			shader->shader + shader->header_length,
+			shader->body_length * sizeof(uint32_t));
+
+	memcpy(ptr + shader->header_length + shader->body_length * loop,
+		shader->shader + shader->header_length + shader->body_length,
+		shader->foot_length * sizeof(uint32_t));
+
+	return 0;
+}
diff --git a/lib/amdgpu/amd_shaders.h b/lib/amdgpu/amd_shaders.h
index 1abb23ac3..22fc474fc 100644
--- a/lib/amdgpu/amd_shaders.h
+++ b/lib/amdgpu/amd_shaders.h
@@ -26,7 +26,19 @@
 #define AMD_SHADERS_H
 
 #include "drmtest.h"
+#include <amdgpu.h>
+#include "amd_shared_dispatch.h"
 
-const uint32_t * get_shader_bin(uint32_t *size_bytes, uint32_t *code_offset, uint32_t *data_offset);
+const uint32_t *
+get_shader_bin(uint32_t *size_bytes, uint32_t *code_offset, uint32_t *data_offset);
+
+int
+amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, uint32_t family_id);
+
+int
+amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type, uint32_t version);
+
+int
+amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family);
 
 #endif
diff --git a/lib/amdgpu/amd_shared_dispatch.h b/lib/amdgpu/amd_shared_dispatch.h
new file mode 100644
index 000000000..0dc5792f9
--- /dev/null
+++ b/lib/amdgpu/amd_shared_dispatch.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef AMD_SHARED_DISPATCH_H
+#define AMD_SHARED_DISPATCH_H
+
+
+enum cs_type {
+	CS_BUFFERCLEAR,	/* amdgpu_dispatch_load_cs_shader(): bufferclear shader, gfx9/gfx10 */
+	CS_BUFFERCOPY,	/* amdgpu_dispatch_load_cs_shader(): buffercopy shader, gfx9/gfx10 */
+	CS_HANG,	/* amdgpu_dispatch_load_cs_shader(): memcpy_ps_hang, any version */
+	CS_HANG_SLOW	/* not handled by amdgpu_dispatch_load_cs_shader() (returns -1) */
+};
+
+enum ps_type {	/* NOTE(review): no user of these values is visible here - confirm */
+	PS_CONST,
+	PS_TEX,
+	PS_HANG,
+	PS_HANG_SLOW
+};
+
+#endif
-- 
2.25.1



More information about the igt-dev mailing list