[igt-dev] [PATCH 5/7] lib/amdgpu: added draw helper functions
vitaly.prosyak at amd.com
Thu Aug 11 18:41:32 UTC 2022
From: Vitaly Prosyak <vitaly.prosyak at amd.com>
Use UMR to disassemble the binary shaders.
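
The helpers drive a full rectlist draw through the GFX ring.  A minimal
usage sketch (signature as declared in amd_draw_helpers.h; the shader
BOs, their GPU addresses and the device handle are assumed to have been
set up by the caller, e.g. via amdgpu_draw_load_vs_shader() below and a
pixel-shader loader from amd_shaders.h):

	/* clear a VRAM surface to 0x33 with a draw on GFX ring 0, gfx9 */
	amdgpu_memset_draw(device, bo_shader_ps, bo_shader_vs,
			   mc_address_shader_ps, mc_address_shader_vs,
			   0 /* ring_id */, 9 /* version */);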
Signed-off-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
Acked-by: Christian König <christian.koenig at amd.com>
---
lib/amdgpu/amd_draw_helpers.c | 1498 +++++++++++++++++++++++++++++++++
lib/amdgpu/amd_draw_helpers.h | 50 ++
lib/meson.build | 3 +-
3 files changed, 1550 insertions(+), 1 deletion(-)
create mode 100644 lib/amdgpu/amd_draw_helpers.c
create mode 100644 lib/amdgpu/amd_draw_helpers.h
diff --git a/lib/amdgpu/amd_draw_helpers.c b/lib/amdgpu/amd_draw_helpers.c
new file mode 100644
index 000000000..29d247105
--- /dev/null
+++ b/lib/amdgpu/amd_draw_helpers.c
@@ -0,0 +1,1498 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include "amd_draw_helpers.h"
+#include "amd_memory.h"
+#include "amd_PM4.h"
+#include "amd_shared_dispatch.h" /* cs_type, ps_type */
+#include "amd_shaders.h"
+#include "amd_ip_blocks.h"
+
+static int
+amdgpu_draw_draw(struct amdgpu_cmd_base *base, uint32_t version)
+{
+ int i = base->cdw;
+
+ if (version == 9) {
+ /* mmIA_MULTI_VGT_PARAM */
+ base->emit(base, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ base->emit(base, 0x40000258);
+ base->emit(base, 0xd00ff);
+
+ /* mmVGT_PRIMITIVE_TYPE */
+ base->emit(base, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ base->emit(base, 0x10000242);
+		base->emit(base, 0x11); /* DI_PT_RECTLIST */
+
+ } else if (version == 10) {
+ /* mmGE_CNTL */
+ base->emit(base, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ base->emit(base, 0x25b);
+ base->emit(base, 0xff);
+
+ /* mmVGT_PRIMITIVE_TYPE */
+ base->emit(base, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ base->emit(base, 0x242);
+		base->emit(base, 0x11); /* DI_PT_RECTLIST */
+
+ }
+	base->emit(base, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
+	base->emit(base, 3); /* index count */
+	base->emit(base, 2); /* draw initiator: DI_SRC_SEL_AUTO_INDEX */
+
+ return base->cdw - i;
+}
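+
+/*
+ * All of the emits in this file build PM4 type-3 packets.  For
+ * reference, a sketch of the conventional header layout (the PACKET3()
+ * macro in amd_PM4.h is authoritative):
+ *
+ *	header = (3u << 30)		// packet type 3
+ *	       | ((n & 0x3fff) << 16)	// payload dwords minus one
+ *	       | ((op & 0xff) << 8);	// opcode
+ */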
+
+static int
+amdgpu_draw_ps_write2hw(struct amdgpu_cmd_base *base,
+ int ps_type,
+ uint64_t shader_addr,
+ uint32_t version)
+{
+ static const uint32_t ps_num_context_registers_gfx9 = 7;
+
+ static const uint32_t ps_const_context_reg_gfx9[][2] = {
+ {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
+ {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 },
+ {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F },
+ {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 },
+ {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
+ {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
+ {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
+ };
+
+ static const uint32_t ps_num_sh_registers_gfx9 = 2;
+
+ static const uint32_t ps_const_sh_registers_gfx9[][2] = {
+ {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
+ {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
+ };
+ static const uint32_t ps_num_sh_registers_gfx10 = 2;
+
+ static const uint32_t ps_const_sh_registers_gfx10[][2] = {
+ {0x2C0A, 0x000C0000},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 },
+ {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
+ };
+ static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
+ {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
+ {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
+ };
+
+ static const uint32_t ps_tex_context_reg_gfx9[][2] = {
+ {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
+ {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 },
+ {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F },
+ {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 },
+ {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
+ {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
+ {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
+ };
+
+
+ int j;
+ const uint32_t *sh_registers;
+ const uint32_t *context_registers;
+ uint32_t num_sh_reg, num_context_reg;
+ int i = base->cdw;
+
+ if (ps_type == PS_CONST) {
+ if (version == 9) {
+ sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
+ num_sh_reg = ps_num_sh_registers_gfx9;
+ } else if (version == 10) {
+ sh_registers = (const uint32_t *)ps_const_sh_registers_gfx10;
+ num_sh_reg = ps_num_sh_registers_gfx10;
+ }
+ context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
+ num_context_reg = ps_num_context_registers_gfx9;
+ } else if (ps_type == PS_TEX) {
+ sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
+ context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
+ num_sh_reg = ps_num_sh_registers_gfx9;
+ num_context_reg = ps_num_context_registers_gfx9;
+ }
+
+ if (version == 9) {
+		/* 0x2c07 SPI_SHADER_PGM_RSRC3_PS
+		 * 0x2c08 SPI_SHADER_PGM_LO_PS
+		 * 0x2c09 SPI_SHADER_PGM_HI_PS
+		 */
+		/* the multiplier 9 comes from SPI_SHADER_COL_FORMAT */
+		shader_addr += 256 * 9;
+ base->emit(base, PACKET3(PKT3_SET_SH_REG, 3));
+ base->emit(base, 0x7);
+ base->emit(base, 0xffff);
+ base->emit(base, shader_addr >> 8);
+ base->emit(base, shader_addr >> 40);
+ } else if (version == 10) {
+ shader_addr += 256 * 9;
+		/* 0x2c08 SPI_SHADER_PGM_LO_PS
+		 * 0x2c09 SPI_SHADER_PGM_HI_PS
+		 */
+
+ base->emit(base, PACKET3(PKT3_SET_SH_REG, 2));
+ base->emit(base, 0x8);
+ base->emit(base, shader_addr >> 8);
+ base->emit(base, shader_addr >> 40);
+
+ /* mmSPI_SHADER_PGM_RSRC3_PS */
+ base->emit(base, PACKET3(PKT3_SET_SH_REG_INDEX, 1));
+ base->emit(base, 0x30000007);
+ base->emit(base, 0xffff);
+ /* mmSPI_SHADER_PGM_RSRC4_PS */
+ base->emit(base, PACKET3(PKT3_SET_SH_REG_INDEX, 1));
+ base->emit(base, 0x30000001);
+ base->emit(base, 0xffff);
+ }
+
+ for (j = 0; j < num_sh_reg; j++) {
+ base->emit(base, PACKET3(PKT3_SET_SH_REG, 1));
+ base->emit(base, sh_registers[j * 2] - 0x2c00);
+ base->emit(base, sh_registers[j * 2 + 1]);
+ }
+
+ for (j = 0; j < num_context_reg; j++) {
+ if (context_registers[j * 2] != 0xA1C5) {
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, context_registers[j * 2] - 0xa000);
+ base->emit(base, context_registers[j * 2 + 1]);
+ }
+
+ if (context_registers[j * 2] == 0xA1B4) {
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x1b3);
+ base->emit(base, 2);
+ }
+ }
+
+ return base->cdw - i;
+}
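+
+/*
+ * The "- 0x2c00" / "- 0xa000" subtractions above turn absolute dword
+ * register indices into the block-relative offsets that SET_SH_REG and
+ * SET_CONTEXT_REG expect.  Sketch of the recurring pattern:
+ *
+ *	base->emit(base, PACKET3(PKT3_SET_SH_REG, 1));
+ *	base->emit(base, reg - 0x2c00);	// SH register block base
+ *	base->emit(base, value);
+ */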
+
+static int amdgpu_draw_vs_RectPosTexFast_write2hw(struct amdgpu_cmd_base *base,
+ int ps_type,
+ uint64_t shader_addr,
+ uint32_t version,
+ int hang_slow)
+{
+ int i = base->cdw;
+
+ /* mmPA_CL_VS_OUT_CNTL */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x207);
+ base->emit(base, 0);
+
+ if (version == 9) {
+ /* mmSPI_SHADER_PGM_RSRC3_VS */
+ base->emit(base, PACKET3(PKT3_SET_SH_REG, 1));
+ base->emit(base, 0x46);
+ base->emit(base, 0xffff);
+ } else if (version == 10) {
+ /* mmSPI_SHADER_PGM_RSRC3_VS */
+ base->emit(base, PACKET3(PKT3_SET_SH_REG_INDEX, 1));
+ base->emit(base, 0x30000046);
+ base->emit(base, 0xffff);
+ /* mmSPI_SHADER_PGM_RSRC4_VS */
+ base->emit(base, PACKET3(PKT3_SET_SH_REG_INDEX, 1));
+ base->emit(base, 0x30000041);
+ base->emit(base, 0xffff);
+ }
+
+ /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
+ base->emit(base, PACKET3(PKT3_SET_SH_REG, 2));
+ base->emit(base, 0x48);
+ base->emit(base, shader_addr >> 8);
+ base->emit(base, shader_addr >> 40);
+
+ /* mmSPI_SHADER_PGM_RSRC1_VS */
+ base->emit(base, PACKET3(PKT3_SET_SH_REG, 1));
+ base->emit(base, 0x4a);
+ if (version == 9)
+ base->emit(base, 0xc0081);
+ else if (version == 10)
+ base->emit(base, 0xc0041);
+ /* mmSPI_SHADER_PGM_RSRC2_VS */
+ base->emit(base, PACKET3(PKT3_SET_SH_REG, 1));
+ base->emit(base, 0x4b);
+ base->emit(base, 0x18);
+
+ /* mmSPI_VS_OUT_CONFIG */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x1b1);
+ base->emit(base, 2);
+
+ /* mmSPI_SHADER_POS_FORMAT */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x1c3);
+ base->emit(base, 4);
+
+ base->emit(base, PACKET3(PKT3_SET_SH_REG, 4));
+ base->emit(base, 0x4c);
+ base->emit_repeat(base, 0, 2);
+ base->emit_repeat(base, hang_slow ? 0x45000000 : 0x42000000, 2);
+
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 4));
+ base->emit(base, 0x50);
+ base->emit_repeat(base, 0, 2);
+ if (ps_type == PS_CONST) {
+ base->emit_repeat(base, 0, 2);
+ } else if (ps_type == PS_TEX) {
+ base->emit_repeat(base, 0x3f800000, 2);
+ }
+
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 4));
+	base->emit(base, 0x54);
+ base->emit_repeat(base, 0, 4);
+
+ return base->cdw - i;
+}
+
+static int
+amdgpu_draw_setup_and_write_drawblt_state(struct amdgpu_cmd_base *base,
+ uint32_t version, int hang_slow)
+{
+ /**
+ * s_load_dword s36, s[0:1], s0 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], 0x3
+ * ;;
+ * v_cndmask_b32_e32 v0, s42, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dwordx2 s[36:37], s[0:1], s10
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v16, s32, v0, vcc
+ * s_load_dword s36, s[0:1], s3 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, v255, v127, vcc
+ * s_load_dword s36, s[0:1], 0x8e
+ * ;;
+ * v_cndmask_b32_e32 v0, s15, v0, vcc
+ * v_cndmask_b32_e32 v0, s15, v0, vcc
+ * s_load_dwordx2 s[36:37], s[0:1], s5 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s18, v0, vcc
+ * s_load_dword s36, s[0:1], 0x10b
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s96 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], 0x200 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v128, vcc
+ * v_cndmask_b32_e32 v102, s17, v0, vcc
+ * s_load_dword s36, s[0:1], 0x292
+ * ;;
+ * v_cndmask_b32_e32 v0, s32, v0, vcc
+ * v_subrev_f32_e32 v1, v184, v0
+ * s_load_dword s36, s[0:1], 0x2b0
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], ttmp8 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ */
+ static const uint32_t cached_cmd_gfx9[] = {
+ 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
+ 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
+ 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
+ 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
+ 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
+ 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
+ 0xc0026900, 0x292, 0x20, 0x60201b8,
+ 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
+ };
+
+	/**
+	 * same layout as the gfx9 commands above, with gfx10-specific
+	 * values; not cross-checked using memcmp
+	 */
+ static const uint32_t cached_cmd_gfx10[] = {
+ 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
+ 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
+ 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
+ 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
+ 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
+ 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
+ 0xc0026900, 0x292, 0x20, 0x6020000,
+ 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
+ };
+
+ int i = base->cdw;
+ const uint32_t *cached_cmd_ptr;
+ uint32_t cached_cmd_size;
+
+ /* mmPA_SC_TILE_STEERING_OVERRIDE */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0xd7);
+ base->emit(base, 0);
+
+ base->emit(base, 0xffff1000);
+ base->emit(base, 0xc0021000);
+
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0xd7);
+ if (version == 9)
+ base->emit(base, 1);
+ else if (version == 10)
+ base->emit(base, 0);
+
+ /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 16));
+ base->emit(base, 0x2fe);
+	base->emit_repeat(base, 0, 16);
+
+ /* mmPA_SC_CENTROID_PRIORITY_0 */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ base->emit(base, 0x2f5);
+ base->emit_repeat(base, 0, 2);
+
+ if (version == 9) {
+ cached_cmd_ptr = cached_cmd_gfx9;
+ cached_cmd_size = sizeof(cached_cmd_gfx9);
+ } else if (version == 10) {
+ cached_cmd_ptr = cached_cmd_gfx10;
+ cached_cmd_size = sizeof(cached_cmd_gfx10);
+ }
+
+ base->emit_buf(base, cached_cmd_ptr, 0, cached_cmd_size);
+ if (hang_slow)
+ base->emit_at_offset(base, 0x8000800, 12);
+
+ if (version == 10) {
+ /* mmCB_RMI_GL2_CACHE_CONTROL */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x104);
+ base->emit(base, 0x40aa0055);
+ /* mmDB_RMI_L2_CACHE_CONTROL */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x1f);
+ base->emit(base, 0x2a0055);
+ }
+
+ return base->cdw - i;
+}
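+
+/*
+ * The cached_cmd arrays above are raw PM4 streams.  Decoding the first
+ * header of cached_cmd_gfx9 by hand as a worked example:
+ *
+ *	hdr    = 0xc0016900;
+ *	type   = hdr >> 30;		// 3: type-3 packet
+ *	count  = (hdr >> 16) & 0x3fff;	// 1: two payload dwords
+ *	opcode = (hdr >> 8) & 0xff;	// 0x69: SET_CONTEXT_REG
+ *
+ * i.e. a context-register write of value 0x0 at context offset 0x0.
+ */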
+
+static int
+amdgpu_draw_setup_and_write_drawblt_surf_info(struct amdgpu_cmd_base *base,
+ uint64_t dst_addr, int hang_slow,
+ uint32_t version)
+{
+ int i = base->cdw;
+
+ /* setup color buffer */
+ if (version == 9) {
+		/* offset reg
+		 * 0xA318 CB_COLOR0_BASE
+		 * 0xA319 CB_COLOR0_BASE_EXT
+		 * 0xA31A CB_COLOR0_ATTRIB2
+		 * 0xA31B CB_COLOR0_VIEW
+		 * 0xA31C CB_COLOR0_INFO
+		 * 0xA31D CB_COLOR0_ATTRIB
+		 * 0xA31E CB_COLOR0_DCC_CONTROL
+		 * 0xA31F CB_COLOR0_CMASK
+		 * 0xA320 CB_COLOR0_CMASK_BASE_EXT
+		 * 0xA321 CB_COLOR0_FMASK
+		 * 0xA322 CB_COLOR0_FMASK_BASE_EXT
+		 * 0xA323 CB_COLOR0_CLEAR_WORD0
+		 * 0xA324 CB_COLOR0_CLEAR_WORD1
+		 * 0xA325 CB_COLOR0_DCC_BASE
+		 * 0xA326 CB_COLOR0_DCC_BASE_EXT
+		 */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
+ base->emit(base, 0x318);
+ base->emit(base, dst_addr >> 8);
+ base->emit(base, dst_addr >> 40);
+ base->emit(base, hang_slow ? 0x3ffc7ff : 0x7c01f);
+ base->emit(base, 0);
+ base->emit(base, 0x50438);
+ base->emit(base, 0x10140000);
+ base->emit_repeat(base, 0, 9);
+
+ /* mmCB_MRT0_EPITCH */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x1e8);
+ base->emit(base, hang_slow ? 0xfff : 0x1f);
+ } else if (version == 10) {
+		/* 0xA318 CB_COLOR0_BASE
+		 * 0xA319 CB_COLOR0_PITCH
+		 * 0xA31A CB_COLOR0_SLICE
+		 * 0xA31B CB_COLOR0_VIEW
+		 * 0xA31C CB_COLOR0_INFO
+		 * 0xA31D CB_COLOR0_ATTRIB
+		 * 0xA31E CB_COLOR0_DCC_CONTROL
+		 * 0xA31F CB_COLOR0_CMASK
+		 * 0xA320 CB_COLOR0_CMASK_SLICE
+		 * 0xA321 CB_COLOR0_FMASK
+		 * 0xA322 CB_COLOR0_FMASK_SLICE
+		 * 0xA323 CB_COLOR0_CLEAR_WORD0
+		 * 0xA324 CB_COLOR0_CLEAR_WORD1
+		 * 0xA325 CB_COLOR0_DCC_BASE
+		 */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 14));
+ base->emit(base, 0x318);
+ base->emit(base, dst_addr >> 8);
+ base->emit_repeat(base, 0, 3);
+ base->emit(base, 0x50438);
+ base->emit_repeat(base, 0, 9);
+
+ /* 0xA390 CB_COLOR0_BASE_EXT */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x390);
+ base->emit(base, dst_addr >> 40);
+
+ /* 0xA398 CB_COLOR0_CMASK_BASE_EXT */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x398);
+ base->emit(base, 0);
+
+ /* 0xA3A0 CB_COLOR0_FMASK_BASE_EXT */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x3a0);
+ base->emit(base, 0);
+
+ /* 0xA3A8 CB_COLOR0_DCC_BASE_EXT */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x3a8);
+ base->emit(base, 0);
+
+ /* 0xA3B0 CB_COLOR0_ATTRIB2 */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x3b0);
+ base->emit(base, hang_slow ? 0x3ffc7ff : 0x7c01f);
+
+ /* 0xA3B8 CB_COLOR0_ATTRIB3 */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x3b8);
+ base->emit(base, 0x9014000);
+ }
+
+ /* 0xA32B CB_COLOR1_BASE */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x32b);
+ base->emit(base, 0);
+
+ /* 0xA33A CB_COLOR1_BASE */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x33a);
+ base->emit(base, 0);
+
+ /* SPI_SHADER_COL_FORMAT */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base->emit(base, 0x1c5);
+ base->emit(base, 9);
+
+ /* Setup depth buffer */
+ if (version == 9) {
+ /* mmDB_Z_INFO */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ base->emit(base, 0xe);
+ base->emit_repeat(base, 0, 2);
+ } else if (version == 10) {
+ /* mmDB_Z_INFO */
+ base->emit(base, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ base->emit(base, 0x10);
+ base->emit_repeat(base, 0, 2);
+ }
+
+ return base->cdw - i;
+}
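+
+/*
+ * Throughout the file a 48-bit GPU VA is split with ">> 8" / ">> 40":
+ * the *_BASE registers hold bits [39:8] of a 256-byte-aligned address,
+ * the *_BASE_EXT registers the bits above them.  Worked example:
+ *
+ *	va       = 0x123456789a00;
+ *	base     = va >> 8;	// 0x123456789a -> CB_COLOR0_BASE
+ *	base_ext = va >> 40;	// 0x12 -> CB_COLOR0_BASE_EXT
+ */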
+
+static int
+amdgpu_draw_init(struct amdgpu_cmd_base *base, uint32_t version)
+{
+ /**
+ * s_load_dword s36, s[0:1], 0x81
+ * ;;
+ * s_add_u32 s0, s0, s0
+ * v_sub_f16_e32 v0, s0, v32
+ * s_load_dword s36, s[0:1], 0x8c
+ * ;;
+ * ...
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], 0x90
+ * ;;
+ * s_add_u32 s0, s0, s0
+ * v_sub_f16_e32 v0, s0, v32
+ * s_load_dword s36, s[0:1], 0x94
+ * ;;
+ * s_add_u32 s0, s0, s0
+ * v_sub_f16_e32 v0, s0, v32
+ * s_load_dword s36, s[0:1], 0xb4
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_add_f16_e32 v192, s0, v0
+ * s_load_dword s36, s[0:1], s3 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s8 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s16 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s33 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], 0x2ad
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s85 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v128, vcc
+ * s_load_dword s36, s[0:1], s92 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dwordx2 s[36:37], s[0:1], 0x2de
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], 0x2e5
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dwordx2 s[36:37], s[0:1], ttmp9 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s5, v0, vcc
+ * v_add_f16_e32 v192, s0, v0
+ * v_add_f16_e32 v192, s0, v0
+ * v_add_f16_e32 v192, s0, v0
+ * v_add_f16_e32 v192, s0, v0
+ * s_load_dword s36, s[0:1], 0x311 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s3, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v8, s0, v0, vcc
+ * s_load_dword s36, s[0:1], 0x316
+ * ;;
+ * v_cndmask_b32_e32 v0, s30, v0, vcc
+ * v_cndmask_b32_e32 v0, s32, v0, vcc
+ * s_load_dword s36, s[0:1], s73 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s88 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], flat_scratch_hi glc
+ * ;;
+	 * v_cndmask_b32_e32 v0, s0, v0, vcc
+	 * s_load_dword s36, s[0:1], ttmp6 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s5 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s25 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dwordx2 s[36:37], s[0:1], xnack_mask_lo glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dwordx2 s[36:37], s[0:1], 0x1e1 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], 0x204
+ * ;;
+ * v_cndmask_b32_e32 v4, s0, v128, vcc
+ * v_cndmask_b32_e32 v0, s4, v0, vcc
+ * s_load_dwordx2 s[36:37], s[0:1], s12
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s50 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], 0x30e
+ * ;;
+ * ...
+ * ...
+ * s_load_dword s36, s[0:1], s20 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s38 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s36, s[0:1], s16 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s60, s[0:1], s1
+ * ;;
+ * s_load_dword s36, s[0:1], s1 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s1, v0, vcc
+ * s_load_dword s36, s[0:1], s24 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s2, v0, vcc
+ * s_load_dword s36, s[0:1], s6 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, v0, v1, vcc
+ * s_load_dword s100, s[0:1], s67 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s100, s[0:1], s72 glc
+ * ;;
+ * ...
+ * s_load_dword s100, s[0:1], s73 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s100, s[0:1], s74 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ * s_load_dword s100, s[0:1], s75 glc
+ * ;;
+ * v_cndmask_b32_e32 v0, s0, v0, vcc
+ */
+ static const uint32_t preamblecache_gfx9[] = {
+ 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
+ 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
+ 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
+ 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
+ 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
+ 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
+ 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
+ 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
+ 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
+ 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
+ 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
+ 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
+ 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
+ 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
+ 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
+ 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
+ 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
+ 0xc0017900, 0x24b, 0x0
+ };
+
+ /**
+ s_load_dword s36, s[0:1], 0x81
+ ;;
+ s_add_u32 s0, s0, s0
+ v_sub_f16_e32 v0, s0, v32
+ s_load_dword s36, s[0:1], 0x8c
+ ;;
+ ...
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], 0x90
+ ;;
+ s_add_u32 s0, s0, s0
+ v_sub_f16_e32 v0, s0, v32
+ s_load_dword s36, s[0:1], 0x94
+ ;;
+ s_add_u32 s0, s0, s0
+ v_sub_f16_e32 v0, s0, v32
+ s_load_dword s36, s[0:1], 0xb4
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_add_f16_e32 v192, s0, v0
+ s_load_dword s36, s[0:1], s3 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s8 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s16 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s33 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], 0x2ad
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s85 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v128, vcc
+ s_load_dword s36, s[0:1], s92 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dwordx2 s[36:37], s[0:1], 0x2de
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], 0x2e5
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dwordx2 s[36:37], s[0:1], ttmp9 glc
+ ;;
+ v_cndmask_b32_e32 v0, s5, v0, vcc
+ v_add_f16_e32 v192, s0, v0
+ v_add_f16_e32 v192, s0, v0
+ v_add_f16_e32 v192, s0, v0
+ v_add_f16_e32 v192, s0, v0
+ s_load_dwordx2 s[36:37], s[0:1], s16
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s3, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v8, s0, v0, vcc
+ s_load_dword s36, s[0:1], 0x316
+ ;;
+ v_cndmask_b32_e32 v0, s14, v0, vcc
+ v_cndmask_b32_e32 v0, s32, v0, vcc
+ s_load_dword s36, s[0:1], s73 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s88 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], flat_scratch_hi glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], ttmp6 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s5 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s6 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dwordx2 s[36:37], s[0:1], xnack_mask_lo glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dwordx2 s[36:37], s[0:1], 0x1e1 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], 0x204
+ ;;
+ v_cndmask_b32_e32 v4, s0, v128, vcc
+ v_cndmask_b32_e32 v0, s4, v0, vcc
+ s_load_dwordx2 s[36:37], s[0:1], s12
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s50 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], 0x30e
+ ;;
+ ...
+ ...
+ s_load_dword s36, s[0:1], s20 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s10 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s38 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s16 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s91 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s36, s[0:1], s84 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s60, s[0:1], s1
+ ;;
+ s_load_dword s36, s[0:1], s1 glc
+ ;;
+ v_cndmask_b32_e32 v0, s1, v0, vcc
+ s_load_dword s36, s[0:1], s14 glc
+ ;;
+ v_cndmask_b32_e32 v0, s2, v0, vcc
+ s_load_dword s36, s[0:1], s6 glc
+ ;;
+ v_cndmask_b32_e32 v0, v0, v1, vcc
+ s_load_dword s36, s[0:1], s18 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v1, vcc
+ s_load_dword s100, s[0:1], ttmp11 glc
+ ;;
+ v_cndmask_b32_e32 v0, s32, v0, vcc
+ s_load_dword xnack_mask_lo, s[0:1], s67 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s100, s[0:1], s73 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s100, s[0:1], s74 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s100, s[0:1], s75 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s100, s[0:1], s89 glc
+ ;;
+ ...
+ s_load_dword s100, s[0:1], s95 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s100, s[0:1], s96 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s100, s[0:1], s98 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s88, s[0:1], s69 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dword s88, s[0:1], s6 glc
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dwordx2 s[88:89], s[0:1], 0x70
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ s_load_dwordx2 s[88:89], s[0:1], 0x30
+ ;;
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ v_cndmask_b32_e32 v0, s0, v0, vcc
+ */
+ static const uint32_t preamblecache_gfx10[] = {
+ 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
+ 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
+ 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
+ 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
+ 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
+ 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
+ 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
+ 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
+ 0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20,
+ 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
+ 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0,
+ 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
+ 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
+ 0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
+ 0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2,
+ 0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0,
+ 0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff,
+ 0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0,
+ 0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0,
+ 0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
+ };
+ int i = base->cdw;
+ const uint32_t *preamblecache_ptr;
+ uint32_t preamblecache_size;
+
+ /* Write context control and load shadowing register if necessary */
+ base->emit(base, PACKET3(PKT3_CONTEXT_CONTROL, 1));
+ base->emit_repeat(base, 0x80000000, 2);
+
+ if (version == 9) {
+ preamblecache_ptr = preamblecache_gfx9;
+ preamblecache_size = sizeof(preamblecache_gfx9);
+ } else if (version == 10) {
+ preamblecache_ptr = preamblecache_gfx10;
+ preamblecache_size = sizeof(preamblecache_gfx10);
+ }
+ base->emit_buf(base, preamblecache_ptr, 0, preamblecache_size);
+
+ return base->cdw - i;
+}
+
+void
+amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
+ amdgpu_bo_handle bo_shader_ps,
+ amdgpu_bo_handle bo_shader_vs,
+ uint64_t mc_address_shader_ps,
+ uint64_t mc_address_shader_vs,
+ uint32_t ring, int version, int hang)
+{
+ amdgpu_context_handle context_handle;
+ amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
+ volatile unsigned char *ptr_dst;
+ unsigned char *ptr_src;
+ uint32_t *ptr_cmd;
+ uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
+ amdgpu_va_handle va_dst, va_src, va_cmd;
+ int i, r;
+ int bo_size = 16384;
+ int bo_cmd_size = 4096;
+ struct amdgpu_cs_request ibs_request = {0};
+	struct amdgpu_cs_ib_info ib_info = {0};
+ uint32_t hang_state, hangs;
+ uint32_t expired;
+ amdgpu_bo_list_handle bo_list;
+ struct amdgpu_cs_fence fence_status = {0};
+
+	struct amdgpu_cmd_base *base_cmd = get_cmd_base();
+
+ r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &bo_cmd, (void **)&ptr_cmd,
+ &mc_address_cmd, &va_cmd);
+ igt_assert_eq(r, 0);
+ memset(ptr_cmd, 0, bo_cmd_size);
+ base_cmd->attach_buf(base_cmd, ptr_cmd, bo_cmd_size);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
+ AMDGPU_GEM_DOMAIN_VRAM, 0,
+ &bo_src, (void **)&ptr_src,
+ &mc_address_src, &va_src);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
+ AMDGPU_GEM_DOMAIN_VRAM, 0,
+ &bo_dst, (void **)&ptr_dst,
+ &mc_address_dst, &va_dst);
+ igt_assert_eq(r, 0);
+
+ memset(ptr_src, 0x55, bo_size);
+
+ amdgpu_draw_init(base_cmd, version);
+
+ amdgpu_draw_setup_and_write_drawblt_surf_info(base_cmd, mc_address_dst, 0, version);
+
+ amdgpu_draw_setup_and_write_drawblt_state(base_cmd, version, 0);
+
+ amdgpu_draw_vs_RectPosTexFast_write2hw(base_cmd, PS_TEX, mc_address_shader_vs,
+ version, 0);
+
+ amdgpu_draw_ps_write2hw(base_cmd, PS_TEX, mc_address_shader_ps, version);
+
+ base_cmd->emit(base_cmd, PACKET3(PKT3_SET_SH_REG, 8));
+ if (version == 9) {
+ base_cmd->emit(base_cmd, 0xc);
+ base_cmd->emit(base_cmd, mc_address_src >> 8);
+ base_cmd->emit(base_cmd, mc_address_src >> 40 | 0x10e00000);
+ base_cmd->emit(base_cmd, 0x7c01f);
+ base_cmd->emit(base_cmd, 0x90500fac);
+ base_cmd->emit(base_cmd, 0x3e000);
+ base_cmd->emit_repeat(base_cmd, 0, 3);
+ } else if (version == 10) {
+ base_cmd->emit(base_cmd, 0xc);
+ base_cmd->emit(base_cmd, mc_address_src >> 8);
+ base_cmd->emit(base_cmd, mc_address_src >> 40 | 0xc4b00000);
+ base_cmd->emit(base_cmd, 0x8007c007);
+ base_cmd->emit(base_cmd, 0x90500fac);
+ base_cmd->emit_repeat(base_cmd, 0, 2);
+ base_cmd->emit(base_cmd, 0x400);
+ base_cmd->emit(base_cmd, 0);
+ }
+
+ base_cmd->emit(base_cmd, PACKET3(PKT3_SET_SH_REG, 4));
+ base_cmd->emit(base_cmd, 0x14);
+ base_cmd->emit(base_cmd, 0x92);
+ base_cmd->emit_repeat(base_cmd, 0, 3);
+
+ base_cmd->emit(base_cmd, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base_cmd->emit(base_cmd, 0x191);
+ base_cmd->emit(base_cmd, 0);
+
+ amdgpu_draw_draw(base_cmd, version);
+
+ base_cmd->emit_aligned(base_cmd, 7, 0xffff1000); /* type3 nop packet */
+
+ resources[0] = bo_dst;
+ resources[1] = bo_src;
+ resources[2] = bo_shader_ps;
+ resources[3] = bo_shader_vs;
+ resources[4] = bo_cmd;
+ r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
+ igt_assert_eq(r, 0);
+
+ ib_info.ib_mc_address = mc_address_cmd;
+ ib_info.size = base_cmd->cdw;
+ ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+ ibs_request.ring = ring;
+ ibs_request.resources = bo_list;
+ ibs_request.number_of_ibs = 1;
+ ibs_request.ibs = &ib_info;
+ ibs_request.fence_info.handle = NULL;
+ r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
+ igt_assert_eq(r, 0);
+
+ fence_status.ip_type = AMDGPU_HW_IP_GFX;
+ fence_status.ip_instance = 0;
+ fence_status.ring = ring;
+ fence_status.context = context_handle;
+ fence_status.fence = ibs_request.seq_no;
+
+	/* wait for the IB to complete */
+ r = amdgpu_cs_query_fence_status(&fence_status,
+ AMDGPU_TIMEOUT_INFINITE,
+ 0, &expired);
+ /**
+ * TODO improve here
+ */
+ if (!hang) {
+ igt_assert_eq(r, 0);
+ igt_assert_eq(expired, true);
+
+		/* verify that the memcpy result matches the expected pattern */
+		i = 0;
+		while (i < bo_size) {
+			igt_assert_eq(ptr_dst[i], ptr_src[i]);
+			i++;
+		}
+ } else {
+ r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
+ igt_assert_eq(r, 0);
+ igt_assert_eq(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
+ }
+
+ amdgpu_bo_list_destroy(bo_list);
+
+ amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
+ amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
+
+ amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
+
+ amdgpu_cs_ctx_free(context_handle);
+ free_cmd_base(base_cmd);
+}
+
+void
+amdgpu_memset_draw(amdgpu_device_handle device_handle,
+ amdgpu_bo_handle bo_shader_ps,
+ amdgpu_bo_handle bo_shader_vs,
+ uint64_t mc_address_shader_ps,
+ uint64_t mc_address_shader_vs,
+ uint32_t ring_id, uint32_t version)
+{
+ amdgpu_context_handle context_handle;
+ amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
+ volatile unsigned char *ptr_dst;
+ uint32_t *ptr_cmd;
+ uint64_t mc_address_dst, mc_address_cmd;
+ amdgpu_va_handle va_dst, va_cmd;
+ int i, r;
+ int bo_dst_size = 16384;
+ int bo_cmd_size = 4096;
+ struct amdgpu_cs_request ibs_request = {0};
+ struct amdgpu_cs_ib_info ib_info = {0};
+ struct amdgpu_cs_fence fence_status = {0};
+ uint32_t expired;
+ amdgpu_bo_list_handle bo_list;
+	struct amdgpu_cmd_base *base_cmd = get_cmd_base();
+
+ r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &bo_cmd, (void **)&ptr_cmd,
+ &mc_address_cmd, &va_cmd);
+ igt_assert_eq(r, 0);
+ memset(ptr_cmd, 0, bo_cmd_size);
+ base_cmd->attach_buf(base_cmd, ptr_cmd, bo_cmd_size);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
+ AMDGPU_GEM_DOMAIN_VRAM, 0,
+ &bo_dst, (void **)&ptr_dst,
+ &mc_address_dst, &va_dst);
+ igt_assert_eq(r, 0);
+
+ amdgpu_draw_init(base_cmd, version);
+ amdgpu_draw_setup_and_write_drawblt_surf_info(base_cmd, mc_address_dst, 0, version);
+ amdgpu_draw_setup_and_write_drawblt_state(base_cmd, version, 0);
+ amdgpu_draw_vs_RectPosTexFast_write2hw(base_cmd, PS_CONST, mc_address_shader_vs, version,0);
+ amdgpu_draw_ps_write2hw(base_cmd, PS_CONST, mc_address_shader_ps, version);
+
+ base_cmd->emit(base_cmd, PACKET3(PKT3_SET_SH_REG, 4));
+ base_cmd->emit(base_cmd, 0xc);
+ base_cmd->emit_repeat(base_cmd, 0x33333333, 4);
+
+ amdgpu_draw_draw(base_cmd, version);
+
+	base_cmd->emit_aligned(base_cmd, 7, 0xffff1000); /* type3 nop packet */
+
+ resources[0] = bo_dst;
+ resources[1] = bo_shader_ps;
+ resources[2] = bo_shader_vs;
+ resources[3] = bo_cmd;
+ r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
+ igt_assert_eq(r, 0);
+
+ ib_info.ib_mc_address = mc_address_cmd;
+ ib_info.size = base_cmd->cdw;
+ ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+ ibs_request.ring = ring_id;
+ ibs_request.resources = bo_list;
+ ibs_request.number_of_ibs = 1;
+ ibs_request.ibs = &ib_info;
+ ibs_request.fence_info.handle = NULL;
+
+ /* submit CS */
+ r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_list_destroy(bo_list);
+ igt_assert_eq(r, 0);
+
+ fence_status.ip_type = AMDGPU_HW_IP_GFX;
+ fence_status.ip_instance = 0;
+ fence_status.ring = ring_id;
+ fence_status.context = context_handle;
+ fence_status.fence = ibs_request.seq_no;
+
+	/* wait for the IB to complete */
+ r = amdgpu_cs_query_fence_status(&fence_status,
+ AMDGPU_TIMEOUT_INFINITE,
+ 0, &expired);
+ igt_assert_eq(r, 0);
+ igt_assert_eq(expired, true);
+
+	/* verify that the memset result matches the expected pattern */
+	i = 0;
+
+	/**
+	 * TODO improve here
+	 */
+	while (i < bo_dst_size) {
+		igt_assert_eq(ptr_dst[i++], 0x33);
+	}
+
+ amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
+
+ amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
+
+ amdgpu_cs_ctx_free(context_handle);
+ free_cmd_base(base_cmd);
+}
+
+/* load RectPosTexFast_VS */
+int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version)
+{
+ /**
+ v_cvt_f32_i32_e32 v0, v0
+ v_add_f32_e32 v0, -1.0, v0
+ v_cmp_eq_f32_e64 s[10:11], v0, 0
+ ;;
+ v_mov_b32_e32 v1, s2
+ v_mov_b32_e32 v2, s0
+ v_add_f32_e32 v0, -1.0, v0
+ v_mov_b32_e32 v3, s6
+ v_mov_b32_e32 v4, s4
+ v_cndmask_b32_e64 v1, v2, v1, s[10:11]
+ ;;
+ v_cmp_eq_f32_e32 vcc, 0, v0
+ v_mov_b32_e32 v0, s0
+ v_mov_b32_e32 v2, s3
+ v_mov_b32_e32 v5, s1
+ v_cndmask_b32_e64 v3, v4, v3, s[10:11]
+ ;;
+ v_mov_b32_e32 v6, s7
+ v_mov_b32_e32 v7, s5
+ v_cndmask_b32_e32 v0, v1, v0, vcc
+ v_cndmask_b32_e32 v1, v5, v2, vcc
+ v_mov_b32_e32 v2, s8
+ v_mov_b32_e32 v5, 1.0
+ v_cndmask_b32_e32 v3, v3, v4, vcc
+ v_cndmask_b32_e32 v4, v7, v6, vcc
+ v_mov_b32_e32 v6, s9
+ exp pos0 v0, v1, v2, v5 done
+ ;;
+ exp param0 v3, v4, v6, v5
+ ;;
+ s_endpgm
+ */
+ static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
+ 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
+ 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
+ 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
+ 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
+ 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
+ 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
+ 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
+ 0xC400020F, 0x05060403, 0xBF810000
+ };
+
+ /**
+ v_cvt_f32_i32_e32 v0, v0
+ v_subrev_f32_e32 v0, -1.0, v0
+ v_mov_b32_e32 v1, s2
+ v_mov_b32_e32 v2, s6
+ ...
+ v_subrev_f32_e32 v0, -1.0, v0
+ v_interp_p2_f32_e32 v64, v1, attr0.x
+ v_cndmask_b32_e32 v213, s0, v1, vcc
+ v_mov_b32_e32 v3, s3
+ v_interp_p2_f32_e32 v64, v2, attr0.x
+ v_cndmask_b32_e32 v213, s4, v2, vcc
+ v_mov_b32_e32 v4, s7
+ ...
+ v_interp_p2_f32_e32 v64, v0, attr0.x
+ v_cndmask_b32_e32 v212, v1, v0, vcc
+ v_interp_p2_f32_e32 v64, v1, attr0.x
+ v_cndmask_b32_e32 v213, s1, v3, vcc
+ v_mov_b32_e32 v3, s8
+ v_mov_b32_e32 v5, 1.0
+ v_interp_p2_f32_e32 v64, v2, attr0.x
+ v_cndmask_b32_e32 v212, v2, v4, vcc
+ v_interp_p2_f32_e32 v64, v4, attr0.x
+ v_cndmask_b32_e32 v213, s5, v4, vcc
+ v_mov_b32_e32 v6, s9
+ ...
+ v_sub_f32_e32 v129, v0, v128
+ ...
+ v_sub_f32_e32 v131, s2, v2
+ s_endpgm
+ */
+ static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
+ 0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206,
+ 0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200,
+ 0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207,
+ 0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001,
+ 0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002,
+ 0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209,
+ 0xF80008CF, 0x05030100, 0xF800020F, 0x05060402,
+ 0xBF810000
+ };
+
+ const uint32_t *shader;
+ uint32_t shader_size;
+
+ if (version == 9) {
+ shader = vs_RectPosTexFast_shader_gfx9;
+ shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
+	} else if (version == 10) {
+		shader = vs_RectPosTexFast_shader_gfx10;
+		shader_size = sizeof(vs_RectPosTexFast_shader_gfx10);
+	} else {
+		/* only gfx9 and gfx10 shaders are provided */
+		return -1;
+	}
+
+ memcpy(ptr, shader, shader_size);
+
+ return 0;
+}
+
+void
+amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version)
+{
+ amdgpu_context_handle context_handle;
+ amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
+ amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
+ void *ptr_shader_ps;
+ void *ptr_shader_vs;
+ volatile unsigned char *ptr_dst;
+ unsigned char *ptr_src;
+ uint32_t *ptr_cmd;
+ uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
+ uint64_t mc_address_shader_ps, mc_address_shader_vs;
+ amdgpu_va_handle va_shader_ps, va_shader_vs;
+ amdgpu_va_handle va_dst, va_src, va_cmd;
+ struct amdgpu_gpu_info gpu_info = {0};
+ int r;
+ int bo_size = 0x4000000;
+ int bo_shader_ps_size = 0x400000;
+ int bo_shader_vs_size = 4096;
+ int bo_cmd_size = 4096;
+ struct amdgpu_cs_request ibs_request = {0};
+	struct amdgpu_cs_ib_info ib_info = {0};
+ uint32_t hang_state, hangs, expired;
+ amdgpu_bo_list_handle bo_list;
+ struct amdgpu_cs_fence fence_status = {0};
+
+	struct amdgpu_cmd_base *base_cmd = get_cmd_base();
+
+ r = amdgpu_query_gpu_info(device_handle, &gpu_info);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ &bo_cmd, (void **)&ptr_cmd,
+ &mc_address_cmd, &va_cmd);
+ igt_assert_eq(r, 0);
+ memset(ptr_cmd, 0, bo_cmd_size);
+ base_cmd->attach_buf(base_cmd, ptr_cmd, bo_cmd_size);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
+ AMDGPU_GEM_DOMAIN_VRAM, 0,
+ &bo_shader_ps, &ptr_shader_ps,
+ &mc_address_shader_ps, &va_shader_ps);
+ igt_assert_eq(r, 0);
+ memset(ptr_shader_ps, 0, bo_shader_ps_size);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
+ AMDGPU_GEM_DOMAIN_VRAM, 0,
+ &bo_shader_vs, &ptr_shader_vs,
+ &mc_address_shader_vs, &va_shader_vs);
+ igt_assert_eq(r, 0);
+ memset(ptr_shader_vs, 0, bo_shader_vs_size);
+
+ r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
+ AMDGPU_GEM_DOMAIN_VRAM, 0,
+ &bo_src, (void **)&ptr_src,
+ &mc_address_src, &va_src);
+ igt_assert_eq(r, 0);
+
+ r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
+ AMDGPU_GEM_DOMAIN_VRAM, 0,
+ &bo_dst, (void **)&ptr_dst,
+ &mc_address_dst, &va_dst);
+ igt_assert_eq(r, 0);
+
+ memset(ptr_src, 0x55, bo_size);
+
+ amdgpu_draw_init(base_cmd, version);
+
+	amdgpu_draw_setup_and_write_drawblt_surf_info(base_cmd, mc_address_dst, 1, version);
+
+ amdgpu_draw_setup_and_write_drawblt_state(base_cmd, version, 1);
+
+ amdgpu_draw_vs_RectPosTexFast_write2hw(base_cmd, PS_TEX,
+ mc_address_shader_vs, version, 1);
+
+ amdgpu_draw_ps_write2hw(base_cmd, PS_TEX, mc_address_shader_ps, version);
+
+ base_cmd->emit(base_cmd, PACKET3(PKT3_SET_SH_REG, 8));
+
+ if (version == 9) {
+ base_cmd->emit(base_cmd, 0xc);
+ base_cmd->emit(base_cmd, mc_address_src >> 8);
+ base_cmd->emit(base_cmd, mc_address_src >> 40 | 0x10e00000);
+ base_cmd->emit(base_cmd, 0x1ffcfff);
+ base_cmd->emit(base_cmd, 0x90500fac);
+ base_cmd->emit(base_cmd, 0x1ffe000);
+		base_cmd->emit_repeat(base_cmd, 0, 3);
+ } else if (version == 10) {
+ base_cmd->emit(base_cmd, 0xc);
+ base_cmd->emit(base_cmd, mc_address_src >> 8);
+ base_cmd->emit(base_cmd, mc_address_src >> 40 | 0xc4b00000);
+ base_cmd->emit(base_cmd, 0x81ffc1ff);
+ base_cmd->emit(base_cmd, 0x90500fac);
+		base_cmd->emit_repeat(base_cmd, 0, 4);
+ }
+
+ base_cmd->emit(base_cmd, PACKET3(PKT3_SET_SH_REG, 4));
+ base_cmd->emit(base_cmd, 0x14);
+ base_cmd->emit(base_cmd, 0x92);
+	base_cmd->emit_repeat(base_cmd, 0, 3);
+
+ base_cmd->emit(base_cmd, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ base_cmd->emit(base_cmd, 0x191);
+ base_cmd->emit(base_cmd, 0);
+
+ amdgpu_draw_draw(base_cmd, version);
+
+	base_cmd->emit_aligned(base_cmd, 7, 0xffff1000); /* type3 nop packet */
+
+ resources[0] = bo_dst;
+ resources[1] = bo_src;
+ resources[2] = bo_shader_ps;
+ resources[3] = bo_shader_vs;
+ resources[4] = bo_cmd;
+ r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
+ igt_assert_eq(r, 0);
+
+ ib_info.ib_mc_address = mc_address_cmd;
+ ib_info.size = base_cmd->cdw;
+ ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+ ibs_request.ring = ring;
+ ibs_request.resources = bo_list;
+ ibs_request.number_of_ibs = 1;
+ ibs_request.ibs = &ib_info;
+ ibs_request.fence_info.handle = NULL;
+ r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
+ igt_assert_eq(r, 0);
+
+ fence_status.ip_type = AMDGPU_HW_IP_GFX;
+ fence_status.ip_instance = 0;
+ fence_status.ring = ring;
+ fence_status.context = context_handle;
+ fence_status.fence = ibs_request.seq_no;
+
+	/* wait for the IB to complete; this submission is expected to hang */
+ r = amdgpu_cs_query_fence_status(&fence_status,
+ AMDGPU_TIMEOUT_INFINITE,
+ 0, &expired);
+
+ r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
+ igt_assert_eq(r, 0);
+ igt_assert_eq(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
+
+ amdgpu_bo_list_destroy(bo_list);
+
+ amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
+ amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
+
+ amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
+
+ amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
+ amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
+
+ amdgpu_cs_ctx_free(context_handle);
+ free_cmd_base(base_cmd);
+}
diff --git a/lib/amdgpu/amd_draw_helpers.h b/lib/amdgpu/amd_draw_helpers.h
new file mode 100644
index 000000000..01bb080ec
--- /dev/null
+++ b/lib/amdgpu/amd_draw_helpers.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef __AMD_DRAW_HELPERS_H__
+#define __AMD_DRAW_HELPERS_H__
+#include <amdgpu.h>
+
+void
+amdgpu_memset_draw(amdgpu_device_handle device_handle,
+ amdgpu_bo_handle bo_shader_ps,
+ amdgpu_bo_handle bo_shader_vs,
+ uint64_t mc_address_shader_ps,
+ uint64_t mc_address_shader_vs,
+ uint32_t ring_id, uint32_t version);
+
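+/**
+ * amdgpu_memcpy_draw() - copy a 16 KiB buffer via a textured rectlist draw.
+ * @hang: when non-zero, the shaders are expected to fault and the helper
+ *        checks amdgpu_cs_query_reset_state() for AMDGPU_CTX_UNKNOWN_RESET
+ *        instead of comparing source and destination.
+ *
+ * Example call (sketch, gfx9 part, GFX ring 0, no hang expected):
+ *
+ *	amdgpu_memcpy_draw(dev, bo_ps, bo_vs, mc_ps, mc_vs, 0, 9, 0);
+ */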
+void
+amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
+ amdgpu_bo_handle bo_shader_ps,
+ amdgpu_bo_handle bo_shader_vs,
+ uint64_t mc_address_shader_ps,
+ uint64_t mc_address_shader_vs,
+ uint32_t ring, int version, int hang);
+
+int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version);
+
+void
+amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle,
+ uint32_t ring, int version);
+
+#endif
diff --git a/lib/meson.build b/lib/meson.build
index 9bb0c8410..f110d8901 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -136,7 +136,8 @@ if libdrm_amdgpu.found()
'amdgpu/amd_gfx_v8_0.c',
'amdgpu/amd_gfx_v9_0.c',
'amdgpu/amd_dispatch_helpers.c',
- 'amdgpu/amd_dispatch.c'
+ 'amdgpu/amd_dispatch.c',
+ 'amdgpu/amd_draw_helpers.c'
]
endif
--
2.25.1