[PATCH v4 11/24] drm/amdkfd/gfx9: enable host trap
James Zhu
James.Zhu at amd.com
Tue Feb 6 15:59:07 UTC 2024
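Enable host trap handling in the gfx9 first-level trap handler.
The handler now reads the enable flags from the first-level TMA (offset 0x10)
together with the second-level TBA/TMA. When the saved PC carries a host trap
request, the wave is forwarded to the second-level trap handler only if the
host-trap enable bit (bit 1) is set; otherwise the handler exits the trap.
For all other traps, bit 0 is still recorded as the debug-trap-enabled flag.
The gfx9, arcturus and aldebaran trap handler binaries are regenerated.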
Signed-off-by: James Zhu <James.Zhu at amd.com>
---
.../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 63 +++++++++++--------
.../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 24 ++++---
2 files changed, 52 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d1caaf0e6a7c..af1f678790e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -274,14 +274,14 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbf820001, 0xbf820258,
+ 0xbf820001, 0xbf82025e,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf850055, 0xbf8e0010,
+ 0xbf85005b, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
@@ -294,7 +294,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf85003a,
+ 0x00000400, 0xbf850040,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
@@ -303,13 +303,16 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
- 0xc0031bbd, 0x00000010,
- 0xbf8cc07f, 0x8e6e976e,
- 0x8977ff77, 0x00800000,
- 0x87776e77, 0xc0071bbd,
- 0x00000000, 0xbf8cc07f,
+ 0xc0031c3d, 0x00000010,
+ 0xc0071bbd, 0x00000000,
0xc0071ebd, 0x00000008,
- 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf8cc07f, 0x8671ff6d,
+ 0x01000000, 0xbf840004,
+ 0x92f1ff70, 0x00010001,
+ 0xbf840016, 0xbf820005,
+ 0x86708170, 0x8e709770,
+ 0x8977ff77, 0x00800000,
+ 0x87777077, 0x86ee6e6e,
0xbf840001, 0xbe801d6e,
0x866eff6d, 0x01ff0000,
0xbf850005, 0x8778ff78,
@@ -1098,14 +1101,14 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
- 0xbf820001, 0xbf8202d4,
+ 0xbf820001, 0xbf8202da,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf850055, 0xbf8e0010,
+ 0xbf85005b, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
@@ -1118,7 +1121,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf85003a,
+ 0x00000400, 0xbf850040,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
@@ -1127,13 +1130,16 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
- 0xc0031bbd, 0x00000010,
- 0xbf8cc07f, 0x8e6e976e,
- 0x8977ff77, 0x00800000,
- 0x87776e77, 0xc0071bbd,
- 0x00000000, 0xbf8cc07f,
+ 0xc0031c3d, 0x00000010,
+ 0xc0071bbd, 0x00000000,
0xc0071ebd, 0x00000008,
- 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf8cc07f, 0x8671ff6d,
+ 0x01000000, 0xbf840004,
+ 0x92f1ff70, 0x00010001,
+ 0xbf840016, 0xbf820005,
+ 0x86708170, 0x8e709770,
+ 0x8977ff77, 0x00800000,
+ 0x87777077, 0x86ee6e6e,
0xbf840001, 0xbe801d6e,
0x866eff6d, 0x01ff0000,
0xbf850005, 0x8778ff78,
@@ -1578,14 +1584,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
};
static const uint32_t cwsr_trap_aldebaran_hex[] = {
- 0xbf820001, 0xbf8202df,
+ 0xbf820001, 0xbf8202e5,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf850055, 0xbf8e0010,
+ 0xbf85005b, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
@@ -1598,7 +1604,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf85003a,
+ 0x00000400, 0xbf850040,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
@@ -1607,13 +1613,16 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
- 0xc0031bbd, 0x00000010,
- 0xbf8cc07f, 0x8e6e976e,
- 0x8977ff77, 0x00800000,
- 0x87776e77, 0xc0071bbd,
- 0x00000000, 0xbf8cc07f,
+ 0xc0031c3d, 0x00000010,
+ 0xc0071bbd, 0x00000000,
0xc0071ebd, 0x00000008,
- 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf8cc07f, 0x8671ff6d,
+ 0x01000000, 0xbf840004,
+ 0x92f1ff70, 0x00010001,
+ 0xbf840016, 0xbf820005,
+ 0x86708170, 0x8e709770,
+ 0x8977ff77, 0x00800000,
+ 0x87777077, 0x86ee6e6e,
0xbf840001, 0xbe801d6e,
0x866eff6d, 0x01ff0000,
0xbf850005, 0x8778ff78,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index bb26338204f4..991fe6bb1726 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -104,6 +104,10 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
+var TMA_HOST_TRAP_EN_SHIFT = 1 // bit position of host_trap_en in the first-level TMA flags dword (bit 0 is debug_enable)
+var TMA_HOST_TRAP_EN_SIZE = 1 // host_trap_en field width, in bits
+var TMA_HOST_TRAP_EN_BFE = (TMA_HOST_TRAP_EN_SHIFT | (TMA_HOST_TRAP_EN_SIZE << 16)) // s_bfe_u32 selector: field offset in the low bits, field width starting at bit 16
+
var TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
var TTMP_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
var TTMP_DEBUG_TRAP_ENABLED_SHIFT = 23
@@ -288,17 +292,21 @@ L_FETCH_2ND_TRAP:
s_or_b32 ttmp15, ttmp15, 0xFFFF0000
L_NO_SIGN_EXTEND_TMA:
- s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
- s_waitcnt lgkmcnt(0)
- s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
- s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_DEBUG_TRAP_ENABLED_MASK
- s_or_b32 s_save_ib_sts, s_save_ib_sts, ttmp2
-
+ s_load_dword ttmp4, [ttmp14, ttmp15], 0x10 glc:1 // enable flags from 1st level TMA
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
- s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)
-
+ s_and_b32 ttmp5, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK // host trap request
+ s_cbranch_scc0 L_NOT_HT // SCC=0: HT bit clear in s_save_pc_hi, handle as a regular trap
+ s_bfe_u32 ttmp5, ttmp4, TMA_HOST_TRAP_EN_BFE // extract host_trap_en to ttmp5[0]
+ s_cbranch_scc0 L_EXIT_TRAP // HT requested, but host traps not enabled
+ s_branch L_GOTO_2ND_TRAP // HT requested and enabled: skip the debug-flag update below. NOTE(review): s_save_ib_sts keeps its previous debug-trap flag on this path - confirm intended
+L_NOT_HT:
+ s_and_b32 ttmp4, ttmp4, 0x1 // debug_enable bit left over
+ s_lshl_b32 ttmp4, ttmp4, TTMP_DEBUG_TRAP_ENABLED_SHIFT // move the flag to the TTMP_DEBUG_TRAP_ENABLED bit position
+ s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_DEBUG_TRAP_ENABLED_MASK // clear the previously recorded debug-trap-enabled flag
+ s_or_b32 s_save_ib_sts, s_save_ib_sts, ttmp4 // record the freshly loaded flag in s_save_ib_sts
+L_GOTO_2ND_TRAP:
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
--
2.25.1
More information about the amd-gfx
mailing list