[PATCH 08/11] drm/amdkfd: update buffer_{store, load}_* modifiers for gfx940
Alex Deucher
alexander.deucher at amd.com
Mon Dec 2 21:49:49 UTC 2024
From: Lancelot SIX <lancelot.six at amd.com>
Instruction modifiers of the untyped vector memory buffer instructions
(MUBUF encoded) changed in gfx940. The slc, scc and glc modifiers have
been replaced with sc0, sc1 and nt.
The current CWSR trap handler is written using pre-gfx940 modifier
names, making the source incompatible with a strict gfx940 assembler.
This patch updates the cwsr_trap_handler_gfx9.s source file to be
compatible with all gfx9 variants of the ISA. The binary assembled code
is unchanged (so the behaviour is unchanged as well), only the source
representation is updated.
Signed-off-by: Lancelot SIX <lancelot.six at amd.com>
Reviewed-by: Jay Cornwall <jay.cornwall at amd.com>
Acked-by: Felix Kuehling <felix.kuehling at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
.../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 24 ++++++++++++-------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index bb26338204f4b..a2d597d7fb574 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -48,6 +48,12 @@ var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing
var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+#if ASIC_FAMILY < CHIP_GC_9_4_3
+#define VMEM_MODIFIERS slc:1 glc:1
+#else
+#define VMEM_MODIFIERS sc0:1 nt:1
+#endif
+
/**************************************************************************/
/* variables */
/**************************************************************************/
@@ -581,7 +587,7 @@ end
L_SAVE_LDS_LOOP_VECTOR:
ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
s_waitcnt lgkmcnt(0)
- buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1
// s_waitcnt vmcnt(0)
// v_add_u32 v2, vcc[0:1], v2, v3
v_add_u32 v2, v2, v3
@@ -979,17 +985,17 @@ L_TCP_STORE_CHECK_DONE:
end
function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
- buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
end
function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
- buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
s_waitcnt vmcnt(0)
end
--
2.47.0
More information about the amd-gfx
mailing list