[PATCH] drm/amdkfd: update buffer_{store,load}_* modifiers for gfx940

Jay Cornwall jay.cornwall at amd.com
Mon Apr 29 21:50:34 UTC 2024


On 4/29/2024 06:06, Lancelot SIX wrote:
> Instruction modifiers of the untyped vector memory buffer instructions
> (MUBUF encoded) changed in gfx940.  The glc, scc and slc modifiers have
> been replaced with sc0, sc1 and nt, respectively.
> 
> The current CWSR trap handler is written using pre-gfx940 modifier
> names, making the source incompatible with a strict gfx940 assembler.
> 
> This patch updates the cwsr_trap_handler_gfx9.asm source file to be
> compatible with all gfx9 variants of the ISA.  The assembled binary
> is unchanged (and therefore so is the behaviour); only the source
> representation is updated.
> 
> Signed-off-by: Lancelot SIX <lancelot.six at amd.com>
> ---
>   .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 24 ++++++++++++-------
>   1 file changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
> index bb26338204f4..a2d597d7fb57 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
> @@ -48,6 +48,12 @@ var ACK_SQC_STORE		    =	1		    //workaround for suspected SQC store bug causing
>   var SAVE_AFTER_XNACK_ERROR	    =	1		    //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
>   var SINGLE_STEP_MISSED_WORKAROUND   =	(ASIC_FAMILY <= CHIP_ALDEBARAN)	//workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
>   
> +#if ASIC_FAMILY < CHIP_GC_9_4_3
> +#define VMEM_MODIFIERS slc:1 glc:1
> +#else
> +#define VMEM_MODIFIERS sc0:1 nt:1
> +#endif
> +
>   /**************************************************************************/
>   /*			variables					  */
>   /**************************************************************************/
> @@ -581,7 +587,7 @@ end
>   L_SAVE_LDS_LOOP_VECTOR:
>         ds_read_b64 v[0:1], v2	//x =LDS[a], byte address
>         s_waitcnt lgkmcnt(0)
> -      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1  glc:1  slc:1
> +      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1
>   //	s_waitcnt vmcnt(0)
>   //	v_add_u32 v2, vcc[0:1], v2, v3
>         v_add_u32 v2, v2, v3
> @@ -979,17 +985,17 @@ L_TCP_STORE_CHECK_DONE:
>   end
>   
>   function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
> -	buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
> -	buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1  offset:256
> -	buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1  offset:256*2
> -	buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1  offset:256*3
> +	buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
> +	buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
> +	buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
> +	buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
>   end
>   
>   function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
> -	buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
> -	buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
> -	buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
> -	buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
> +	buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
> +	buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
> +	buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
> +	buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
>   	s_waitcnt vmcnt(0)
>   end
>   
> 
> base-commit: cf743996352e327f483dc7d66606c90276f57380

Reviewed-by: Jay Cornwall <jay.cornwall at amd.com>


More information about the amd-gfx mailing list