[PATCH] drm/amdkfd: update buffer_{store,load}_* modifiers for gfx940

Felix Kuehling felix.kuehling at amd.com
Mon Apr 29 22:34:36 UTC 2024


On 2024-04-29 17:50, Jay Cornwall wrote:
> On 4/29/2024 06:06, Lancelot SIX wrote:
>> Instruction modifiers of the untyped vector memory buffer instructions
>> (MUBUF-encoded) changed in gfx940.  The glc, scc and slc modifiers have
>> been replaced with sc0, sc1 and nt, respectively.
>>
>> The current CWSR trap handler is written using pre-gfx940 modifier
>> names, making the source incompatible with a strict gfx940 assembler.
>>
>> This patch updates the cwsr_trap_handler_gfx9.asm source file to be
>> compatible with all gfx9 variants of the ISA.  The assembled binary
>> is unchanged (so the behaviour is unchanged as well); only the source
>> representation is updated.
>>
>> Signed-off-by: Lancelot SIX <lancelot.six at amd.com>
>> ---
>>   .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 24 ++++++++++++-------
>>   1 file changed, 15 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm 
>> b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
>> index bb26338204f4..a2d597d7fb57 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
>> +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
>> @@ -48,6 +48,12 @@ var ACK_SQC_STORE            = 1            
>> //workaround for suspected SQC store bug causing
>>   var SAVE_AFTER_XNACK_ERROR        =    1 //workaround for TCP store 
>> failure after XNACK error when ALLOW_REPLAY=0, for debugger
>>   var SINGLE_STEP_MISSED_WORKAROUND   =    (ASIC_FAMILY <= 
>> CHIP_ALDEBARAN)    //workaround for lost MODE.DEBUG_EN exception when 
>> SAVECTX raised
>> +#if ASIC_FAMILY < CHIP_GC_9_4_3
>> +#define VMEM_MODIFIERS slc:1 glc:1
>> +#else
>> +#define VMEM_MODIFIERS sc0:1 nt:1
>> +#endif
>> +
>> /**************************************************************************/
>>   /*            variables                      */
>> /**************************************************************************/
>> @@ -581,7 +587,7 @@ end
>>   L_SAVE_LDS_LOOP_VECTOR:
>>         ds_read_b64 v[0:1], v2    //x =LDS[a], byte address
>>         s_waitcnt lgkmcnt(0)
>> -      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, 
>> s_save_mem_offset offen:1  glc:1  slc:1
>> +      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, 
>> s_save_mem_offset VMEM_MODIFIERS offen:1
>>   //    s_waitcnt vmcnt(0)
>>   //    v_add_u32 v2, vcc[0:1], v2, v3
>>         v_add_u32 v2, v2, v3
>> @@ -979,17 +985,17 @@ L_TCP_STORE_CHECK_DONE:
>>   end
>>     function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
>> -    buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
>> -    buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1  
>> offset:256
>> -    buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1  
>> offset:256*2
>> -    buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1  
>> offset:256*3
>> +    buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
>> +    buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS 
>> offset:256
>> +    buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS 
>> offset:256*2
>> +    buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS 
>> offset:256*3
>>   end
>>     function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
>> -    buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
>> -    buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 
>> offset:256
>> -    buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 
>> offset:256*2
>> -    buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 
>> offset:256*3
>> +    buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
>> +    buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS 
>> offset:256
>> +    buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS 
>> offset:256*2
>> +    buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS 
>> offset:256*3
>>       s_waitcnt vmcnt(0)
>>   end
>>
>> base-commit: cf743996352e327f483dc7d66606c90276f57380
>
> Reviewed-by: Jay Cornwall <jay.cornwall at amd.com>

Acked-by: Felix Kuehling <felix.kuehling at amd.com>

Do you need me to submit the patch to amd-staging-drm-next?

Thanks,
   Felix




More information about the amd-gfx mailing list