[PATCH 2/3] drm/amdgpu: add utcl2 RAS poison functions for Aldebaran

Zhou1, Tao Tao.Zhou1 at amd.com
Tue Mar 15 07:07:34 UTC 2022


[AMD Official Use Only]



> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar at amd.com>
> Sent: Monday, March 14, 2022 5:44 PM
> To: Zhou1, Tao <Tao.Zhou1 at amd.com>; amd-gfx at lists.freedesktop.org; Zhang,
> Hawking <Hawking.Zhang at amd.com>; Yang, Stanley
> <Stanley.Yang at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>
> Subject: Re: [PATCH 2/3] drm/amdgpu: add utcl2 RAS poison functions for
> Aldebaran
> 
> 
> 
> On 3/14/2022 12:33 PM, Tao Zhou wrote:
> > Add helper functions to check and clear RAS utcl2 poison status.
> >
> > Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
> > ---
> >   .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  | 28
> ++++++++++++++++++-
> >   .../gpu/drm/amd/include/kgd_kfd_interface.h   |  3 ++
> >   2 files changed, 30 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > index c8935d718207..ebd7d36d099b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > @@ -23,6 +23,30 @@
> >   #include "amdgpu_amdkfd.h"
> >   #include "amdgpu_amdkfd_arcturus.h"
> >   #include "amdgpu_amdkfd_gfx_v9.h"
> > +#include "soc15.h"
> > +#include "gc/gc_9_4_2_sh_mask.h"
> > +
> > +static bool kgd_aldebaran_is_ras_utcl2_poison(struct amdgpu_device *adev,
> > +			uint16_t client_id)
> > +{
> > +	uint32_t status = 0;
> > +	struct amdgpu_vmhub *hub;
> > +
> > +	if (client_id != SOC15_IH_CLIENTID_UTCL2)
> > +		return false;
> > +
> 
> Status check is not related to interrupt. Is IH client id needed here?

[Tao] I'll remove the client_id check, since it has already been done in the VM fault handler.

> 
> Thanks,
> Lijo
> 
> > +	hub = &adev->vmhub[AMDGPU_GFXHUB_0];
> > +	status = RREG32(hub->vm_l2_pro_fault_status);
> > +	return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS,
> FED); }
> > +
> > +static void kgd_aldebaran_utcl2_fault_clear(struct amdgpu_device
> > +*adev) {
> > +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
> > +
> > +	hub = &adev->vmhub[AMDGPU_GFXHUB_0];
> > +	WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); }
> >
> >   const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> >   	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
> > @@ -41,5 +65,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> >   	.get_atc_vmid_pasid_mapping_info =
> >
> 	kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
> >   	.set_vm_context_page_table_base =
> kgd_gfx_v9_set_vm_context_page_table_base,
> > -	.program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings
> > +	.program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
> > +	.is_ras_utcl2_poison = kgd_aldebaran_is_ras_utcl2_poison,
> > +	.utcl2_fault_clear = kgd_aldebaran_utcl2_fault_clear
> >   };
> > diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> > b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> > index 2f60cf35a444..78400479193e 100644
> > --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> > +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> > @@ -291,6 +291,9 @@ struct kfd2kgd_calls {
> >   			int *wave_cnt, int *max_waves_per_cu);
> >   	void (*program_trap_handler_settings)(struct amdgpu_device *adev,
> >   			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
> > +	bool (*is_ras_utcl2_poison)(struct amdgpu_device *adev,
> > +			uint16_t client_id);
> > +	void (*utcl2_fault_clear)(struct amdgpu_device *adev);
> >   };
> >
> >   #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
> >


More information about the amd-gfx mailing list