[PATCH 1/4] drm/amdgpu: export umc error address translation interface

Yang, Stanley Stanley.Yang at amd.com
Mon Sep 26 03:15:25 UTC 2022


[AMD Official Use Only - General]

Hi Tao,

> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Sent: Friday, September 23, 2022 5:21 PM
> To: amd-gfx at lists.freedesktop.org; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>
> Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Subject: [PATCH 1/4] drm/amdgpu: export umc error address translation
> interface
> 
> Make it global so we can convert a specific MCA address.
> 
> Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  6 ++++++
>  drivers/gpu/drm/amd/amdgpu/umc_v6_7.c   | 11 +++++------
>  2 files changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index 3629d8f292ef..31fbefaaf676 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -22,6 +22,8 @@
>  #define __AMDGPU_UMC_H__
>  #include "amdgpu_ras.h"
> 
> +#define UMC_INVALID_ADDR 0x1ULL
> +
>  /*
>   * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
>   * is the index of 4KB block
> @@ -51,6 +53,10 @@ struct amdgpu_umc_ras {
>  	struct amdgpu_ras_block_object ras_block;
>  	void (*err_cnt_init)(struct amdgpu_device *adev);
>  	bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
> +	void (*query_error_address_per_channel)(struct amdgpu_device
> *adev,
> +						 struct ras_err_data
> *err_data,
> +						 uint32_t umc_reg_offset,
> uint32_t ch_inst,
> +						 uint32_t umc_inst, uint64_t
> mca_addr);
>  	void (*ecc_info_query_ras_error_count)(struct amdgpu_device
> *adev,
>  				      void *ras_error_status);
>  	void (*ecc_info_query_ras_error_address)(struct amdgpu_device
> *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> index bf7524f16b66..0f1b215653f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> @@ -452,9 +452,8 @@ static void umc_v6_7_query_ras_error_count(struct
> amdgpu_device *adev,
> 
>  static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
>  					 struct ras_err_data *err_data,
> -					 uint32_t umc_reg_offset,
> -					 uint32_t ch_inst,
> -					 uint32_t umc_inst)
> +					 uint32_t umc_reg_offset, uint32_t
> ch_inst,
> +					 uint32_t umc_inst, uint64_t
> mca_addr)
>  {
>  	uint32_t mc_umc_status_addr;
>  	uint32_t channel_index;
> @@ -540,9 +539,8 @@ static void
> umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
>  							 ch_inst);
>  		umc_v6_7_query_error_address(adev,
>  					     err_data,
> -					     umc_reg_offset,
> -					     ch_inst,
> -					     umc_inst);
> +					     umc_reg_offset, ch_inst,
> +					     umc_inst, UMC_INVALID_ADDR);
>  	}
>  }
> 
> @@ -583,4 +581,5 @@ struct amdgpu_umc_ras umc_v6_7_ras = {
>  	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
>  	.ecc_info_query_ras_error_count =
> umc_v6_7_ecc_info_query_ras_error_count,
>  	.ecc_info_query_ras_error_address =
> umc_v6_7_ecc_info_query_ras_error_address,
> +	.query_error_address_per_channel =
> umc_v6_7_query_error_address,

Stanley: According to patch #3, it would be better to rename query_error_address_per_channel to convert/query_error_address_at_specific_channel, because the channel_instance and umc_instance are taken from the mce structure; using per_channel may cause misunderstanding.

>  };
> --
> 2.35.1


More information about the amd-gfx mailing list