[PATCH 1/4] drm/amdgpu: export umc error address translation interface
Yang, Stanley
Stanley.Yang at amd.com
Mon Sep 26 03:15:25 UTC 2022
[AMD Official Use Only - General]
Hi Tao,
> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Sent: Friday, September 23, 2022 5:21 PM
> To: amd-gfx at lists.freedesktop.org; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>
> Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Subject: [PATCH 1/4] drm/amdgpu: export umc error address translation
> interface
>
> Make it global so we can convert a specific MCA address.
>
> Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 6 ++++++
> drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 11 +++++------
> 2 files changed, 11 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index 3629d8f292ef..31fbefaaf676 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -22,6 +22,8 @@
> #define __AMDGPU_UMC_H__
> #include "amdgpu_ras.h"
>
> +#define UMC_INVALID_ADDR 0x1ULL
> +
> /*
> * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
> * is the index of 4KB block
> @@ -51,6 +53,10 @@ struct amdgpu_umc_ras {
> struct amdgpu_ras_block_object ras_block;
> void (*err_cnt_init)(struct amdgpu_device *adev);
> bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
> + void (*query_error_address_per_channel)(struct amdgpu_device
> *adev,
> + struct ras_err_data
> *err_data,
> + uint32_t umc_reg_offset,
> uint32_t ch_inst,
> + uint32_t umc_inst, uint64_t
> mca_addr);
> void (*ecc_info_query_ras_error_count)(struct amdgpu_device
> *adev,
> void *ras_error_status);
> void (*ecc_info_query_ras_error_address)(struct amdgpu_device
> *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> index bf7524f16b66..0f1b215653f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> @@ -452,9 +452,8 @@ static void umc_v6_7_query_ras_error_count(struct
> amdgpu_device *adev,
>
> static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
> struct ras_err_data *err_data,
> - uint32_t umc_reg_offset,
> - uint32_t ch_inst,
> - uint32_t umc_inst)
> + uint32_t umc_reg_offset, uint32_t
> ch_inst,
> + uint32_t umc_inst, uint64_t
> mca_addr)
> {
> uint32_t mc_umc_status_addr;
> uint32_t channel_index;
> @@ -540,9 +539,8 @@ static void
> umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
> ch_inst);
> umc_v6_7_query_error_address(adev,
> err_data,
> - umc_reg_offset,
> - ch_inst,
> - umc_inst);
> + umc_reg_offset, ch_inst,
> + umc_inst, UMC_INVALID_ADDR);
> }
> }
>
> @@ -583,4 +581,5 @@ struct amdgpu_umc_ras umc_v6_7_ras = {
> .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
> .ecc_info_query_ras_error_count =
> umc_v6_7_ecc_info_query_ras_error_count,
> .ecc_info_query_ras_error_address =
> umc_v6_7_ecc_info_query_ras_error_address,
> + .query_error_address_per_channel =
> umc_v6_7_query_error_address,
Stanley: According to patch #3, it would be better to rename query_error_address_per_channel to convert/query_error_address_at_specific_channel, because the channel instance and UMC instance are obtained from the mce structure; using "per_channel" may cause misunderstanding.
> };
> --
> 2.35.1
More information about the amd-gfx
mailing list