[PATCH] drm/amdgpu: Skip poison aca bank from UE channel

Liu, Xiang(Dean) Xiang.Liu at amd.com
Wed Jul 30 14:36:07 UTC 2025


[AMD Official Use Only - AMD Internal Distribution Only]

Thanks, will do.

Best Regards,

Liu, Xiang

________________________________
From: Zhou1, Tao <Tao.Zhou1 at amd.com>
Sent: Wednesday, July 30, 2025 6:46 PM
To: Liu, Xiang(Dean) <Xiang.Liu at amd.com>; amd-gfx at lists.freedesktop.org <amd-gfx at lists.freedesktop.org>
Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Liu, Xiang(Dean) <Xiang.Liu at amd.com>
Subject: RE: [PATCH] drm/amdgpu: Skip poison aca bank from UE channel

[AMD Official Use Only - AMD Internal Distribution Only]

It would be better to add a comment for the added condition check. With this resolved, the patch is:

Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Xiang Liu
> Sent: Wednesday, July 30, 2025 5:25 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Liu, Xiang(Dean)
> <Xiang.Liu at amd.com>
> Subject: [PATCH] drm/amdgpu: Skip poison aca bank from UE channel
>
> Avoid logging GFX poison consumption errors when a fatal error occurs.
>
> Signed-off-by: Xiang Liu <xiang.liu at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 47 ++++++++++++++-----------
>  1 file changed, 26 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
> index 3835f2592914..59dbb9257096 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
> @@ -125,6 +125,27 @@ static void aca_smu_bank_dump(struct amdgpu_device
> *adev, int idx, int total, st
>               RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged
> by the scrubber\n");  }
>
> +static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum
> +aca_hwip_type type) {
> +
> +     struct aca_hwip *hwip;
> +     int hwid, mcatype;
> +     u64 ipid;
> +
> +     if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
> +             return false;
> +
> +     hwip = &aca_hwid_mcatypes[type];
> +     if (!hwip->hwid)
> +             return false;
> +
> +     ipid = bank->regs[ACA_REG_IDX_IPID];
> +     hwid = ACA_REG__IPID__HARDWAREID(ipid);
> +     mcatype = ACA_REG__IPID__MCATYPE(ipid);
> +
> +     return hwip->hwid == hwid && hwip->mcatype == mcatype; }
> +
>  static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum
> aca_smu_type type,
>                                      int start, int count,
>                                      struct aca_banks *banks, struct
> ras_query_context *qctx) @@ -163,6 +184,11 @@ static int
> aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
>
>               bank.smu_err_type = type;
>
> +             if (type == ACA_SMU_TYPE_UE &&
> +
> ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) &&
> +                 !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC))
> +                     continue;
> +
>               aca_smu_bank_dump(adev, i, count, &bank, qctx);
>
>               ret = aca_banks_add_bank(banks, &bank); @@ -173,27 +199,6 @@
> static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum
> aca_smu_
>       return 0;
>  }
>
> -static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum
> aca_hwip_type type) -{
> -
> -     struct aca_hwip *hwip;
> -     int hwid, mcatype;
> -     u64 ipid;
> -
> -     if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
> -             return false;
> -
> -     hwip = &aca_hwid_mcatypes[type];
> -     if (!hwip->hwid)
> -             return false;
> -
> -     ipid = bank->regs[ACA_REG_IDX_IPID];
> -     hwid = ACA_REG__IPID__HARDWAREID(ipid);
> -     mcatype = ACA_REG__IPID__MCATYPE(ipid);
> -
> -     return hwip->hwid == hwid && hwip->mcatype == mcatype;
> -}
> -
>  static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
> enum aca_smu_type type)  {
>       const struct aca_bank_ops *bank_ops = handle->bank_ops;
> --
> 2.34.1

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20250730/79b33bf6/attachment-0001.htm>


More information about the amd-gfx mailing list