[PATCH 2/2] drm/amdgpu: refine bad page loading when in the same nps mode

Zhou1, Tao Tao.Zhou1 at amd.com
Mon Jul 7 10:35:29 UTC 2025


[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Xie, Patrick <Gangliang.Xie at amd.com>
> Sent: Monday, July 7, 2025 3:10 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>; Xie, Patrick <Gangliang.Xie at amd.com>
> Subject: [PATCH 2/2] drm/amdgpu: refine bad page loading when in the same nps
> mode
>
> When loading bad pages in the same NPS mode, the other fields in the eeprom
> records need to be set manually in addition to retired_page.
>
> Signed-off-by: ganglxie <ganglxie at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index e03550be45b4..e02af20e6204 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2861,6 +2861,13 @@ static int
> __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
>                       if (amdgpu_umc_pages_in_a_row(adev, err_data,
>                                       bps[0].retired_page <<
> AMDGPU_GPU_PAGE_SHIFT))
>                               return -EINVAL;
> +                     for ( i = 0; i < adev->umc.retire_unit; i++) {
> +                             err_data->err_addr[i].address = bps[0].address;
> +                             err_data->err_addr[i].mem_channel =
> bps[0].mem_channel;
> +                             err_data->err_addr[i].bank = bps[0].bank;
> +                             err_data->err_addr[i].err_type = bps[0].err_type;
> +                             err_data->err_addr[i].mcumc_id = bps[0].mcumc_id;
> +                     }

[Tao] Can we use amdgpu_umc_fill_error_record() here?

>               } else {
>                       if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data))
>                               return -EINVAL;
> @@ -2893,6 +2900,7 @@ static int __amdgpu_ras_convert_rec_from_rom(struct
> amdgpu_device *adev,
>                               enum amdgpu_memory_partition nps)
>  {
>       enum amdgpu_memory_partition save_nps;
> +     int i = 0;
>
>       save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
>       bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
> @@ -2901,6 +2909,13 @@ static int __amdgpu_ras_convert_rec_from_rom(struct
> amdgpu_device *adev,
>               if (amdgpu_umc_pages_in_a_row(adev, err_data,
>                               bps->retired_page <<
> AMDGPU_GPU_PAGE_SHIFT))
>                       return -EINVAL;
> +             for ( i = 0; i < adev->umc.retire_unit; i++) {
> +                     err_data->err_addr[i].address = bps->address;
> +                     err_data->err_addr[i].mem_channel = bps->mem_channel;
> +                     err_data->err_addr[i].bank = bps->bank;
> +                     err_data->err_addr[i].err_type = bps->err_type;
> +                     err_data->err_addr[i].mcumc_id = bps->mcumc_id;
> +             }
>       } else {
>               if (bps->address) {
>                       if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
> --
> 2.34.1



More information about the amd-gfx mailing list