[PATCH] drm/amdgpu: Modify the count method of defer error

Mario Limonciello mario.limonciello at amd.com
Wed May 7 02:15:47 UTC 2025


On 5/6/2025 7:30 AM, Ce Sun wrote:
> Keep the number of newly added deferred error (DE) counts consistent
> with the number of newly added error addresses.

You need to make sure you include a Signed-off-by: tag when you submit
patches; see the Developer's Certificate of Origin section here:

https://www.kernel.org/doc/html/latest/process/submitting-patches.html#developer-s-certificate-of-origin-1-1


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/umc_v12_0.c  | 11 +++++++++--
>   2 files changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index 857693bcd8d4..52fb71c4ce9d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -130,6 +130,7 @@ struct amdgpu_umc {
>   
>   	/* active mask for umc node instance */
>   	unsigned long active_mask;
> +	unsigned long err_addr_cnt;
>   };
>   
>   int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
> index 0e404c074975..eb3f99dbbcd7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
> @@ -262,6 +262,9 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
>   				soc_pa, channel_index, umc_inst);
>   	}
>   
> +	if (err_data)
> +		adev->umc.err_addr_cnt = err_data->err_addr_cnt;
> +
>   out:
>   	return ret;
>   }
> @@ -428,8 +431,12 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank
>   		bank->regs[ACA_REG_IDX_ADDR]);
>   
>   	ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
> -	count = ext_error_code == 0 ?
> -		ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
> +	if (umc_v12_0_is_deferred_error(adev, status))
> +		count = ext_error_code == 0 ?
> +			adev->umc.err_addr_cnt / adev->umc.retire_unit : 1ULL;
> +	else
> +		count = ext_error_code == 0 ?
> +			ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
>   
>   	return aca_error_cache_log_bank_error(handle, &info, err_type, count);
>   }



More information about the amd-gfx mailing list