[PATCH v2 05/12] drm/amdgpu: Generate cper records

Kees Bakker kees at ijzerbout.nl
Tue Feb 18 19:23:41 UTC 2025


Op 14-02-2025 om 09:07 schreef Xiang Liu:
> From: Hawking Zhang <Hawking.Zhang at amd.com>
>
> Encode the error information in CPER format and commit
> to the cper ring
>
> Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
> Reviewed-by: Yang Wang <keivnyang.wang at amd.com>
> Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 32 +++++++++++++++++++++++++
>   1 file changed, 32 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
> index ed1c20bd8114..c0da9096a7fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
> @@ -384,6 +384,36 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type
>   	return ret;
>   }
>   
> +static void aca_banks_generate_cper(struct amdgpu_device *adev,
> +				    enum aca_smu_type type,
> +				    struct aca_banks *banks,
> +				    int count)
> +{
> +	struct aca_bank_node *node;
> +	struct aca_bank *bank;
> +
> +	if (!adev || !banks || !count) {
> +		dev_warn(adev->dev, "fail to generate cper records\n");
The check for !adev is suspicious in combination with the
dev_warn(adev->dev, ...) call: if adev is NULL, the warning itself
dereferences the NULL pointer.
> +		return;
> +	}
> +
> +	/* UEs must be encoded into separate CPER entries */
> +	if (type == ACA_SMU_TYPE_UE) {
> +		list_for_each_entry(node, &banks->list, node) {
> +			bank = &node->bank;
> +			if (amdgpu_cper_generate_ue_record(adev, bank))
> +				dev_warn(adev->dev, "fail to generate ue cper records\n");
> +		}
> +	} else {
> +		/*
> +		 * SMU_TYPE_CE banks are combined into 1 CPER entries,
> +		 * they could be CEs or DEs or both
> +		 */
> +		if (amdgpu_cper_generate_ce_records(adev, banks, count))
> +			dev_warn(adev->dev, "fail to generate ce cper records\n");
> +	}
> +}
> +
>   static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
>   			    bank_handler_t handler, struct ras_query_context *qctx, void *data)
>   {
> @@ -421,6 +451,8 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
>   	if (ret)
>   		goto err_release_banks;
>   
> +	aca_banks_generate_cper(adev, type, &banks, count);
> +
>   err_release_banks:
>   	aca_banks_release(&banks);
>   



More information about the amd-gfx mailing list