[PATCH 1/2] drm/amdgpu: Only create err_count sysfs when hw_op is supported

Zhou1, Tao Tao.Zhou1 at amd.com
Mon Jun 12 06:22:18 UTC 2023


[AMD Official Use Only - General]

> -----Original Message-----
> From: Zhang, Hawking <Hawking.Zhang at amd.com>
> Sent: Sunday, June 11, 2023 6:46 PM
> To: amd-gfx at lists.freedesktop.org; Yang, Stanley <Stanley.Yang at amd.com>; Li,
> Candice <Candice.Li at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>;
> Zhou1, Tao <Tao.Zhou1 at amd.com>
> Cc: Zhang, Hawking <Hawking.Zhang at amd.com>
> Subject: [PATCH 1/2] drm/amdgpu: Only create err_count sysfs when hw_op is
> supported
>
> Some IP blocks only support partial ras feature and don't have ras counter and/or
> ras error status register at all.
> Driver should not create err_count sysfs node for those IP blocks.
>
> Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 31 ++++++++++++++-----------
>  1 file changed, 18 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index a6c3265cdbc4..dd7cdc234d7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2757,23 +2757,28 @@ int amdgpu_ras_block_late_init(struct
> amdgpu_device *adev,
>                       goto cleanup;
>       }
>
> -     r = amdgpu_ras_sysfs_create(adev, ras_block);
> -     if (r)
> -             goto interrupt;
> +     if (ras_obj->hw_ops &&
> +         (ras_obj->hw_ops->query_ras_error_count ||
> +          ras_obj->hw_ops->query_ras_error_status)) {

[Tao] The condition can also be changed like this:

        if (!(ras_obj->hw_ops &&
              (ras_obj->hw_ops->query_ras_error_count ||
               ras_obj->hw_ops->query_ras_error_status)))
                return 0;

Either way is fine with me.


> +             r = amdgpu_ras_sysfs_create(adev, ras_block);
> +             if (r)
> +                     goto interrupt;
>
> -     /* Those are the cached values at init.
> -      */
> -     query_info = kzalloc(sizeof(struct ras_query_if), GFP_KERNEL);
> -     if (!query_info)
> -             return -ENOMEM;
> -     memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
> +             /* Those are the cached values at init.
> +              */
> +             query_info = kzalloc(sizeof(struct ras_query_if), GFP_KERNEL);
> +             if (!query_info)
> +                     return -ENOMEM;
> +             memcpy(&query_info->head, ras_block, sizeof(struct
> ras_common_if));
>
> -     if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count,
> query_info) == 0) {
> -             atomic_set(&con->ras_ce_count, ce_count);
> -             atomic_set(&con->ras_ue_count, ue_count);
> +             if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count,
> query_info) == 0) {
> +                     atomic_set(&con->ras_ce_count, ce_count);
> +                     atomic_set(&con->ras_ue_count, ue_count);
> +             }
> +
> +             kfree(query_info);
>       }
>
> -     kfree(query_info);
>       return 0;
>
>  interrupt:
> --
> 2.17.1



More information about the amd-gfx mailing list