[PATCH V2 01/11] drm/amdgpu: Unify ras block interface for each ras block
Zhou1, Tao
Tao.Zhou1 at amd.com
Mon Dec 6 07:36:10 UTC 2021
[AMD Official Use Only]
It's better to loop in @Clements, John for the code review.
Regards,
Tao
> -----Original Message-----
> From: Chai, Thomas <YiPeng.Chai at amd.com>
> Sent: Wednesday, December 1, 2021 6:53 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Chai, Thomas <YiPeng.Chai at amd.com>; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>; Chai,
> Thomas <YiPeng.Chai at amd.com>
> Subject: [PATCH V2 01/11] drm/amdgpu: Unify ras block interface for each ras
> block
>
> 1. Define unified ops interface for each block.
> 2. Add ras_block_match function pointer in ops interface for each ras block to
> identify itself.
> 3. Define unified basic ras block data for each ras block.
> 4. Create dedicated amdgpu device ras block link list to manage all of the ras
> blocks.
> 5. Add amdgpu_ras_register_ras_block new function interface for each ras block
> to register itself to ras controlling block.
>
> Signed-off-by: yipechai <YiPeng.Chai at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 12 +++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 29 ++++++++++++++++++++++
> 4 files changed, 45 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index db1505455761..eddf230856e2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1151,6 +1151,8 @@ struct amdgpu_device {
> bool barrier_has_auto_waitcnt;
>
> struct amdgpu_reset_control *reset_cntl;
> +
> + struct list_head ras_list;
> };
>
> static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 73ec46140d68..0980396ee709 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3578,6 +3578,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>
> INIT_LIST_HEAD(&adev->reset_list);
>
> + INIT_LIST_HEAD(&adev->ras_list);
> +
> INIT_DELAYED_WORK(&adev->delayed_init_work,
> amdgpu_device_delayed_init_work_handler);
> INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 90f0db3b4f65..8713575c7cf1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2739,3 +2739,15 @@ static void
> amdgpu_register_bad_pages_mca_notifier(void)
> }
> }
> #endif
> +/* Register each ip ras block into amdgpu ras */
> +int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
> + struct amdgpu_ras_block_object* ras_block_obj)
> +{
> + if (!adev || !ras_block_obj)
> + return -EINVAL;
> +
> + INIT_LIST_HEAD(&ras_block_obj->node);
> + list_add_tail(&ras_block_obj->node, &adev->ras_list);
> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index cdd0010a5389..d6e5e3c862bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -469,6 +469,34 @@ struct ras_debug_if {
> };
> int op;
> };
> +
> +struct amdgpu_ras_block_object {
> + /* block name */
> + char name[32];
> +
> + enum amdgpu_ras_block block;
> +
> + uint32_t sub_block_index;
> +
> + /* ras block link */
> + struct list_head node;
> +
> + const struct amdgpu_ras_block_ops *ops;
> +};
> +
> +struct amdgpu_ras_block_ops {
> + int (*ras_block_match)(struct amdgpu_ras_block_object* block_obj,
> enum amdgpu_ras_block block, uint32_t sub_block_index);
> + int (*ras_late_init)(struct amdgpu_device *adev);
> + void (*ras_fini)(struct amdgpu_device *adev);
> + int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
> + void (*query_ras_error_count)(struct amdgpu_device *adev,void
> *ras_error_status);
> + void (*query_ras_error_status)(struct amdgpu_device *adev);
> + bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
> + void (*query_ras_error_address)(struct amdgpu_device *adev, void
> *ras_error_status);
> + void (*reset_ras_error_count)(struct amdgpu_device *adev);
> + void (*reset_ras_error_status)(struct amdgpu_device *adev);
> +};
> +
> /* work flow
> * vbios
> * 1: ras feature enable (enabled by default)
> @@ -652,4 +680,5 @@ const char
> *get_ras_block_str(struct ras_common_if *ras_block);
>
> bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
>
> +int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object* ras_block_obj);
> #endif
> --
> 2.25.1
More information about the amd-gfx
mailing list