[PATCH 4/4] drm/amdgpu: remove ras_error_status parameter for UMC poison handler
Zhou1, Tao
Tao.Zhou1 at amd.com
Fri Oct 21 02:51:15 UTC 2022
[AMD Official Use Only - General]
> -----Original Message-----
> From: Zhang, Hawking <Hawking.Zhang at amd.com>
> Sent: Thursday, October 20, 2022 5:30 PM
> To: Zhou1, Tao <Tao.Zhou1 at amd.com>; amd-gfx at lists.freedesktop.org; Yang,
> Stanley <Stanley.Yang at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>; Li,
> Candice <Candice.Li at amd.com>
> Subject: RE: [PATCH 4/4] drm/amdgpu: remove ras_error_status parameter for
> UMC poison handler
>
> [AMD Official Use Only - General]
>
> Might squash this with patch 1
[Tao] This is a separate refinement, independent of patch #1. Either way is OK, but I would prefer to keep it as its own patch.
>
> Regards,
> Hawking
>
> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Sent: Wednesday, October 19, 2022 16:12
> To: amd-gfx at lists.freedesktop.org; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>; Chai,
> Thomas <YiPeng.Chai at amd.com>; Li, Candice <Candice.Li at amd.com>
> Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Subject: [PATCH 4/4] drm/amdgpu: remove ras_error_status parameter for
> UMC poison handler
>
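> Make the code simpler: both callers only passed a zero-initialized
> ras_err_data, so declare it inside amdgpu_umc_poison_handler() instead.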
>
> Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 +---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 16 ++++++----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 4 +---
> 4 files changed, 9 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 0561812aa0a4..37db39ba8718 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -753,9 +753,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct
> amdgpu_device *adev)
>
> void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device
> *adev, bool reset) {
> - struct ras_err_data err_data = {0, 0, 0, NULL};
> -
> - amdgpu_umc_poison_handler(adev, &err_data, reset);
> + amdgpu_umc_poison_handler(adev, reset);
> }
>
> bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device
> *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 28463b47ce33..693bce07eb46 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1561,7 +1561,6 @@ static void
> amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * {
> bool poison_stat = false;
> struct amdgpu_device *adev = obj->adev;
> - struct ras_err_data err_data = {0, 0, 0, NULL};
> struct amdgpu_ras_block_object *block_obj =
> amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
>
> @@ -1584,7 +1583,7 @@ static void
> amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
> }
>
> if (!adev->gmc.xgmi.connected_to_cpu)
> - amdgpu_umc_poison_handler(adev, &err_data, false);
> + amdgpu_umc_poison_handler(adev, false);
>
> if (block_obj->hw_ops->handle_poison_consumption)
> poison_stat = block_obj->hw_ops-
> >handle_poison_consumption(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> index dd1b1a612343..c040c9104521 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> @@ -179,27 +179,23 @@ static int amdgpu_umc_do_page_retirement(struct
> amdgpu_device *adev,
> return AMDGPU_RAS_SUCCESS;
> }
>
> -int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
> - void *ras_error_status,
> - bool reset)
> +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
> {
> + struct ras_err_data err_data = {0, 0, 0, NULL};
> int ret;
>
> if (adev->gmc.xgmi.connected_to_cpu) {
> - ret = amdgpu_umc_poison_handler_mca(adev,
> ras_error_status, reset);
> + ret = amdgpu_umc_poison_handler_mca(adev, &err_data,
> reset);
> } else {
> - struct ras_err_data *err_data = (struct ras_err_data
> *)ras_error_status;
> struct ras_common_if head = {
> .block = AMDGPU_RAS_BLOCK__UMC,
> };
> struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
>
> - ret =
> - amdgpu_umc_do_page_retirement(adev,
> ras_error_status, NULL, reset);
> -
> + ret = amdgpu_umc_do_page_retirement(adev, &err_data,
> NULL, reset);
> if (ret == AMDGPU_RAS_SUCCESS && obj) {
> - obj->err_data.ue_count += err_data->ue_count;
> - obj->err_data.ce_count += err_data->ce_count;
> + obj->err_data.ue_count += err_data.ue_count;
> + obj->err_data.ce_count += err_data.ce_count;
> }
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index 659a10de29c9..a6951160f13a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -83,9 +83,7 @@ struct amdgpu_umc {
> };
>
> int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct
> ras_common_if *ras_block);
> -int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
> - void *ras_error_status,
> - bool reset);
> +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset);
> int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
> struct amdgpu_irq_src *source,
> struct amdgpu_iv_entry *entry);
> --
> 2.35.1
More information about the amd-gfx mailing list.