[PATCH 4/4] drm/amdgpu: remove ras_error_status parameter for UMC poison handler

Zhou1, Tao Tao.Zhou1 at amd.com
Fri Oct 21 02:51:15 UTC 2022


[AMD Official Use Only - General]



> -----Original Message-----
> From: Zhang, Hawking <Hawking.Zhang at amd.com>
> Sent: Thursday, October 20, 2022 5:30 PM
> To: Zhou1, Tao <Tao.Zhou1 at amd.com>; amd-gfx at lists.freedesktop.org; Yang,
> Stanley <Stanley.Yang at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>; Li,
> Candice <Candice.Li at amd.com>
> Subject: RE: [PATCH 4/4] drm/amdgpu: remove ras_error_status parameter for
> UMC poison handler
> 
> [AMD Official Use Only - General]
> 
> Might squash this with patch 1

[Tao] This is a separate refinement, distinct from patch #1. Either approach is OK, but I prefer keeping this as a standalone patch.

> 
> Regards,
> Hawking
> 
> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Sent: Wednesday, October 19, 2022 16:12
> To: amd-gfx at lists.freedesktop.org; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>; Chai,
> Thomas <YiPeng.Chai at amd.com>; Li, Candice <Candice.Li at amd.com>
> Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Subject: [PATCH 4/4] drm/amdgpu: remove ras_error_status parameter for
> UMC poison handler
> 
> Make the code simpler.
> 
> Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  4 +---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    |  3 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c    | 16 ++++++----------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h    |  4 +---
>  4 files changed, 9 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 0561812aa0a4..37db39ba8718 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -753,9 +753,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct
> amdgpu_device *adev)
> 
>  void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
>  {
> -	struct ras_err_data err_data = {0, 0, 0, NULL};
> -
> -	amdgpu_umc_poison_handler(adev, &err_data, reset);
> +	amdgpu_umc_poison_handler(adev, reset);
>  }
> 
>  bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 28463b47ce33..693bce07eb46 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1561,7 +1561,6 @@ static void
> amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *  {
>  	bool poison_stat = false;
>  	struct amdgpu_device *adev = obj->adev;
> -	struct ras_err_data err_data = {0, 0, 0, NULL};
>  	struct amdgpu_ras_block_object *block_obj =
>  		amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
> 
> @@ -1584,7 +1583,7 @@ static void
> amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
>  	}
> 
>  	if (!adev->gmc.xgmi.connected_to_cpu)
> -		amdgpu_umc_poison_handler(adev, &err_data, false);
> +		amdgpu_umc_poison_handler(adev, false);
> 
>  	if (block_obj->hw_ops->handle_poison_consumption)
>  		poison_stat = block_obj->hw_ops-
> >handle_poison_consumption(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> index dd1b1a612343..c040c9104521 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> @@ -179,27 +179,23 @@ static int amdgpu_umc_do_page_retirement(struct
> amdgpu_device *adev,
>  	return AMDGPU_RAS_SUCCESS;
>  }
> 
> -int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
> -		void *ras_error_status,
> -		bool reset)
> +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
>  {
> +	struct ras_err_data err_data = {0, 0, 0, NULL};
>  	int ret;
> 
>  	if (adev->gmc.xgmi.connected_to_cpu) {
> -		ret = amdgpu_umc_poison_handler_mca(adev,
> ras_error_status, reset);
> +		ret = amdgpu_umc_poison_handler_mca(adev, &err_data,
> reset);
>  	} else {
> -		struct ras_err_data *err_data = (struct ras_err_data
> *)ras_error_status;
>  		struct ras_common_if head = {
>  			.block = AMDGPU_RAS_BLOCK__UMC,
>  		};
>  		struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
> 
> -		ret =
> -			amdgpu_umc_do_page_retirement(adev,
> ras_error_status, NULL, reset);
> -
> +		ret = amdgpu_umc_do_page_retirement(adev, &err_data,
> NULL, reset);
>  		if (ret == AMDGPU_RAS_SUCCESS && obj) {
> -			obj->err_data.ue_count += err_data->ue_count;
> -			obj->err_data.ce_count += err_data->ce_count;
> +			obj->err_data.ue_count += err_data.ue_count;
> +			obj->err_data.ce_count += err_data.ce_count;
>  		}
>  	}
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index 659a10de29c9..a6951160f13a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -83,9 +83,7 @@ struct amdgpu_umc {
>  };
> 
>  int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
> -int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
> -		void *ras_error_status,
> -		bool reset);
> +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset);
>  int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
>  		struct amdgpu_irq_src *source,
>  		struct amdgpu_iv_entry *entry);
> --
> 2.35.1


More information about the amd-gfx mailing list