[PATCH 1/5] drm/amdgpu: add condition check for waking up thread

Lazar, Lijo lijo.lazar at amd.com
Thu Jun 13 08:11:14 UTC 2024



On 6/13/2024 7:55 AM, YiPeng Chai wrote:
> 1. Messages cannot be added to the fifo while in gpu reset mode.
> 2. The thread can be woken up only when the message has been
> successfully saved to the fifo.
> 

I think the fifo should still cache poison requests while in reset. The
page retirement thread can try to acquire the read side of the reset lock
and wait there if a reset is in progress.

Thanks
Lijo

> Signed-off-by: YiPeng Chai <YiPeng.Chai at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ++++++++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 18 +++++++++++-------
>  2 files changed, 21 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index d0dcd3d37e6d..ed260966363f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2093,12 +2093,16 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
>  	if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
>  		struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
>  
> -		amdgpu_ras_put_poison_req(obj->adev,
> -			AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false);
> -
> -		atomic_inc(&con->page_retirement_req_cnt);
> -
> -		wake_up(&con->page_retirement_wq);
> +		if (!amdgpu_in_reset(obj->adev) && !atomic_read(&con->in_recovery)) {
> +			int ret;
> +
> +			ret = amdgpu_ras_put_poison_req(obj->adev,
> +				AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false);
> +			if (!ret) {
> +				atomic_inc(&con->page_retirement_req_cnt);
> +				wake_up(&con->page_retirement_wq);
> +			}
> +		}
>  	}
>  #endif
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> index 1dbe69eabb9a..94181ae85886 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> @@ -293,16 +293,20 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
>  
>  			amdgpu_ras_error_data_fini(&err_data);
>  		} else {
> -				struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> -
>  #ifdef HAVE_KFIFO_PUT_NON_POINTER
> -				amdgpu_ras_put_poison_req(adev,
> -					block, pasid, pasid_fn, data, reset);
> -#endif
> +			struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
>  
> -				atomic_inc(&con->page_retirement_req_cnt);
> +			if (!amdgpu_in_reset(adev) && !atomic_read(&con->in_recovery)) {
> +				int ret;
>  
> -				wake_up(&con->page_retirement_wq);
> +				ret = amdgpu_ras_put_poison_req(adev,
> +					block, pasid, pasid_fn, data, reset);
> +				if (!ret) {
> +					atomic_inc(&con->page_retirement_req_cnt);
> +					wake_up(&con->page_retirement_wq);
> +				}
> +			}
> +#endif
>  		}
>  	} else {
>  		if (adev->virt.ops && adev->virt.ops->ras_poison_handler)


More information about the amd-gfx mailing list