[PATCH] drm/amdgpu: avoid over-handle of fence driver fini in s3 test (v2)

Chen, Guchun Guchun.Chen at amd.com
Mon Aug 2 08:23:31 UTC 2021


[Public]

Thank you, Christian.

Regarding fence_drv.initialized, it looks a bit redundant; anyway, let me look into this more.

Regards,
Guchun

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken at gmail.com> 
Sent: Monday, August 2, 2021 2:56 PM
To: Chen, Guchun <Guchun.Chen at amd.com>; amd-gfx at lists.freedesktop.org; Gao, Likun <Likun.Gao at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>
Subject: Re: [PATCH] drm/amdgpu: avoid over-handle of fence driver fini in s3 test (v2)

Am 02.08.21 um 07:16 schrieb Guchun Chen:
> In amdgpu_fence_driver_hw_fini, there is no need to call drm_sched_fini
> to stop the scheduler in the s3 test; otherwise, fence-related failures
> will occur after resume. To fix this, and for a better cleanup, move
> drm_sched_fini from fence_hw_fini to fence_sw_fini, as it is part of
> driver shutdown and should never be called in hw_fini.
>
> v2: rename amdgpu_fence_driver_init to amdgpu_fence_driver_sw_init, to 
> keep sw_init and sw_fini paired.
>
> Fixes: cd87a6dcf6af drm/amdgpu: adjust fence driver enable sequence
> Suggested-by: Christian König <christian.koenig at amd.com>
> Signed-off-by: Guchun Chen <guchun.chen at amd.com>

It's a bit ambiguous now what fence_drv.initialized means, but I think we can live with that for now.

Patch is Reviewed-by: Christian König <christian.koenig at amd.com>.

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 ++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 12 +++++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  4 ++--
>   3 files changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b1d2dc39e8be..9e53ff851496 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3646,9 +3646,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   
>   fence_driver_init:
>   	/* Fence driver */
> -	r = amdgpu_fence_driver_init(adev);
> +	r = amdgpu_fence_driver_sw_init(adev);
>   	if (r) {
> -		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
> +		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
>   		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
>   		goto failed;
>   	}
> @@ -3988,7 +3988,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
>   	}
>   	amdgpu_fence_driver_hw_init(adev);
>   
> -
>   	r = amdgpu_device_ip_late_init(adev);
>   	if (r)
>   		return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 49c5c7331c53..7495911516c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -498,7 +498,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>   }
>   
>   /**
> - * amdgpu_fence_driver_init - init the fence driver
> + * amdgpu_fence_driver_sw_init - init the fence driver
>    * for all possible rings.
>    *
>    * @adev: amdgpu device pointer
> @@ -509,13 +509,13 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>    * amdgpu_fence_driver_start_ring().
>    * Returns 0 for success.
>    */
> -int amdgpu_fence_driver_init(struct amdgpu_device *adev)
> +int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
>   {
>   	return 0;
>   }
>   
>   /**
> - * amdgpu_fence_driver_fini - tear down the fence driver
> + * amdgpu_fence_driver_hw_fini - tear down the fence driver
>    * for all possible rings.
>    *
>    * @adev: amdgpu device pointer
> @@ -531,8 +531,7 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
>   
>   		if (!ring || !ring->fence_drv.initialized)
>   			continue;
> -		if (!ring->no_scheduler)
> -			drm_sched_fini(&ring->sched);
> +
>   		/* You can't wait for HW to signal if it's gone */
>   		if (!drm_dev_is_unplugged(&adev->ddev))
>   			r = amdgpu_fence_wait_empty(ring);
> @@ -560,6 +559,9 @@ void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
>   		if (!ring || !ring->fence_drv.initialized)
>   			continue;
>   
> +		if (!ring->no_scheduler)
> +			drm_sched_fini(&ring->sched);
> +
>   		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
>   			dma_fence_put(ring->fence_drv.fences[j]);
>   		kfree(ring->fence_drv.fences);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 27adffa7658d..9c11ced4312c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -106,7 +106,6 @@ struct amdgpu_fence_driver {
>   	struct dma_fence		**fences;
>   };
>   
> -int amdgpu_fence_driver_init(struct amdgpu_device *adev);
>   void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
>   
>   int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
> @@ -115,9 +114,10 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>   int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
>   				   struct amdgpu_irq_src *irq_src,
>   				   unsigned irq_type);
> +void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
>   void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
> +int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
>   void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
> -void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
>   int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
>   		      unsigned flags);
>   int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,


More information about the amd-gfx mailing list