[PATCH] drm/amdgpu: avoid over-handle of fence driver fini in s3 test

Christian König christian.koenig at amd.com
Thu Jul 29 11:11:27 UTC 2021


Am 29.07.21 um 12:49 schrieb Guchun Chen:
> In amdgpu_fence_driver_hw_fini, no need to call drm_sched_fini to stop
> scheduler in s3 test, otherwise, fence errors will occur after resume.
> So introduce a new parameter to distingiush the case in this API.

NAK, the problem is rather that drm_sched_fini() is part of the sw 
shutdown and should never be called during hw_fini.

Christian.

>
> Fixes: cd87a6dcf6af drm/amdgpu: adjust fence driver enable sequence
> Signed-off-by: Guchun Chen <guchun.chen at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 8 +++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   | 2 +-
>   3 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b1d2dc39e8be..aaff8ebbb7dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3844,7 +3844,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
>   		else
>   			drm_atomic_helper_shutdown(adev_to_drm(adev));
>   	}
> -	amdgpu_fence_driver_hw_fini(adev);
> +	amdgpu_fence_driver_hw_fini(adev, false);
>   
>   	if (adev->pm_sysfs_en)
>   		amdgpu_pm_sysfs_fini(adev);
> @@ -3941,7 +3941,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
>   	/* evict vram memory */
>   	amdgpu_bo_evict_vram(adev);
>   
> -	amdgpu_fence_driver_hw_fini(adev);
> +	amdgpu_fence_driver_hw_fini(adev, adev->in_suspend);
>   
>   	amdgpu_device_ip_suspend_phase2(adev);
>   	/* evict remaining vram memory
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 49c5c7331c53..7e6a73c2919d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -515,14 +515,15 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
>   }
>   
>   /**
> - * amdgpu_fence_driver_fini - tear down the fence driver
> + * amdgpu_fence_driver_hw_fini - tear down the fence driver
>    * for all possible rings.
>    *
>    * @adev: amdgpu device pointer
> + * @in_reset: indicator to distingiush device removal case or s3 case
>    *
>    * Tear down the fence driver for all possible rings (all asics).
>    */
> -void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
> +void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev, bool in_reset)
>   {
>   	int i, r;
>   
> @@ -531,8 +532,9 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
>   
>   		if (!ring || !ring->fence_drv.initialized)
>   			continue;
> -		if (!ring->no_scheduler)
> +		if (!ring->no_scheduler && !in_reset)
>   			drm_sched_fini(&ring->sched);
> +
>   		/* You can't wait for HW to signal if it's gone */
>   		if (!drm_dev_is_unplugged(&adev->ddev))
>   			r = amdgpu_fence_wait_empty(ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 27adffa7658d..42cbecfc26a3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -115,7 +115,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>   int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
>   				   struct amdgpu_irq_src *irq_src,
>   				   unsigned irq_type);
> -void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
> +void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev, bool in_reset);
>   void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
>   void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
>   int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,



More information about the amd-gfx mailing list