[PATCH v6 13/16] drm/amdgpu: Fix hang on device removal.
Andrey Grodzovsky
andrey.grodzovsky at amd.com
Mon May 10 16:36:22 UTC 2021
If removing while commands in flight you cannot wait to flush the
HW fences on a ring since the device is gone.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 1ffb36bd0b19..fa03702ecbfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -36,6 +36,7 @@
#include <linux/firmware.h>
#include <linux/pm_runtime.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -525,8 +526,7 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
*/
void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev)
{
- unsigned i, j;
- int r;
+ int i, r;
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -535,11 +535,15 @@ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev)
continue;
if (!ring->no_scheduler)
drm_sched_fini(&ring->sched);
- r = amdgpu_fence_wait_empty(ring);
- if (r) {
- /* no need to trigger GPU reset as we are unloading */
+ /* You can't wait for HW to signal if it's gone */
+ if (!drm_dev_is_unplugged(&adev->ddev))
+ r = amdgpu_fence_wait_empty(ring);
+ else
+ r = -ENODEV;
+ /* no need to trigger GPU reset as we are unloading */
+ if (r)
amdgpu_fence_driver_force_completion(ring);
- }
+
if (ring->fence_drv.irq_src)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
--
2.25.1
More information about the amd-gfx
mailing list