[PATCH] drm/amdgpu: Fix the dead lock issue.
Emily Deng
Emily.Deng at amd.com
Tue Sep 11 02:51:33 UTC 2018
It will ramdomly have the dead lock issue when test TDR:
1. amdgpu_device_handle_vram_lost gets the lock shadow_list_lock
2. amdgpu_bo_create locked the bo's resv lock
3. amdgpu_bo_create_shadow is waiting for the shadow_list_lock
4. amdgpu_device_recover_vram_from_shadow is waiting for the bo's resv
lock.
v2:
Make a local copy of the list
Signed-off-by: Emily Deng <Emily.Deng at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2a21267..8c81404 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3105,6 +3105,9 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
long r = 1;
int i = 0;
long tmo;
+ struct list_head local_shadow_list;
+
+ INIT_LIST_HEAD(&local_shadow_list);
if (amdgpu_sriov_runtime(adev))
tmo = msecs_to_jiffies(8000);
@@ -3112,8 +3115,19 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
tmo = msecs_to_jiffies(100);
DRM_INFO("recover vram bo from shadow start\n");
+
+ mutex_lock(&adev->shadow_list_lock);
+ list_splice_init(&adev->shadow_list, &local_shadow_list);
+ mutex_unlock(&adev->shadow_list_lock);
+
+
mutex_lock(&adev->shadow_list_lock);
- list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+ list_for_each_entry_safe(bo, tmp, &local_shadow_list, shadow_list) {
+ mutex_unlock(&adev->shadow_list_lock);
+
+ if (!bo)
+ continue;
+
next = NULL;
amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
if (fence) {
@@ -3132,9 +3146,14 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
dma_fence_put(fence);
fence = next;
+ mutex_lock(&adev->shadow_list_lock);
}
mutex_unlock(&adev->shadow_list_lock);
+ mutex_lock(&adev->shadow_list_lock);
+ list_splice_init(&local_shadow_list, &adev->shadow_list);
+ mutex_unlock(&adev->shadow_list_lock);
+
if (fence) {
r = dma_fence_wait_timeout(fence, false, tmo);
if (r == 0)
--
2.7.4
More information about the amd-gfx
mailing list