[PATCH v3] drm/amdgpu: Fix repeated calls to svm_range_restore_pages
Emily Deng
Emily.Deng@amd.com
Thu Jan 9 06:08:53 UTC 2025
Because page table (pt) BOs are freed via a delayed work item, a BO that
is queued for freeing can be reused before the work actually runs. The
stale mapping then causes an unexpected page fault, which in turn calls
svm_range_restore_pages, and the cycle repeats.
In detail (see the sketch after step 3):
1. The unmap path wants to free the pt in the code below, but the BO is
not freed immediately; the free is deferred with
schedule_work(&vm->pt_free_work):
[ 92.276838] Call Trace:
[ 92.276841] dump_stack+0x63/0xa0
[ 92.276887] amdgpu_vm_pt_free_list+0xfb/0x120 [amdgpu]
[ 92.276932] amdgpu_vm_update_range+0x69c/0x8e0 [amdgpu]
[ 92.276990] svm_range_unmap_from_gpus+0x112/0x310 [amdgpu]
[ 92.277046] svm_range_cpu_invalidate_pagetables+0x725/0x780 [amdgpu]
[ 92.277050] ? __alloc_pages_nodemask+0x19f/0x3e0
[ 92.277051] mn_itree_invalidate+0x72/0xc0
[ 92.277052] __mmu_notifier_invalidate_range_start+0x48/0x60
[ 92.277054] migrate_vma_collect+0xf6/0x100
[ 92.277055] migrate_vma_setup+0xcf/0x120
[ 92.277109] svm_migrate_ram_to_vram+0x256/0x6b0 [amdgpu]
2. svm_range_map_to_gpu->amdgpu_vm_update_range is called to update the
page table. At this point it reuses the same entry BO that was queued
for freeing in step 1.
3. pt_free_work then runs and frees the BO. Any subsequent GPU access
through that page table faults, because the pt BO has been freed, and
the fault handler calls svm_range_restore_pages again.
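Sketch of the interleaving, using the names from the trace and the diff
below (simplified, not the exact driver code; the unmap side shows the
pre-patch list_splice_init() path):

    /* Thread A: unmap path queues the pt BOs for deferred freeing */
    spin_lock(&vm->status_lock);
    list_splice_init(&params->tlb_flush_waitlist, &vm->pt_freed);
    spin_unlock(&vm->status_lock);
    schedule_work(&vm->pt_free_work);   /* actual free runs later */

    /* Thread B: before pt_free_work runs, the map path picks up and
     * re-populates the same entry BO still queued on vm->pt_freed */
    amdgpu_vm_update_range(...);        /* reuses the doomed pt BO */

    /* Worker: pt_free_work finally frees the BO that thread B just
     * mapped; the next GPU access through it faults, and the fault
     * handler calls svm_range_restore_pages again */
    amdgpu_vm_pt_free(entry);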
How to fix it:
Duplicate the entries on the free list, so the deferred worker frees
its own private copies while the originals can be detached and safely
reused by the mapping path.
Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index f78a0434a48f..62a5e0ad4243 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -561,8 +561,10 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
/* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
amdgpu_bo_reserve(vm->root.bo, true);
- list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
+ list_for_each_entry_safe(entry, next, &pt_freed, vm_status) {
amdgpu_vm_pt_free(entry);
+ kfree(entry);
+ }
amdgpu_bo_unreserve(vm->root.bo);
}
@@ -578,7 +580,7 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
struct amdgpu_vm_update_params *params)
{
- struct amdgpu_vm_bo_base *entry, *next;
+ struct amdgpu_vm_bo_base *entry, *next, *new_entry;
struct amdgpu_vm *vm = params->vm;
bool unlocked = params->unlocked;
@@ -587,7 +589,20 @@ void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
if (unlocked) {
spin_lock(&vm->status_lock);
- list_splice_init(&params->tlb_flush_waitlist, &vm->pt_freed);
+ list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status) {
+ new_entry = kmalloc(sizeof(*new_entry), GFP_ATOMIC); /* atomic: under status_lock */
+ if (!new_entry) {
+ spin_unlock(&vm->status_lock);
+ return;
+ }
+ *new_entry = *entry;
+ INIT_LIST_HEAD(&new_entry->vm_status);
+ list_add_tail(&new_entry->vm_status, &vm->pt_freed);
+ if (entry->bo) {
+ entry->bo = NULL;
+ list_del(&entry->vm_status);
+ }
+ }
spin_unlock(&vm->status_lock);
schedule_work(&vm->pt_free_work);
return;
--
2.34.1