[PATCH 1/2] drm/amdkfd: svm deferred_list work continue cleanup after mm gone

Philip Yang Philip.Yang at amd.com
Wed Jan 19 16:22:45 UTC 2022


After mm is removed from task->mm, deferred_list work should continue to
handle deferred_range_list which maybe split to child range to avoid
child range leak, and remove ranges mmu interval notifier to avoid mm
mm_count leak, but skip updating notifier and inserting new notifier.

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
Reported-by: Ruili Ji <ruili.ji at amd.com>
Tested-by: Ruili Ji <ruili.ji at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41 ++++++++++++++++------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2805ba74c80..9ec195e1ef23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1985,10 +1985,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
 }
 
 static void
-svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
+			 struct mm_struct *mm)
 {
-	struct mm_struct *mm = prange->work_item.mm;
-
 	switch (prange->work_item.op) {
 	case SVM_OP_NULL:
 		pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -2004,25 +2003,29 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
 	case SVM_OP_UPDATE_RANGE_NOTIFIER:
 		pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
 			 svms, prange, prange->start, prange->last);
-		svm_range_update_notifier_and_interval_tree(mm, prange);
+		if (mm)
+			svm_range_update_notifier_and_interval_tree(mm, prange);
 		break;
 	case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP:
 		pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
 			 svms, prange, prange->start, prange->last);
-		svm_range_update_notifier_and_interval_tree(mm, prange);
+		if (mm)
+			svm_range_update_notifier_and_interval_tree(mm, prange);
 		/* TODO: implement deferred validation and mapping */
 		break;
 	case SVM_OP_ADD_RANGE:
 		pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
 			 prange->start, prange->last);
 		svm_range_add_to_svms(prange);
-		svm_range_add_notifier_locked(mm, prange);
+		if (mm)
+			svm_range_add_notifier_locked(mm, prange);
 		break;
 	case SVM_OP_ADD_RANGE_AND_MAP:
 		pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms,
 			 prange, prange->start, prange->last);
 		svm_range_add_to_svms(prange);
-		svm_range_add_notifier_locked(mm, prange);
+		if (mm)
+			svm_range_add_notifier_locked(mm, prange);
 		/* TODO: implement deferred validation and mapping */
 		break;
 	default:
@@ -2071,20 +2074,22 @@ static void svm_range_deferred_list_work(struct work_struct *work)
 	pr_debug("enter svms 0x%p\n", svms);
 
 	p = container_of(svms, struct kfd_process, svms);
-	/* Avoid mm is gone when inserting mmu notifier */
+
+	/* If mm is gone, continue cleanup the deferred_range_list */
 	mm = get_task_mm(p->lead_thread);
-	if (!mm) {
+	if (!mm)
 		pr_debug("svms 0x%p process mm gone\n", svms);
-		return;
-	}
+
 retry:
-	mmap_write_lock(mm);
+	if (mm)
+		mmap_write_lock(mm);
 
 	/* Checking for the need to drain retry faults must be inside
 	 * mmap write lock to serialize with munmap notifiers.
 	 */
 	if (unlikely(atomic_read(&svms->drain_pagefaults))) {
-		mmap_write_unlock(mm);
+		if (mm)
+			mmap_write_unlock(mm);
 		svm_range_drain_retry_fault(svms);
 		goto retry;
 	}
@@ -2109,19 +2114,21 @@ static void svm_range_deferred_list_work(struct work_struct *work)
 			pr_debug("child prange 0x%p op %d\n", pchild,
 				 pchild->work_item.op);
 			list_del_init(&pchild->child_list);
-			svm_range_handle_list_op(svms, pchild);
+			svm_range_handle_list_op(svms, pchild, mm);
 		}
 		mutex_unlock(&prange->migrate_mutex);
 
-		svm_range_handle_list_op(svms, prange);
+		svm_range_handle_list_op(svms, prange, mm);
 		mutex_unlock(&svms->lock);
 
 		spin_lock(&svms->deferred_list_lock);
 	}
 	spin_unlock(&svms->deferred_list_lock);
 
-	mmap_write_unlock(mm);
-	mmput(mm);
+	if (mm) {
+		mmap_write_unlock(mm);
+		mmput(mm);
+	}
 	pr_debug("exit svms 0x%p\n", svms);
 }
 
-- 
2.17.1



More information about the amd-gfx mailing list