[PATCH 1/2] drm/amdkfd: svm deferred_list work continue cleanup after mm gone
Ji, Ruili
Ruili.Ji at amd.com
Thu Jan 20 09:47:25 UTC 2022
[AMD Official Use Only]
sudo ./kfdtest --gtest_filter=KFDSVM*
sudo ./kfdtest
All tests pass.
Tested-by: Ruili Ji <ruili.ji at amd.com>
-----Original Message-----
From: Yang, Philip <Philip.Yang at amd.com>
Sent: 2022年1月20日 0:23
To: amd-gfx at lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Ji, Ruili <Ruili.Ji at amd.com>; Yang, Philip <Philip.Yang at amd.com>
Subject: [PATCH 1/2] drm/amdkfd: svm deferred_list work continue cleanup after mm gone
After the mm is removed from task->mm, the deferred_list work should continue to process deferred_range_list — which may have been split into child ranges — to avoid leaking child ranges, and should remove the ranges' mmu interval notifiers to avoid leaking mm_count references on the mm. However, it must skip updating notifiers and inserting new notifiers, since the mm is gone.
Signed-off-by: Philip Yang <Philip.Yang at amd.com>
Reported-by: Ruili Ji <ruili.ji at amd.com>
Tested-by: Ruili Ji <ruili.ji at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41 ++++++++++++++++------------
1 file changed, 24 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2805ba74c80..9ec195e1ef23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1985,10 +1985,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm, }
static void
-svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
+ struct mm_struct *mm)
{
- struct mm_struct *mm = prange->work_item.mm;
-
switch (prange->work_item.op) {
case SVM_OP_NULL:
pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n", @@ -2004,25 +2003,29 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
case SVM_OP_UPDATE_RANGE_NOTIFIER:
pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
svms, prange, prange->start, prange->last);
- svm_range_update_notifier_and_interval_tree(mm, prange);
+ if (mm)
+ svm_range_update_notifier_and_interval_tree(mm, prange);
break;
case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP:
pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
svms, prange, prange->start, prange->last);
- svm_range_update_notifier_and_interval_tree(mm, prange);
+ if (mm)
+ svm_range_update_notifier_and_interval_tree(mm, prange);
/* TODO: implement deferred validation and mapping */
break;
case SVM_OP_ADD_RANGE:
pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
prange->start, prange->last);
svm_range_add_to_svms(prange);
- svm_range_add_notifier_locked(mm, prange);
+ if (mm)
+ svm_range_add_notifier_locked(mm, prange);
break;
case SVM_OP_ADD_RANGE_AND_MAP:
pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms,
prange, prange->start, prange->last);
svm_range_add_to_svms(prange);
- svm_range_add_notifier_locked(mm, prange);
+ if (mm)
+ svm_range_add_notifier_locked(mm, prange);
/* TODO: implement deferred validation and mapping */
break;
default:
@@ -2071,20 +2074,22 @@ static void svm_range_deferred_list_work(struct work_struct *work)
pr_debug("enter svms 0x%p\n", svms);
p = container_of(svms, struct kfd_process, svms);
- /* Avoid mm is gone when inserting mmu notifier */
+
+ /* If mm is gone, continue cleanup the deferred_range_list */
mm = get_task_mm(p->lead_thread);
- if (!mm) {
+ if (!mm)
pr_debug("svms 0x%p process mm gone\n", svms);
- return;
- }
+
retry:
- mmap_write_lock(mm);
+ if (mm)
+ mmap_write_lock(mm);
/* Checking for the need to drain retry faults must be inside
* mmap write lock to serialize with munmap notifiers.
*/
if (unlikely(atomic_read(&svms->drain_pagefaults))) {
- mmap_write_unlock(mm);
+ if (mm)
+ mmap_write_unlock(mm);
svm_range_drain_retry_fault(svms);
goto retry;
}
@@ -2109,19 +2114,21 @@ static void svm_range_deferred_list_work(struct work_struct *work)
pr_debug("child prange 0x%p op %d\n", pchild,
pchild->work_item.op);
list_del_init(&pchild->child_list);
- svm_range_handle_list_op(svms, pchild);
+ svm_range_handle_list_op(svms, pchild, mm);
}
mutex_unlock(&prange->migrate_mutex);
- svm_range_handle_list_op(svms, prange);
+ svm_range_handle_list_op(svms, prange, mm);
mutex_unlock(&svms->lock);
spin_lock(&svms->deferred_list_lock);
}
spin_unlock(&svms->deferred_list_lock);
- mmap_write_unlock(mm);
- mmput(mm);
+ if (mm) {
+ mmap_write_unlock(mm);
+ mmput(mm);
+ }
pr_debug("exit svms 0x%p\n", svms);
}
--
2.17.1
More information about the amd-gfx
mailing list