[PATCH 1/2] drm/amdkfd: svm deferred_list work continue cleanup after mm gone

Ji, Ruili Ruili.Ji at amd.com
Thu Jan 20 09:47:25 UTC 2022


[AMD Official Use Only]

sudo ./kfdtest --gtest_filter=KFDSVM*
sudo ./kfdtest
Test results are pass.
Tested-by: Ruili Ji <ruili.ji at amd.com>

-----Original Message-----
From: Yang, Philip <Philip.Yang at amd.com>
Sent: 2022年1月20日 0:23
To: amd-gfx at lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Ji, Ruili <Ruili.Ji at amd.com>; Yang, Philip <Philip.Yang at amd.com>
Subject: [PATCH 1/2] drm/amdkfd: svm deferred_list work continue cleanup after mm gone

After mm is removed from task->mm, deferred_list work should continue to handle deferred_range_list which maybe split to child range to avoid child range leak, and remove ranges mmu interval notifier to avoid mm mm_count leak, but skip updating notifier and inserting new notifier.

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
Reported-by: Ruili Ji <ruili.ji at amd.com>
Tested-by: Ruili Ji <ruili.ji at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41 ++++++++++++++++------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2805ba74c80..9ec195e1ef23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1985,10 +1985,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,  }

 static void
-svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
+                        struct mm_struct *mm)
 {
-       struct mm_struct *mm = prange->work_item.mm;
-
        switch (prange->work_item.op) {
        case SVM_OP_NULL:
                pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n", @@ -2004,25 +2003,29 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
        case SVM_OP_UPDATE_RANGE_NOTIFIER:
                pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
                         svms, prange, prange->start, prange->last);
-               svm_range_update_notifier_and_interval_tree(mm, prange);
+               if (mm)
+                       svm_range_update_notifier_and_interval_tree(mm, prange);
                break;
        case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP:
                pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
                         svms, prange, prange->start, prange->last);
-               svm_range_update_notifier_and_interval_tree(mm, prange);
+               if (mm)
+                       svm_range_update_notifier_and_interval_tree(mm, prange);
                /* TODO: implement deferred validation and mapping */
                break;
        case SVM_OP_ADD_RANGE:
                pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
                         prange->start, prange->last);
                svm_range_add_to_svms(prange);
-               svm_range_add_notifier_locked(mm, prange);
+               if (mm)
+                       svm_range_add_notifier_locked(mm, prange);
                break;
        case SVM_OP_ADD_RANGE_AND_MAP:
                pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms,
                         prange, prange->start, prange->last);
                svm_range_add_to_svms(prange);
-               svm_range_add_notifier_locked(mm, prange);
+               if (mm)
+                       svm_range_add_notifier_locked(mm, prange);
                /* TODO: implement deferred validation and mapping */
                break;
        default:
@@ -2071,20 +2074,22 @@ static void svm_range_deferred_list_work(struct work_struct *work)
        pr_debug("enter svms 0x%p\n", svms);

        p = container_of(svms, struct kfd_process, svms);
-       /* Avoid mm is gone when inserting mmu notifier */
+
+       /* If mm is gone, continue cleanup the deferred_range_list */
        mm = get_task_mm(p->lead_thread);
-       if (!mm) {
+       if (!mm)
                pr_debug("svms 0x%p process mm gone\n", svms);
-               return;
-       }
+
 retry:
-       mmap_write_lock(mm);
+       if (mm)
+               mmap_write_lock(mm);

        /* Checking for the need to drain retry faults must be inside
         * mmap write lock to serialize with munmap notifiers.
         */
        if (unlikely(atomic_read(&svms->drain_pagefaults))) {
-               mmap_write_unlock(mm);
+               if (mm)
+                       mmap_write_unlock(mm);
                svm_range_drain_retry_fault(svms);
                goto retry;
        }
@@ -2109,19 +2114,21 @@ static void svm_range_deferred_list_work(struct work_struct *work)
                        pr_debug("child prange 0x%p op %d\n", pchild,
                                 pchild->work_item.op);
                        list_del_init(&pchild->child_list);
-                       svm_range_handle_list_op(svms, pchild);
+                       svm_range_handle_list_op(svms, pchild, mm);
                }
                mutex_unlock(&prange->migrate_mutex);

-               svm_range_handle_list_op(svms, prange);
+               svm_range_handle_list_op(svms, prange, mm);
                mutex_unlock(&svms->lock);

                spin_lock(&svms->deferred_list_lock);
        }
        spin_unlock(&svms->deferred_list_lock);

-       mmap_write_unlock(mm);
-       mmput(mm);
+       if (mm) {
+               mmap_write_unlock(mm);
+               mmput(mm);
+       }
        pr_debug("exit svms 0x%p\n", svms);
 }

--
2.17.1



More information about the amd-gfx mailing list