[PATCH v4 4/7] amd/amdkfd: Unmap range from GPU based on granularity

Philip Yang Philip.Yang at amd.com
Mon Jan 15 22:49:47 UTC 2024


When the MMU notifier invalidates the range, align the start and last
address to the range granularity before unmapping from the GPU and
updating the bitmap_map flag. Skip the GPU unmap if the range is already
unmapped according to the bitmap_map flag. This avoids unmapping a single
page from the GPU and flushing the TLB, and also solves the rocgdb CWSR
migration related issue.
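
For reference, a minimal sketch of the kind of granularity alignment this
relies on. The real svm_range_align_start()/svm_range_align_last() helpers
are introduced elsewhere in this series; the signatures and clamping below
are assumptions, not the actual implementation:

	/*
	 * Hypothetical sketch: expand the invalidated interval [start, last]
	 * outward to granularity boundaries, clamped to the prange (or child)
	 * bounds, so a whole granule is unmapped instead of a single page.
	 * ALIGN()/ALIGN_DOWN() come from <linux/align.h>.
	 */
	static unsigned long
	svm_range_align_start(unsigned long start, unsigned long range_start,
			      unsigned long size)
	{
		unsigned long s = max(start, range_start);

		/* round down to a granularity boundary, not below range_start */
		return max(ALIGN_DOWN(s, size), range_start);
	}

	static unsigned long
	svm_range_align_last(unsigned long last, unsigned long range_last,
			     unsigned long size)
	{
		unsigned long l = min(last, range_last);

		/* round up to the last page of the granule, not past range_last */
		return min(ALIGN(l + 1, size) - 1, range_last);
	}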

Unmapping the range from the CPU removes and splits the range, so the
start and last address cannot be aligned to the range granularity there.
Change the order to split the range and its bitmap_map flag first, then
unmap the range from the GPU. If the GPU unmap ran first, it would update
the bitmap_map flag and the split could compute incorrect bitmap_map
values for the remaining ranges.
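
To illustrate why the ordering matters, a rough sketch of how a per-granule
bitmap_map could be tested is shown below; the field layout and the body of
svm_range_partial_mapped() are assumptions based on the commit message, not
the code added by this series:

	/*
	 * Hypothetical sketch: bitmap_map tracks which granules of a prange
	 * are currently mapped on the GPU.  The GPU unmap clears bits, so if
	 * it ran before svm_range_unmap_split(), the ranges created by the
	 * split would inherit already-cleared bits and wrongly appear
	 * unmapped.
	 */
	static bool
	svm_range_partial_mapped(struct svm_range *prange, unsigned long start,
				 unsigned long last)
	{
		unsigned long first, end;

		first = (max(start, prange->start) - prange->start) >>
			prange->granularity;
		end = (min(last, prange->last) - prange->start) >>
		      prange->granularity;

		/* true if any granule in [first, end] is still mapped */
		return find_next_bit(prange->bitmap_map, end + 1, first) <= end;
	}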

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 42 +++++++++++++++++++---------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a003406db067..7a30c3e58234 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2114,6 +2114,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 	} else {
 		unsigned long s, l;
 		uint32_t trigger;
+		u64 size = 1UL << prange->granularity;
+
+		if (!svm_range_partial_mapped(prange, start, last)) {
+			pr_debug("svms 0x%p [0x%lx 0x%lx] unmapped already\n",
+				 prange->svms, start, last);
+			return 0;
+		}
 
 		if (event == MMU_NOTIFY_MIGRATE)
 			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
@@ -2122,16 +2129,17 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 
 		pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
 			 prange->svms, start, last);
+
 		list_for_each_entry(pchild, &prange->child_list, child_list) {
 			mutex_lock_nested(&pchild->lock, 1);
-			s = max(start, pchild->start);
-			l = min(last, pchild->last);
+			s = svm_range_align_start(start, pchild->start, size);
+			l = svm_range_align_last(last, pchild->last, size);
 			if (l >= s)
 				svm_range_unmap_from_gpus(pchild, s, l, trigger);
 			mutex_unlock(&pchild->lock);
 		}
-		s = max(start, prange->start);
-		l = min(last, prange->last);
+		s = svm_range_align_start(start, prange->start, size);
+		l = svm_range_align_last(last, prange->last, size);
 		if (l >= s)
 			svm_range_unmap_from_gpus(prange, s, l, trigger);
 	}
@@ -2645,24 +2653,32 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 
 	list_for_each_entry(pchild, &prange->child_list, child_list) {
 		mutex_lock_nested(&pchild->lock, 1);
-		s = max(start, pchild->start);
-		l = min(last, pchild->last);
-		if (l >= s)
-			svm_range_unmap_from_gpus(pchild, s, l, trigger);
 		svm_range_unmap_split(mm, prange, pchild, start, last);
 		mutex_unlock(&pchild->lock);
 	}
-	s = max(start, prange->start);
-	l = min(last, prange->last);
-	if (l >= s)
-		svm_range_unmap_from_gpus(prange, s, l, trigger);
 	svm_range_unmap_split(mm, prange, prange, start, last);
-
 	if (unmap_parent)
 		svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
 	else
 		svm_range_add_list_work(svms, prange, mm,
 					SVM_OP_UPDATE_RANGE_NOTIFIER);
+
+	list_for_each_entry(pchild, &prange->child_list, child_list) {
+		if (pchild->work_item.op != SVM_OP_UNMAP_RANGE)
+			continue;
+
+		s = max(start, pchild->start);
+		l = min(last, pchild->last);
+		if (l >= s)
+			svm_range_unmap_from_gpus(pchild, s, l, trigger);
+	}
+	if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+		s = max(start, prange->start);
+		l = min(last, prange->last);
+		if (l >= s)
+			svm_range_unmap_from_gpus(prange, s, l, trigger);
+	}
+
 	schedule_deferred_list_work(svms);
 
 	kfd_unref_process(p);
-- 
2.35.1


