[PATCH 23/35] drm/amdkfd: invalidate tables on page retry fault

Felix Kuehling Felix.Kuehling at amd.com
Thu Jan 7 03:01:15 UTC 2021


From: Alex Sierra <alex.sierra at amd.com>

GPU page tables are invalidated by unmapping the prange directly in
the MMU notifier when page fault retry is enabled through the
amdgpu_noretry global parameter. The page table restore is
performed in the page fault handler.

If XNACK is on, we need to update the GPU mapping after prefetch
migration to avoid a GPU VM fault, because range migration unmaps the
range from the GPUs and no restore work is scheduled to update the GPU
mapping.

Signed-off-by: Alex Sierra <alex.sierra at amd.com>
Signed-off-by: Philip Yang <Philip.Yang at amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 37f35f986930..ea27c5ed4ef3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1279,7 +1279,9 @@ svm_range_evict(struct svm_range_list *svms, struct mm_struct *mm,
 	int r = 0;
 	struct interval_tree_node *node;
 	struct svm_range *prange;
+	struct kfd_process *p;
 
+	p = container_of(svms, struct kfd_process, svms);
 	svms_lock(svms);
 
 	pr_debug("invalidate svms 0x%p [0x%lx 0x%lx]\n", svms, start, last);
@@ -1292,8 +1294,13 @@ svm_range_evict(struct svm_range_list *svms, struct mm_struct *mm,
 		next = interval_tree_iter_next(node, start, last);
 
 		invalid = atomic_inc_return(&prange->invalid);
-		evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
-		if (evicted_ranges == 1) {
+
+		if (!p->xnack_enabled) {
+			evicted_ranges =
+				atomic_inc_return(&svms->evicted_ranges);
+			if (evicted_ranges != 1)
+				goto next_node;
+
 			pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
 				 prange->svms, prange->it_node.start,
 				 prange->it_node.last);
@@ -1306,7 +1313,14 @@ svm_range_evict(struct svm_range_list *svms, struct mm_struct *mm,
 			pr_debug("schedule to restore svm %p ranges\n", svms);
 			schedule_delayed_work(&svms->restore_work,
 			   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+		} else {
+			pr_debug("invalidate svms 0x%p [0x%lx 0x%lx] %d\n",
+				 prange->svms, prange->it_node.start,
+				 prange->it_node.last, invalid);
+			if (invalid == 1)
+				svm_range_unmap_from_gpus(prange);
 		}
+next_node:
 		node = next;
 	}
 
@@ -1944,7 +1958,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
 		if (r)
 			goto out_unlock;
 
-		if (migrated) {
+		if (migrated && !p->xnack_enabled) {
 			pr_debug("restore_work will update mappings of GPUs\n");
 			mutex_unlock(&prange->mutex);
 			continue;
-- 
2.29.2



More information about the dri-devel mailing list