[RFC PATCH 2/3] drm/amdgpu: Add range param to amdgpu_vm_update_range

Felix Kuehling <Felix.Kuehling at amd.com>
Tue Dec 20 23:27:03 UTC 2022


This allows page table updates to be coordinated with interval notifiers
to avoid writing stale entries to the page table. Moving the critical
section inside the page table update avoids lock dependencies with page
table allocations under the notifier lock.

Suggested-by: Christian König <christian.koenig at amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 27 ++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    | 58 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  6 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c      |  4 +-
 4 files changed, 77 insertions(+), 18 deletions(-)
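
For readers unfamiliar with the pattern: a caller that faults pages with
hmm_range_fault() can now pass its hmm_range down so that the validity
check happens under the notifier lock at commit time, retrying on
-EAGAIN. The sketch below shows what such a retry loop could look like;
it is illustrative only — the function itself, the pfn translation step,
and the surrounding notifier setup are assumptions, not part of this
patch.

/* Illustrative caller (not part of this patch): fault the pages, then
 * let amdgpu_vm_update_range() validate the range under the notifier
 * lock at commit time and restart if it raced with an invalidation.
 */
static int map_range_sketch(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			    struct mm_struct *mm, struct hmm_range *range,
			    dma_addr_t *pages_addr, uint64_t start,
			    uint64_t last, uint64_t flags,
			    struct dma_fence **fence)
{
	int r;

retry:
	range->notifier_seq = mmu_interval_read_begin(range->notifier);
	mmap_read_lock(mm);
	r = hmm_range_fault(range);
	mmap_read_unlock(mm);
	if (r == -EBUSY)
		goto retry;	/* invalidated while faulting */
	if (r)
		return r;

	/* ... translate range->hmm_pfns into pages_addr here ... */

	/* The commit re-checks mmu_interval_read_retry() under the
	 * notifier lock and returns -EAGAIN if the range went stale
	 * since mmu_interval_read_begin() above.
	 */
	r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
				   start, last, flags, 0, 0, NULL,
				   pages_addr, range, fence);
	if (r == -EAGAIN)
		goto retry;
	return r;
}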

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a04f7aef4ca9..556d2e5d90e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -768,6 +768,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
  * @vram_base: base for vram mappings
  * @res: ttm_resource to map
  * @pages_addr: DMA addresses to use for mapping
+ * @range: optional HMM range for coordination with interval notifier
  * @fence: optional resulting fence
  *
  * Fill in the page table entries between @start and @last.
@@ -780,7 +781,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			   struct dma_resv *resv, uint64_t start, uint64_t last,
 			   uint64_t flags, uint64_t offset, uint64_t vram_base,
 			   struct ttm_resource *res, dma_addr_t *pages_addr,
-			   struct dma_fence **fence)
+			   struct hmm_range *range, struct dma_fence **fence)
 {
 	struct amdgpu_vm_update_params params;
 	struct amdgpu_vm_tlb_seq_cb *tlb_cb;
@@ -794,7 +795,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
 	if (!tlb_cb) {
 		r = -ENOMEM;
-		goto error_unlock;
+		goto error_dev_exit;
 	}
 
 	/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
@@ -811,6 +812,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
+#ifdef CONFIG_MMU_NOTIFIER
+	params.range = range;
+#endif
 	params.immediate = immediate;
 	params.pages_addr = pages_addr;
 	params.unlocked = unlocked;
@@ -823,12 +827,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	else
 		sync_mode = AMDGPU_SYNC_EXPLICIT;
 
-	amdgpu_vm_eviction_lock(vm);
-	if (vm->evicting) {
-		r = -EBUSY;
-		goto error_free;
-	}
-
 	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
 		struct dma_fence *tmp = dma_fence_get_stub();
 
@@ -893,7 +891,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		start = tmp;
 	}
 
+	r = amdgpu_vm_pts_lock(&params);
+	if (r)
+		goto error_free;
 	r = vm->update_funcs->commit(&params, fence);
+	amdgpu_vm_pts_unlock(&params);
 
 	if (flush_tlb || params.table_freed) {
 		tlb_cb->vm = vm;
@@ -911,8 +913,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 error_free:
 	kfree(tlb_cb);
 
-error_unlock:
-	amdgpu_vm_eviction_unlock(vm);
+error_dev_exit:
 	drm_dev_exit(idx);
 	return r;
 }
@@ -1058,7 +1059,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 					   resv, mapping->start, mapping->last,
 					   update_flags, mapping->offset,
 					   vram_base, mem, pages_addr,
-					   last_update);
+					   NULL, last_update);
 		if (r)
 			return r;
 	}
@@ -1253,7 +1254,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
 					   mapping->start, mapping->last,
 					   init_pte_value, 0, 0, NULL, NULL,
-					   &f);
+					   NULL, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
 		if (r) {
 			dma_fence_put(f);
@@ -2512,7 +2513,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 	}
 
 	r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
-				   addr, flags, value, 0, NULL, NULL, NULL);
+				   addr, flags, value, 0, NULL, NULL, NULL, NULL);
 	if (r)
 		goto error_unlock;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 07af80df812b..8fad51d66bce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -31,6 +31,8 @@
 #include <drm/drm_file.h>
 #include <drm/ttm/ttm_bo_driver.h>
 #include <linux/sched/mm.h>
+#include <linux/hmm.h>
+#include <linux/mmu_notifier.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -196,6 +198,13 @@ struct amdgpu_vm_update_params {
 	 */
 	struct amdgpu_vm *vm;
 
+#ifdef CONFIG_MMU_NOTIFIER
+	/**
+	 * @range: optional HMM range for coordination with interval notifier
+	 */
+	struct hmm_range *range;
+#endif
+
 	/**
 	 * @immediate: if changes should be made immediately
 	 */
@@ -411,7 +420,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			   struct dma_resv *resv, uint64_t start, uint64_t last,
 			   uint64_t flags, uint64_t offset, uint64_t vram_base,
 			   struct ttm_resource *res, dma_addr_t *pages_addr,
-			   struct dma_fence **fence);
+			   struct hmm_range *range, struct dma_fence **fence);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
@@ -535,4 +544,51 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
 	mutex_unlock(&vm->eviction_lock);
 }
 
+/*
+ * Make page tables safe to access without a reservation and ensure that the
+ * ptes being written are still valid. This can fail if page tables are being
+ * evicted (-EBUSY) or an HMM range is being invalidated (-EAGAIN).
+ */
+static inline int amdgpu_vm_pts_lock(struct amdgpu_vm_update_params *params)
+{
+	int r = 0;
+
+#ifdef CONFIG_MMU_NOTIFIER
+	if (params->range) {
+		mutex_lock(params->vm->notifier_lock);
+		if (mmu_interval_read_retry(params->range->notifier,
+					    params->range->notifier_seq)) {
+			r = -EAGAIN;
+			goto error_unlock_notifier;
+		}
+	}
+#endif
+	amdgpu_vm_eviction_lock(params->vm);
+	if (params->vm->evicting) {
+		r = -EBUSY;
+		goto error_unlock;
+	}
+
+	return 0;
+
+error_unlock:
+	amdgpu_vm_eviction_unlock(params->vm);
+#ifdef CONFIG_MMU_NOTIFIER
+error_unlock_notifier:
+	if (params->range)
+		mutex_unlock(params->vm->notifier_lock);
+#endif
+
+	return r;
+}
+
+static inline void amdgpu_vm_pts_unlock(struct amdgpu_vm_update_params *params)
+{
+	amdgpu_vm_eviction_unlock(params->vm);
+#ifdef CONFIG_MMU_NOTIFIER
+	if (params->range)
+		mutex_unlock(params->vm->notifier_lock);
+#endif
+}
+
 #endif
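
The other half of this coordination is the interval notifier's
invalidate callback, which must take the same notifier lock before
bumping the sequence number; that is what makes the
mmu_interval_read_retry() check in amdgpu_vm_pts_lock() above reliable.
A minimal sketch of such a callback follows — the example_range wrapper
and its vm back-pointer are assumptions for illustration, not code from
this series.

static bool example_invalidate(struct mmu_interval_notifier *mni,
			       const struct mmu_notifier_range *range,
			       unsigned long cur_seq)
{
	struct example_range *erange = container_of(mni, struct example_range,
						    notifier);

	if (!mmu_notifier_range_blockable(range))
		return false;

	/* Taking the lock that amdgpu_vm_pts_lock() holds during commit
	 * serializes against in-flight page table updates; bumping the
	 * sequence makes any update that sampled an older notifier_seq
	 * fail its mmu_interval_read_retry() check with -EAGAIN.
	 */
	mutex_lock(erange->vm->notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);
	mutex_unlock(erange->vm->notifier_lock);

	return true;
}

static const struct mmu_interval_notifier_ops example_notifier_ops = {
	.invalidate = example_invalidate,
};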
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index b5f3bba851db..2891284eba1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -597,9 +597,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
 	if (entry->bo)
 		return 0;
 
-	amdgpu_vm_eviction_unlock(vm);
 	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
-	amdgpu_vm_eviction_lock(vm);
 	if (r)
 		return r;
 
@@ -960,6 +958,9 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 		entry_end += cursor.pfn & ~(entry_end - 1);
 		entry_end = min(entry_end, end);
 
+		r = amdgpu_vm_pts_lock(params);
+		if (r)
+			return r;
 		do {
 			struct amdgpu_vm *vm = params->vm;
 			uint64_t upd_end = min(entry_end, frag_end);
@@ -992,6 +993,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 					break;
 			}
 		} while (frag_start < entry_end);
+		amdgpu_vm_pts_unlock(params);
 
 		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
 			/* Free all child entries.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2dc3b04064bd..cc46878901c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1217,7 +1217,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
 				      last, init_pte_value, 0, 0, NULL, NULL,
-				      fence);
+				      NULL, fence);
 }
 
 static int
@@ -1323,7 +1323,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
 					   pte_flags,
 					   (last_start - prange->start) << PAGE_SHIFT,
 					   bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
-					   NULL, dma_addr, &vm->last_update);
+					   NULL, dma_addr, NULL, &vm->last_update);
 
 		for (j = last_start - prange->start; j <= i; j++)
 			dma_addr[j] |= last_domain;
-- 
2.32.0


