[PATCH 2/3] drm/amdkfd: handle VMA remove race

Philip Yang Philip.Yang at amd.com
Wed Nov 17 03:43:23 UTC 2021


The VMA may be removed before the unmap notifier callback runs. To avoid
this race, restore_pages takes the mmap write lock to look up the VMA,
creates the unregistered range and checks VMA access permission while
holding it, then downgrades to the mmap read lock to recover the fault.
Refactor the code so the VMA lookup is done only once instead of being
duplicated in the helpers.

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 65 ++++++++++------------------
 1 file changed, 24 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c1f367934428..3eb0a9491755 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2329,20 +2329,13 @@ svm_range_best_restore_location(struct svm_range *prange,
 }
 
 static int
-svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
-			       unsigned long *start, unsigned long *last,
-			       bool *is_heap_stack)
+svm_range_get_range_boundaries(struct kfd_process *p, struct vm_area_struct *vma,
+			       int64_t addr, unsigned long *start,
+			       unsigned long *last, bool *is_heap_stack)
 {
-	struct vm_area_struct *vma;
 	struct interval_tree_node *node;
 	unsigned long start_limit, end_limit;
 
-	vma = find_vma(p->mm, addr << PAGE_SHIFT);
-	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
-		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
-		return -EFAULT;
-	}
-
 	*is_heap_stack = (vma->vm_start <= vma->vm_mm->brk &&
 			  vma->vm_end >= vma->vm_mm->start_brk) ||
 			 (vma->vm_start <= vma->vm_mm->start_stack &&
@@ -2437,9 +2430,10 @@ svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last,
 
 static struct
 svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
-						struct kfd_process *p,
-						struct mm_struct *mm,
-						int64_t addr)
+					       struct kfd_process *p,
+					       struct mm_struct *mm,
+					       struct vm_area_struct *vma,
+					       int64_t addr)
 {
 	struct svm_range *prange = NULL;
 	unsigned long start, last;
@@ -2449,7 +2443,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
 	uint64_t bo_l = 0;
 	int r;
 
-	if (svm_range_get_range_boundaries(p, addr, &start, &last,
+	if (svm_range_get_range_boundaries(p, vma, addr, &start, &last,
 					   &is_heap_stack))
 		return NULL;
 
@@ -2552,20 +2546,13 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
 }
 
 static bool
-svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault)
+svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
 {
 	unsigned long requested = VM_READ;
-	struct vm_area_struct *vma;
 
 	if (write_fault)
 		requested |= VM_WRITE;
 
-	vma = find_vma(mm, addr << PAGE_SHIFT);
-	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
-		pr_debug("address 0x%llx VMA is removed\n", addr);
-		return true;
-	}
-
 	pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
 		vma->vm_flags);
 	return (vma->vm_flags & requested) == requested;
@@ -2582,7 +2569,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	uint64_t timestamp;
 	int32_t best_loc;
 	int32_t gpuidx = MAX_GPU_INSTANCE;
-	bool write_locked = false;
+	struct vm_area_struct *vma = NULL;
 	int r = 0;
 
 	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2606,26 +2593,22 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 
 	/* mm is available because kfd_process_notifier_release drain fault */
 	mm = p->mm;
+	mmap_write_lock(mm);
+
+	vma = find_vma(p->mm, addr << PAGE_SHIFT);
+	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+		pr_debug("VMA not found for address 0x%llx\n", addr);
+		mmap_write_downgrade(mm);
+		r = -EFAULT;
+		goto out_unlock_mm;
+	}
 
-	mmap_read_lock(mm);
-retry_write_locked:
 	mutex_lock(&svms->lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
 	if (!prange) {
 		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 			 svms, addr);
-		if (!write_locked) {
-			/* Need the write lock to create new range with MMU notifier.
-			 * Also flush pending deferred work to make sure the interval
-			 * tree is up to date before we add a new range
-			 */
-			mutex_unlock(&svms->lock);
-			mmap_read_unlock(mm);
-			mmap_write_lock(mm);
-			write_locked = true;
-			goto retry_write_locked;
-		}
-		prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+		prange = svm_range_create_unregistered_range(adev, p, mm, vma, addr);
 		if (!prange) {
 			pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
 				 svms, addr);
@@ -2634,8 +2617,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			goto out_unlock_svms;
 		}
 	}
-	if (write_locked)
-		mmap_write_downgrade(mm);
+
+	mmap_write_downgrade(mm);
 
 	mutex_lock(&prange->migrate_mutex);
 
@@ -2652,7 +2635,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		goto out_unlock_range;
 	}
 
-	if (!svm_fault_allowed(mm, addr, write_fault)) {
+	if (!svm_fault_allowed(vma, write_fault)) {
 		pr_debug("fault addr 0x%llx no %s permission\n", addr,
 			write_fault ? "write" : "read");
 		r = -EPERM;
@@ -2704,10 +2687,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	mutex_unlock(&prange->migrate_mutex);
 out_unlock_svms:
 	mutex_unlock(&svms->lock);
+out_unlock_mm:
 	mmap_read_unlock(mm);
 
 	svm_range_count_fault(adev, p, gpuidx);
-
 out:
 	kfd_unref_process(p);
 
-- 
2.17.1



More information about the amd-gfx mailing list