[CI 29/42] drm/xe/svm: Create userptr if page fault occurs on system_allocator VMA

Oak Zeng oak.zeng at intel.com
Thu Jun 13 15:31:15 UTC 2024


From: Matthew Brost <matthew.brost at intel.com>

If a page fault occurs on a system_allocator VMA, create a userptr VMA to
replace the faulted region and map it to the GPU.

v1: Pass the userptr address as req_offset to drm_gpuvm_sm_map_ops_create().
This fixes a failure with malloc'd memory. (Oak)
Drop the xe_vma_is_userptr(vma) condition; XE_VMA_FAULT_USERPTR is
enough to identify a fault userptr. (Matt)

v2: Don't pin the fault userptr during its creation. A fault userptr is
created during a GPU page fault; after creation we decide whether it
needs to be migrated to GPU VRAM, so pinning after the migration is
sufficient and a pin before migration would be redundant. (Oak)

v3: Remove the mm field from xe_vm since mm is now stored in drm_gpuvm. Rebase. (Oak)

Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Cc: Brian Welty <brian.welty at intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
Signed-off-by: Oak Zeng <oak.zeng at intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c |  12 +++
 drivers/gpu/drm/xe/xe_vm.c           | 118 +++++++++++++++++++++++++--
 drivers/gpu/drm/xe/xe_vm.h           |   7 ++
 drivers/gpu/drm/xe/xe_vm_types.h     |   1 +
 4 files changed, 133 insertions(+), 5 deletions(-)
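
Note for reviewers (illustrative only, not part of the applied change): a
condensed sketch of the ordering the v2 comment above relies on -- create
the fault userptr first, decide placement / migrate next, pin last. The
svm_should_migrate_to_vram() and svm_migrate_to_vram() helpers below are
placeholders for work handled elsewhere in this series, not functions
added by this patch.

	/* Sketch of the intended fault flow for a system_allocator range. */
	if (xe_vma_is_system_allocator(vma)) {
		/* Carve a userptr VMA out of the CPU VMA backing the fault. */
		vma = xe_vm_fault_userptr(vm, pf->page_addr);
		if (IS_ERR(vma))
			return PTR_ERR(vma);
		/* Deliberately not pinned yet, see v2 above. */
	}

	/* Placeholder helpers: placement decision / migration to VRAM. */
	if (svm_should_migrate_to_vram(vma))
		svm_migrate_to_vram(vma);

	/* Pin only after the placement decision, then build the GPU mapping. */
	if (xe_vma_is_userptr(vma))
		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));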

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 847d3fbb38fe..018fa19ff80e 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -132,6 +132,18 @@ static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf,
 	trace_xe_vma_pagefault(vma);
 	atomic = access_is_atomic(pf->access_type);
 
+	/*
+	 * Create userptr VMA if fault occurs in a range reserved for system
+	 * allocator.
+	 */
+	if (xe_vma_is_system_allocator(vma)) {
+		vma = xe_vm_fault_userptr(vm, pf->page_addr);
+		if (IS_ERR(vma)) {
+			xe_vm_kill(vm, true);
+			return PTR_ERR(vma);
+		}
+	}
+
 retry_userptr:
 	if (xe_vma_is_userptr(vma) &&
 	    xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 39765e4b6b78..3e9e8ffa0a9a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1482,7 +1482,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	}
 
 	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
-		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops, false);
+	           vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops,
+	           flags & XE_VM_FLAG_PARTICIPATE_SVM);
 
 	drm_gem_object_put(vm_resv_obj);
 
@@ -2093,7 +2094,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 	if (bo)
 		drm_exec_fini(&exec);
 
-	if (xe_vma_is_userptr(vma)) {
+	if (xe_vma_is_userptr(vma) && !xe_vma_is_fault_userptr(vma)) {
 		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
 		if (err) {
 			prep_vma_destroy(vm, vma, false);
@@ -2207,8 +2208,11 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 	return err;
 }
 
-static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
-				   struct xe_vma_ops *vops)
+static int vm_bind_ioctl_ops_update_gpuvm_state(struct xe_vm *vm,
+						struct drm_gpuva_ops *ops,
+						struct xe_sync_entry *syncs,
+						u32 num_syncs,
+						struct xe_vma_ops *vops)
 {
 	struct xe_device *xe = vm->xe;
 	struct drm_gpuva_op *__op;
@@ -3145,7 +3149,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			goto unwind_ops;
 		}
 
-		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
+		err = vm_bind_ioctl_ops_update_gpuvm_state(vm, ops[i], syncs,
+							   num_syncs, &vops);
 		if (err)
 			goto unwind_ops;
 	}
@@ -3464,3 +3469,106 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
 	}
 	kvfree(snap);
 }
+
+/**
+ * xe_vm_fault_userptr() - Create a fault userptr VMA
+ * @vm: VM
+ * @fault_addr: GPU fault address
+ *
+ * Create a userptr VMA covering the CPU VMA that contains @fault_addr
+ *
+ * Return: newly created userptr VMA on success, ERR_PTR on failure
+ */
+struct xe_vma *xe_vm_fault_userptr(struct xe_vm *vm, u64 fault_addr)
+{
+	struct vm_area_struct *vas;
+	struct mm_struct *mm = vm->gpuvm.mm;
+	struct xe_vma_ops vops;
+	struct drm_gpuva_ops *ops = NULL;
+	struct drm_gpuva_op *__op;
+	struct xe_vma *vma = NULL;
+	u64 start, range;
+	int err;
+
+	vm_dbg(&vm->xe->drm, "FAULT: addr=0x%016llx", fault_addr);
+
+	if (!mmget_not_zero(mm))
+		return ERR_PTR(-EFAULT);
+
+	kthread_use_mm(mm);
+
+	mmap_read_lock(mm);
+	vas = find_vma_intersection(mm, fault_addr, fault_addr + 4);
+	if (!vas) {
+		err = -ENOENT;
+		goto err_unlock;
+	}
+
+	vm_dbg(&vm->xe->drm, "FOUND VAS: vm_start=0x%016lx, vm_end=0x%016lx",
+	       vas->vm_start, vas->vm_end);
+
+	start = vas->vm_start;
+	range = vas->vm_end - vas->vm_start;
+	mmap_read_unlock(mm);
+
+	ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, start, range, NULL, start);
+	if (IS_ERR(ops)) {
+		err = PTR_ERR(ops);
+		goto err_kthread;
+	}
+
+	drm_gpuva_for_each_op(__op, ops)
+		print_op(vm->xe, __op);
+
+	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+	err = vm_bind_ioctl_ops_update_gpuvm_state(vm, ops, NULL, 0, &vops);
+	if (err)
+		goto err_kthread;
+
+	/*
+	 * No need to execute the ops since we only update GPUVM state here;
+	 * the page fault handler will update the GPU page tables. Find the
+	 * VMA that needs a GPU mapping and return it to the page fault handler.
+	 */
+	xe_vm_lock(vm, false);
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+		if (__op->op == DRM_GPUVA_OP_MAP) {
+			xe_assert(vm->xe, !vma);
+			vma = op->map.vma;
+			vma->gpuva.flags |= XE_VMA_FAULT_USERPTR;
+		} else if (__op->op == DRM_GPUVA_OP_UNMAP) {
+			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), NULL);
+		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
+			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
+				       NULL);
+		}
+	}
+	xe_vm_unlock(vm);
+
+	kthread_unuse_mm(mm);
+	mmput(mm);
+	drm_gpuva_ops_free(&vm->gpuvm, ops);
+
+	return vma;
+
+err_unlock:
+	mmap_read_unlock(mm);
+err_kthread:
+	kthread_unuse_mm(mm);
+	mmput(mm);
+	if (ops) {
+		drm_gpuva_for_each_op_reverse(__op, ops) {
+			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+			xe_vma_op_unwind(vm, op,
+					 op->flags & XE_VMA_OP_COMMITTED,
+					 op->flags & XE_VMA_OP_PREV_COMMITTED,
+					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
+		}
+		drm_gpuva_ops_free(&vm->gpuvm, ops);
+	}
+
+	return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 680c0c49b2f4..a31409b87b8a 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -172,6 +172,11 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma)
 		!xe_vma_is_system_allocator(vma);
 }
 
+static inline bool xe_vma_is_fault_userptr(struct xe_vma *vma)
+{
+	return vma->gpuva.flags & XE_VMA_FAULT_USERPTR;
+}
+
 /**
  * to_userptr_vma() - Return a pointer to an embedding userptr vma
  * @vma: Pointer to the embedded struct xe_vma
@@ -252,6 +257,8 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
 
 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
 
+struct xe_vma *xe_vm_fault_userptr(struct xe_vm *vm, u64 fault_addr);
+
 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
 
 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 165c2ca258de..c1bffa60cefc 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -33,6 +33,7 @@ struct xe_vm_pgtable_update_op;
 #define XE_VMA_PTE_COMPACT	(DRM_GPUVA_USERBITS << 7)
 #define XE_VMA_DUMPABLE		(DRM_GPUVA_USERBITS << 8)
 #define XE_VMA_SYSTEM_ALLOCATOR	(DRM_GPUVA_USERBITS << 9)
+#define XE_VMA_FAULT_USERPTR	(DRM_GPUVA_USERBITS << 10)
 
 /** struct xe_userptr - User pointer */
 struct xe_userptr {
-- 
2.26.3


