[v2 07/31] drm/xe: Create userptr if page fault occurs on system_allocator VMA
Oak Zeng
oak.zeng at intel.com
Tue Apr 9 20:17:18 UTC 2024
From: Matthew Brost <matthew.brost at intel.com>
If a page fault occurs on a system_allocator VMA, create a userptr VMA to
replace the faulted region and map it to the GPU.
v1: Pass the userptr address as the req_offset of the sm_map_ops_create
function. This fixes a failure seen with malloc'd memory (Oak)
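
A rough sketch of the resulting fault path, using the names introduced in
the diff below (error handling and locking details omitted):

	/* in handle_pagefault(), after resolving the VMA for pf->page_addr */
	if (xe_vma_is_system_allocator(vma)) {
		/*
		 * xe_vm_fault_userptr() looks up the CPU VMA covering the
		 * fault (find_vma_intersection() under mmap_read_lock()),
		 * creates GPUVM map ops over [vm_start, vm_end) with the CPU
		 * address as the userptr, and commits them via
		 * vm_bind_ioctl_ops_update_gpuvm_state(). It only updates
		 * GPUVM state; it does not bind anything to the GPU itself.
		 */
		vma = xe_vm_fault_userptr(vm, pf->page_addr);
		if (IS_ERR(vma))
			return PTR_ERR(vma);
	}
	/* the existing userptr path then pins pages and binds the new VMA */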
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
Signed-off-by: Oak Zeng <oak.zeng at intel.com>
---
drivers/gpu/drm/xe/xe_gt_pagefault.c | 13 +++
drivers/gpu/drm/xe/xe_vm.c | 115 +++++++++++++++++++++++++--
drivers/gpu/drm/xe/xe_vm.h | 2 +
drivers/gpu/drm/xe/xe_vm_types.h | 3 +
4 files changed, 128 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index c49b1409e168..c9c2f15d9f5b 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -166,6 +166,19 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
goto unlock_vm;
}
+ /*
+ * Create userptr VMA if fault occurs in a range reserved for system
+ * allocator.
+ */
+ if (xe_vma_is_system_allocator(vma)) {
+ vma = xe_vm_fault_userptr(vm, pf->page_addr);
+ if (IS_ERR(vma)) {
+ xe_vm_kill(vm, true);
+ ret = PTR_ERR(vma);
+ goto unlock_vm;
+ }
+ }
+
if (!xe_vma_is_userptr(vma) ||
!xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
downgrade_write(&vm->lock);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d31d067d2e8b..1ae7f4160061 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1411,6 +1411,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
return ERR_PTR(-ENOMEM);
vm->xe = xe;
+ vm->mm = current->mm;
vm->size = 1ull << xe->info.va_bits;
@@ -2151,9 +2152,11 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
return err;
}
-static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
- struct xe_sync_entry *syncs, u32 num_syncs,
- struct xe_vma_ops *vops)
+static int vm_bind_ioctl_ops_update_gpuvm_state(struct xe_vm *vm,
+ struct drm_gpuva_ops *ops,
+ struct xe_sync_entry *syncs,
+ u32 num_syncs,
+ struct xe_vma_ops *vops)
{
struct xe_device *xe = vm->xe;
struct drm_gpuva_op *__op;
@@ -3069,8 +3072,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto unwind_ops;
}
- err = vm_bind_ioctl_ops_parse(vm, ops[i], syncs, num_syncs,
- &vops);
+ err = vm_bind_ioctl_ops_update_gpuvm_state(vm, ops[i], syncs,
+ num_syncs, &vops);
if (err)
goto unwind_ops;
@@ -3438,3 +3441,105 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
}
kvfree(snap);
}
+
+/**
+ * xe_vm_fault_userptr() - VM fault userptr
+ * @vm: VM
+ * @fault_addr: fault address
+ *
+ * Create userptr VMA from fault address
+ *
+ * Return: newly created userptr VMA on success, ERR_PTR on failure
+ */
+struct xe_vma *xe_vm_fault_userptr(struct xe_vm *vm, u64 fault_addr)
+{
+ struct vm_area_struct *vas;
+ struct mm_struct *mm = vm->mm;
+ struct xe_vma_ops vops;
+ struct drm_gpuva_ops *ops = NULL;
+ struct drm_gpuva_op *__op;
+ struct xe_vma *vma = NULL;
+ u64 start, range;
+ int err;
+
+ vm_dbg(&vm->xe->drm, "FAULT: addr=0x%016llx", fault_addr);
+
+ if (!mmget_not_zero(mm))
+ return ERR_PTR(-EFAULT);
+
+ kthread_use_mm(mm);
+
+ mmap_read_lock(mm);
+ vas = find_vma_intersection(mm, fault_addr, fault_addr + 4);
+ if (!vas) {
+ err = -ENOENT;
+ goto err_unlock;
+ }
+
+ vm_dbg(&vm->xe->drm, "FOUND VAS: vm_start=0x%016lx, vm_end=0x%016lx",
+ vas->vm_start, vas->vm_end);
+
+ start = vas->vm_start;
+ range = vas->vm_end - vas->vm_start;
+ mmap_read_unlock(mm);
+
+ ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, start, range, 0, start);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+ goto err_kthread;
+ }
+
+ drm_gpuva_for_each_op(__op, ops)
+ print_op(vm->xe, __op);
+
+ xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+ err = vm_bind_ioctl_ops_update_gpuvm_state(vm, ops, NULL, 0, &vops);
+ if (err)
+ goto err_kthread;
+
+ /*
+ * No need to execute ops as we just want to update GPUVM state, page
+ * fault handler will update GPU page tables. Find VMA that needs GPU
+ * mapping and return to page fault handler.
+ */
+ xe_vm_lock(vm, false);
+ drm_gpuva_for_each_op(__op, ops) {
+ struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+ if (__op->op == DRM_GPUVA_OP_MAP) {
+ xe_assert(vm->xe, !vma);
+ vma = op->map.vma;
+ } else if (__op->op == DRM_GPUVA_OP_UNMAP) {
+ xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), NULL);
+ } else if (__op->op == DRM_GPUVA_OP_REMAP) {
+ xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
+ NULL);
+ }
+ }
+ xe_vm_unlock(vm);
+
+ kthread_unuse_mm(mm);
+ mmput(mm);
+ drm_gpuva_ops_free(&vm->gpuvm, ops);
+
+ return vma;
+
+err_unlock:
+ mmap_read_unlock(mm);
+err_kthread:
+ kthread_unuse_mm(mm);
+ mmput(mm);
+ if (ops) {
+ drm_gpuva_for_each_op_reverse(__op, ops) {
+ struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+ xe_vma_op_unwind(vm, op,
+ op->flags & XE_VMA_OP_COMMITTED,
+ op->flags & XE_VMA_OP_PREV_COMMITTED,
+ op->flags & XE_VMA_OP_NEXT_COMMITTED);
+ }
+ drm_gpuva_ops_free(&vm->gpuvm, ops);
+ }
+
+ return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 6e5470a409fc..97d38daf0e9a 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -244,6 +244,8 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
+struct xe_vma *xe_vm_fault_userptr(struct xe_vm *vm, u64 fault_addr);
+
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index e5d12bf4cf87..cb67a3918990 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -233,6 +233,9 @@ struct xe_vm {
struct xe_device *xe;
+ /** @mm: user MM of VM */
+ struct mm_struct *mm;
+
/* exec queue used for (un)binding vma's */
struct xe_exec_queue *q;
--
2.26.3