[Intel-xe] [PATCH v5 4/8] drm/xe: Port Xe to GPUVA

Thomas Hellström thomas.hellstrom at linux.intel.com
Fri Apr 21 14:54:30 UTC 2023


On 4/4/23 03:42, Matthew Brost wrote:
> Rather than open coding VM binds and VMA tracking, use the GPUVA
> library. GPUVA provides a common infrastructure for VM binds to use mmap
> / munmap semantics and support for VK sparse bindings.
>
> The concepts are:
>
> 1) xe_vm inherits from drm_gpuva_manager
> 2) xe_vma inherits from drm_gpuva
> 3) xe_vma_op inherits from drm_gpuva_op
> 4) VM bind operations (MAP, UNMAP, PREFETCH, UNMAP_ALL) call into the
> GPUVA code to generate a VMA operations list which is parsed, committed,
> and executed.
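
For readers following along: "inherits" here means plain C struct
embedding. Roughly (field names as used further down in the diff;
gpuva_to_vma() assumed to be a container_of() wrapper):

	struct xe_vm {
		struct drm_gpuva_manager mgr;	/* base "class" */
		/* ... xe specific members ... */
	};

	struct xe_vma {
		struct drm_gpuva gpuva;		/* base "class" */
		/* ... xe specific members ... */
	};

	#define gpuva_to_vma(va) container_of(va, struct xe_vma, gpuva)
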
>
> v2 (CI): Add break after default in case statement.
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_bo.c                  |   10 +-
>   drivers/gpu/drm/xe/xe_device.c              |    2 +-
>   drivers/gpu/drm/xe/xe_exec.c                |    2 +-
>   drivers/gpu/drm/xe/xe_gt_pagefault.c        |   23 +-
>   drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |   14 +-
>   drivers/gpu/drm/xe/xe_guc_ct.c              |    6 +-
>   drivers/gpu/drm/xe/xe_migrate.c             |    8 +-
>   drivers/gpu/drm/xe/xe_pt.c                  |  106 +-
>   drivers/gpu/drm/xe/xe_trace.h               |   10 +-
>   drivers/gpu/drm/xe/xe_vm.c                  | 1799 +++++++++----------
>   drivers/gpu/drm/xe/xe_vm.h                  |   66 +-
>   drivers/gpu/drm/xe/xe_vm_madvise.c          |   87 +-
>   drivers/gpu/drm/xe/xe_vm_types.h            |  165 +-
>   13 files changed, 1126 insertions(+), 1172 deletions(-)
>
...
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index bdf82d34eb66..fddbe8d5f984 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -25,10 +25,8 @@
>   #include "xe_preempt_fence.h"
>   #include "xe_pt.h"
>   #include "xe_res_cursor.h"
> -#include "xe_sync.h"
>   #include "xe_trace.h"
> -
> -#define TEST_VM_ASYNC_OPS_ERROR
> +#include "xe_sync.h"
>   
>   /**
>    * xe_vma_userptr_check_repin() - Advisory check for repin needed
> @@ -51,20 +49,19 @@ int xe_vma_userptr_check_repin(struct xe_vma *vma)
>   
>   int xe_vma_userptr_pin_pages(struct xe_vma *vma)
>   {
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   	struct xe_device *xe = vm->xe;
> -	const unsigned long num_pages =
> -		(vma->end - vma->start + 1) >> PAGE_SHIFT;
> +	const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
>   	struct page **pages;
>   	bool in_kthread = !current->mm;
>   	unsigned long notifier_seq;
>   	int pinned, ret, i;
> -	bool read_only = vma->pte_flags & PTE_READ_ONLY;
> +	bool read_only = xe_vma_read_only(vma);
>   
>   	lockdep_assert_held(&vm->lock);
>   	XE_BUG_ON(!xe_vma_is_userptr(vma));
>   retry:
> -	if (vma->destroyed)
> +	if (vma->gpuva.flags & XE_VMA_DESTROYED)
>   		return 0;
>   
>   	notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
> @@ -94,7 +91,8 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
>   	}
>   
>   	while (pinned < num_pages) {
> -		ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE,
> +		ret = get_user_pages_fast(xe_vma_userptr(vma) +
> +					  pinned * PAGE_SIZE,
>   					  num_pages - pinned,
>   					  read_only ? 0 : FOLL_WRITE,
>   					  &pages[pinned]);
> @@ -295,7 +293,7 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
>   	struct xe_vma *vma;
>   
>   	list_for_each_entry(vma, &vm->extobj.list, extobj.link)
> -		dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage);
> +		dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, usage);
>   }
>   
>   static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
> @@ -444,7 +442,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
>   	INIT_LIST_HEAD(objs);
>   	list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
>   		tv_bo->num_shared = num_shared;
> -		tv_bo->bo = &vma->bo->ttm;
> +		tv_bo->bo = &xe_vma_bo(vma)->ttm;
>   
>   		list_add_tail(&tv_bo->head, objs);
>   		tv_bo++;
> @@ -459,10 +457,10 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
>   	spin_lock(&vm->notifier.list_lock);
>   	list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
>   				 notifier.rebind_link) {
> -		xe_bo_assert_held(vma->bo);
> +		xe_bo_assert_held(xe_vma_bo(vma));
>   
>   		list_del_init(&vma->notifier.rebind_link);
> -		if (vma->gt_present && !vma->destroyed)
> +		if (vma->gt_present && !(vma->gpuva.flags & XE_VMA_DESTROYED))
>   			list_move_tail(&vma->rebind_link, &vm->rebind_list);
>   	}
>   	spin_unlock(&vm->notifier.list_lock);
> @@ -583,10 +581,11 @@ static void preempt_rebind_work_func(struct work_struct *w)
>   		goto out_unlock;
>   
>   	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
> -		if (xe_vma_is_userptr(vma) || vma->destroyed)
> +		if (xe_vma_is_userptr(vma) ||
> +		    vma->gpuva.flags & XE_VMA_DESTROYED)
>   			continue;
>   
> -		err = xe_bo_validate(vma->bo, vm, false);
> +		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
>   		if (err)
>   			goto out_unlock;
>   	}
> @@ -645,17 +644,12 @@ static void preempt_rebind_work_func(struct work_struct *w)
>   	trace_xe_vm_rebind_worker_exit(vm);
>   }
>   
> -struct async_op_fence;
> -static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
> -			struct xe_engine *e, struct xe_sync_entry *syncs,
> -			u32 num_syncs, struct async_op_fence *afence);
> -
>   static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
>   				   const struct mmu_notifier_range *range,
>   				   unsigned long cur_seq)
>   {
>   	struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   	struct dma_resv_iter cursor;
>   	struct dma_fence *fence;
>   	long err;
> @@ -679,7 +673,8 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
>   	 * Tell exec and rebind worker they need to repin and rebind this
>   	 * userptr.
>   	 */
> -	if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) {
> +	if (!xe_vm_in_fault_mode(vm) &&
> +	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->gt_present) {
Could we introduce an xe_vma_destroyed() helper for this? The same flag
check is repeated in several places in this patch.
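Something like this, perhaps (just a sketch, name and placement up for
discussion):

	static inline bool xe_vma_destroyed(struct xe_vma *vma)
	{
		return vma->gpuva.flags & XE_VMA_DESTROYED;
	}

so that the check above becomes

	if (!xe_vm_in_fault_mode(vm) && !xe_vma_destroyed(vma) &&
	    vma->gt_present) {
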
>   		spin_lock(&vm->userptr.invalidated_lock);
>   		list_move_tail(&vma->userptr.invalidate_link,
>   			       &vm->userptr.invalidated);
> @@ -784,7 +779,8 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
>   
>   static struct dma_fence *
>   xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
> -	       struct xe_sync_entry *syncs, u32 num_syncs);
> +	       struct xe_sync_entry *syncs, u32 num_syncs,
> +	       bool first_op, bool last_op);
>   
>   struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>   {
> @@ -805,7 +801,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>   			trace_xe_vma_rebind_worker(vma);
>   		else
>   			trace_xe_vma_rebind_exec(vma);
> -		fence = xe_vm_bind_vma(vma, NULL, NULL, 0);
> +		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
>   		if (IS_ERR(fence))
>   			return fence;
>   	}
> @@ -833,6 +829,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>   		return vma;
>   	}
>   
> +	/* FIXME: Way to many lists, should be able to reduce this */
This FIXME looks unrelated to the GPUVA conversion; separate patch?
>   	INIT_LIST_HEAD(&vma->rebind_link);
>   	INIT_LIST_HEAD(&vma->unbind_link);
>   	INIT_LIST_HEAD(&vma->userptr_link);
> @@ -840,11 +837,12 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>   	INIT_LIST_HEAD(&vma->notifier.rebind_link);
>   	INIT_LIST_HEAD(&vma->extobj.link);
>   
> -	vma->vm = vm;
> -	vma->start = start;
> -	vma->end = end;
> +	INIT_LIST_HEAD(&vma->gpuva.head);
> +	vma->gpuva.mgr = &vm->mgr;
> +	vma->gpuva.va.addr = start;
> +	vma->gpuva.va.range = end - start + 1;

Hm. This comment really belongs to one of the GPUVA patches, but: while
"range" in statistics indeed means the difference between the highest and
lowest values, in computer science I'd associate a range with a pair of
first and last values, or a first value and a size. Why isn't "size" used
here, and similarly for some of the arguments below?
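
To illustrate what I mean (naming sketch only; the rename itself would
belong in the GPUVA series, not in this patch):

	vma->gpuva.va.addr = start;
	vma->gpuva.va.size = end - start + 1;	/* "size" rather than "range" */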

>   	if (read_only)
> -		vma->pte_flags = PTE_READ_ONLY;
> +		vma->gpuva.flags |= XE_VMA_READ_ONLY;
>   
>   	if (gt_mask) {
>   		vma->gt_mask = gt_mask;
> @@ -855,22 +853,24 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>   	}
>   
>   	if (vm->xe->info.platform == XE_PVC)
> -		vma->use_atomic_access_pte_bit = true;
> +		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
>   
>   	if (bo) {
>   		xe_bo_assert_held(bo);
> -		vma->bo_offset = bo_offset_or_userptr;
> -		vma->bo = xe_bo_get(bo);
> -		list_add_tail(&vma->bo_link, &bo->vmas);
> +
> +		drm_gem_object_get(&bo->ttm.base);
> +		vma->gpuva.gem.obj = &bo->ttm.base;
> +		vma->gpuva.gem.offset = bo_offset_or_userptr;
> +		drm_gpuva_link(&vma->gpuva);
>   	} else /* userptr */ {
>   		u64 size = end - start + 1;
>   		int err;
>   
> -		vma->userptr.ptr = bo_offset_or_userptr;
> +		vma->gpuva.gem.offset = bo_offset_or_userptr;
>   
>   		err = mmu_interval_notifier_insert(&vma->userptr.notifier,
>   						   current->mm,
> -						   vma->userptr.ptr, size,
> +						   xe_vma_userptr(vma), size,
>   						   &vma_userptr_notifier_ops);
>   		if (err) {
>   			kfree(vma);
> @@ -888,16 +888,16 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>   static void vm_remove_extobj(struct xe_vma *vma)
>   {
>   	if (!list_empty(&vma->extobj.link)) {
> -		vma->vm->extobj.entries--;
> +		xe_vma_vm(vma)->extobj.entries--;
>   		list_del_init(&vma->extobj.link);
>   	}
>   }
>   
>   static void xe_vma_destroy_late(struct xe_vma *vma)
>   {
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   	struct xe_device *xe = vm->xe;
> -	bool read_only = vma->pte_flags & PTE_READ_ONLY;
> +	bool read_only = xe_vma_read_only(vma);
>   
>   	if (xe_vma_is_userptr(vma)) {
>   		if (vma->userptr.sg) {
> @@ -917,7 +917,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
>   		mmu_interval_notifier_remove(&vma->userptr.notifier);
>   		xe_vm_put(vm);
>   	} else {
> -		xe_bo_put(vma->bo);
> +		xe_bo_put(xe_vma_bo(vma));
>   	}
>   
>   	kfree(vma);
> @@ -942,21 +942,22 @@ static void vma_destroy_cb(struct dma_fence *fence,
>   
>   static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
>   {
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   
>   	lockdep_assert_held_write(&vm->lock);
>   	XE_BUG_ON(!list_empty(&vma->unbind_link));
>   
>   	if (xe_vma_is_userptr(vma)) {
> -		XE_WARN_ON(!vma->destroyed);
> +		XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
> +
>   		spin_lock(&vm->userptr.invalidated_lock);
>   		list_del_init(&vma->userptr.invalidate_link);
>   		spin_unlock(&vm->userptr.invalidated_lock);
>   		list_del(&vma->userptr_link);
>   	} else {
> -		xe_bo_assert_held(vma->bo);
> -		list_del(&vma->bo_link);
> -		if (!vma->bo->vm)
> +		xe_bo_assert_held(xe_vma_bo(vma));
> +		drm_gpuva_unlink(&vma->gpuva);
> +		if (!xe_vma_bo(vma)->vm)
>   			vm_remove_extobj(vma);
>   	}
>   
> @@ -981,13 +982,13 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
>   {
>   	struct ttm_validate_buffer tv[2];
>   	struct ww_acquire_ctx ww;
> -	struct xe_bo *bo = vma->bo;
> +	struct xe_bo *bo = xe_vma_bo(vma);
>   	LIST_HEAD(objs);
>   	LIST_HEAD(dups);
>   	int err;
>   
>   	memset(tv, 0, sizeof(tv));
> -	tv[0].bo = xe_vm_ttm_bo(vma->vm);
> +	tv[0].bo = xe_vm_ttm_bo(xe_vma_vm(vma));
>   	list_add(&tv[0].head, &objs);
>   
>   	if (bo) {
> @@ -1004,77 +1005,61 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
>   		xe_bo_put(bo);
>   }
>   
> -static struct xe_vma *to_xe_vma(const struct rb_node *node)
> -{
> -	BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
> -	return (struct xe_vma *)node;
> -}
> -
> -static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b)
> -{
> -	if (a->end < b->start) {
> -		return -1;
> -	} else if (b->end < a->start) {
> -		return 1;
> -	} else {
> -		return 0;
> -	}
> -}
> -
> -static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b)
> -{
> -	return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0;
> -}
> -
> -int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
> -{
> -	struct xe_vma *cmp = to_xe_vma(node);
> -	const struct xe_vma *own = key;
> -
> -	if (own->start > cmp->end)
> -		return 1;
> -
> -	if (own->end < cmp->start)
> -		return -1;
> -
> -	return 0;
> -}
> -
>   struct xe_vma *
> -xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
> +xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
>   {
> -	struct rb_node *node;
> +	struct drm_gpuva *gpuva;
>   
>   	if (xe_vm_is_closed(vm))
>   		return NULL;
>   
> -	XE_BUG_ON(vma->end >= vm->size);
> +	XE_BUG_ON(start + range > vm->size);
>   	lockdep_assert_held(&vm->lock);
>   
> -	node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
> +	gpuva = drm_gpuva_find_first(&vm->mgr, start, range);
>   
> -	return node ? to_xe_vma(node) : NULL;
> +	return gpuva ? gpuva_to_vma(gpuva) : NULL;
>   }
>   
>   static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
>   {
> -	XE_BUG_ON(vma->vm != vm);
> +	int err;
> +
> +	XE_BUG_ON(xe_vma_vm(vma) != vm);
>   	lockdep_assert_held(&vm->lock);
>   
> -	rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
> +	err = drm_gpuva_insert(&vm->mgr, &vma->gpuva);
> +	XE_WARN_ON(err);
>   }
>   
> -static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
> +static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma, bool remove)
>   {
> -	XE_BUG_ON(vma->vm != vm);
> +	XE_BUG_ON(xe_vma_vm(vma) != vm);
>   	lockdep_assert_held(&vm->lock);
>   
> -	rb_erase(&vma->vm_node, &vm->vmas);
> +	if (remove)
> +		drm_gpuva_remove(&vma->gpuva);
>   	if (vm->usm.last_fault_vma == vma)
>   		vm->usm.last_fault_vma = NULL;
>   }
>   
> -static void async_op_work_func(struct work_struct *w);
> +static struct drm_gpuva_op *xe_vm_op_alloc(void)
> +{
> +	struct xe_vma_op *op;
> +
> +	op = kzalloc(sizeof(*op), GFP_KERNEL);
> +
> +	if (unlikely(!op))
> +		return NULL;
> +
> +	return &op->base;
> +}
> +
> +static struct drm_gpuva_fn_ops gpuva_ops = {
> +	.op_alloc = xe_vm_op_alloc,
> +};
> +
> +static void xe_vma_op_work_func(struct work_struct *w);
>   static void vm_destroy_work_func(struct work_struct *w);
>   
>   struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> @@ -1094,7 +1079,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>   
>   	vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);
>   
> -	vm->vmas = RB_ROOT;
>   	vm->flags = flags;
>   
>   	init_rwsem(&vm->lock);
> @@ -1110,7 +1094,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>   	spin_lock_init(&vm->notifier.list_lock);
>   
>   	INIT_LIST_HEAD(&vm->async_ops.pending);
> -	INIT_WORK(&vm->async_ops.work, async_op_work_func);
> +	INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func);
>   	spin_lock_init(&vm->async_ops.lock);
>   
>   	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
> @@ -1130,6 +1114,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>   	if (err)
>   		goto err_put;
>   
> +	drm_gpuva_manager_init(&vm->mgr, "Xe VM", 0, vm->size, 0, 0,
> +			       &gpuva_ops, 0);
>   	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
>   		vm->flags |= XE_VM_FLAGS_64K;
>   
> @@ -1235,6 +1221,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>   			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
>   	}
>   	dma_resv_unlock(&vm->resv);
> +	drm_gpuva_manager_destroy(&vm->mgr);
>   err_put:
>   	dma_resv_fini(&vm->resv);
>   	kfree(vm);
> @@ -1284,14 +1271,18 @@ static void vm_error_capture(struct xe_vm *vm, int err,
>   
>   void xe_vm_close_and_put(struct xe_vm *vm)
>   {
> -	struct rb_root contested = RB_ROOT;
> +	struct list_head contested;
>   	struct ww_acquire_ctx ww;
>   	struct xe_device *xe = vm->xe;
>   	struct xe_gt *gt;
> +	struct xe_vma *vma, *next_vma;
> +	DRM_GPUVA_ITER(it, &vm->mgr, 0);
>   	u8 id;
>   
>   	XE_BUG_ON(vm->preempt.num_engines);
>   
> +	INIT_LIST_HEAD(&contested);
> +
>   	vm->size = 0;
>   	smp_mb();
>   	flush_async_ops(vm);
> @@ -1308,24 +1299,25 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>   
>   	down_write(&vm->lock);
>   	xe_vm_lock(vm, &ww, 0, false);
> -	while (vm->vmas.rb_node) {
> -		struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);
> +	drm_gpuva_iter_for_each(it) {
> +		vma = gpuva_to_vma(it.va);
>   
>   		if (xe_vma_is_userptr(vma)) {
>   			down_read(&vm->userptr.notifier_lock);
> -			vma->destroyed = true;
> +			vma->gpuva.flags |= XE_VMA_DESTROYED;
>   			up_read(&vm->userptr.notifier_lock);
>   		}
>   
> -		rb_erase(&vma->vm_node, &vm->vmas);
> +		xe_vm_remove_vma(vm, vma, false);
> +		drm_gpuva_iter_remove(&it);
>   
>   		/* easy case, remove from VMA? */
> -		if (xe_vma_is_userptr(vma) || vma->bo->vm) {
> +		if (xe_vma_is_userptr(vma) || xe_vma_bo(vma)->vm) {
>   			xe_vma_destroy(vma, NULL);
>   			continue;
>   		}
>   
> -		rb_add(&vma->vm_node, &contested, xe_vma_less_cb);
> +		list_add_tail(&vma->unbind_link, &contested);
>   	}
>   
>   	/*
> @@ -1348,19 +1340,14 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>   	}
>   	xe_vm_unlock(vm, &ww);
>   
> -	if (contested.rb_node) {
> -
> -		/*
> -		 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
> -		 * Since we hold a refcount to the bo, we can remove and free
> -		 * the members safely without locking.
> -		 */
> -		while (contested.rb_node) {
> -			struct xe_vma *vma = to_xe_vma(contested.rb_node);
> -
> -			rb_erase(&vma->vm_node, &contested);
> -			xe_vma_destroy_unlocked(vma);
> -		}
> +	/*
> +	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
> +	 * Since we hold a refcount to the bo, we can remove and free
> +	 * the members safely without locking.
> +	 */
> +	list_for_each_entry_safe(vma, next_vma, &contested, unbind_link) {
> +		list_del_init(&vma->unbind_link);
> +		xe_vma_destroy_unlocked(vma);
>   	}
>   
>   	if (vm->async_ops.error_capture.addr)
> @@ -1369,6 +1356,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>   	XE_WARN_ON(!list_empty(&vm->extobj.list));
>   	up_write(&vm->lock);
>   
> +	drm_gpuva_manager_destroy(&vm->mgr);
> +
>   	mutex_lock(&xe->usm.lock);
>   	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
>   		xe->usm.num_vm_in_fault_mode--;
> @@ -1456,13 +1445,14 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt)
>   
>   static struct dma_fence *
>   xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
> -		 struct xe_sync_entry *syncs, u32 num_syncs)
> +		 struct xe_sync_entry *syncs, u32 num_syncs,
> +		 bool first_op, bool last_op)
>   {
>   	struct xe_gt *gt;
>   	struct dma_fence *fence = NULL;
>   	struct dma_fence **fences = NULL;
>   	struct dma_fence_array *cf = NULL;
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   	int cur_fence = 0, i;
>   	int number_gts = hweight_long(vma->gt_present);
>   	int err;
> @@ -1483,7 +1473,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
>   
>   		XE_BUG_ON(xe_gt_is_media_type(gt));
>   
> -		fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs);
> +		fence = __xe_pt_unbind_vma(gt, vma, e, first_op ? syncs : NULL,
> +					   first_op ? num_syncs : 0);
>   		if (IS_ERR(fence)) {
>   			err = PTR_ERR(fence);
>   			goto err_fences;
> @@ -1509,7 +1500,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
>   		}
>   	}
>   
> -	for (i = 0; i < num_syncs; i++)
> +	for (i = 0; last_op && i < num_syncs; i++)
>   		xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
>   
>   	return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
> @@ -1528,13 +1519,14 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
>   
>   static struct dma_fence *
>   xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
> -	       struct xe_sync_entry *syncs, u32 num_syncs)
> +	       struct xe_sync_entry *syncs, u32 num_syncs,
> +	       bool first_op, bool last_op)
>   {
>   	struct xe_gt *gt;
>   	struct dma_fence *fence;
>   	struct dma_fence **fences = NULL;
>   	struct dma_fence_array *cf = NULL;
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   	int cur_fence = 0, i;
>   	int number_gts = hweight_long(vma->gt_mask);
>   	int err;
> @@ -1554,7 +1546,8 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
>   			goto next;
>   
>   		XE_BUG_ON(xe_gt_is_media_type(gt));
> -		fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs,
> +		fence = __xe_pt_bind_vma(gt, vma, e, first_op ? syncs : NULL,
Do we need that conditional expression? Could we always pass "syncs" here?
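I.e. (sketch, assuming __xe_pt_bind_vma() never looks at syncs when
num_syncs is 0):

	fence = __xe_pt_bind_vma(gt, vma, e, syncs,
				 first_op ? num_syncs : 0,
				 vma->gt_present & BIT(id));
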
> +					 first_op ? num_syncs : 0,
>   					 vma->gt_present & BIT(id));
>   		if (IS_ERR(fence)) {
>   			err = PTR_ERR(fence);
> @@ -1581,7 +1574,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
>   		}
>   	}
>   
> -	for (i = 0; i < num_syncs; i++)
> +	for (i = 0; last_op && i < num_syncs; i++)
>   		xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
>   
>   	return cf ? &cf->base : fence;
> @@ -1680,15 +1673,27 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence)
>   
>   static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
>   			struct xe_engine *e, struct xe_sync_entry *syncs,
> -			u32 num_syncs, struct async_op_fence *afence)
> +			u32 num_syncs, struct async_op_fence *afence,
> +			bool immediate, bool first_op, bool last_op)
>   {
>   	struct dma_fence *fence;
>   
>   	xe_vm_assert_held(vm);
>   
> -	fence = xe_vm_bind_vma(vma, e, syncs, num_syncs);
> -	if (IS_ERR(fence))
> -		return PTR_ERR(fence);
> +	if (immediate) {
> +		fence = xe_vm_bind_vma(vma, e, syncs, num_syncs, first_op,
> +				       last_op);
> +		if (IS_ERR(fence))
> +			return PTR_ERR(fence);
> +	} else {
> +		int i;
> +
> +		XE_BUG_ON(!xe_vm_in_fault_mode(vm));
> +
> +		fence = dma_fence_get_stub();
> +		for (i = 0; last_op && i < num_syncs; i++)
This construct is often misread; people tend to miss the last_op
condition. Could we wrap the loop in an if (last_op) { } instead?
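I.e.:

	if (last_op) {
		for (i = 0; i < num_syncs; i++)
			xe_sync_entry_signal(&syncs[i], NULL, fence);
	}
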
> +			xe_sync_entry_signal(&syncs[i], NULL, fence);
> +	}
>   	if (afence)
>   		add_async_op_fence_cb(vm, fence, afence);
>   
> @@ -1698,32 +1703,35 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
>   
>   static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
>   		      struct xe_bo *bo, struct xe_sync_entry *syncs,
> -		      u32 num_syncs, struct async_op_fence *afence)
> +		      u32 num_syncs, struct async_op_fence *afence,
> +		      bool immediate, bool first_op, bool last_op)

Hm. I wonder whether we could do the in-syncs / out-syncs split further
out, so that we don't have to pass first_op / last_op around at all? Or
is something prohibiting that?
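
Roughly what I have in mind at the caller level (hand-wavy sketch; the
in_syncs / out_syncs naming is just for illustration):

	/* inner bind/unbind helpers only ever take in-syncs */
	fence = xe_vm_bind_vma(vma, e, in_syncs, num_in_syncs);

	/* the ioctl-level caller signals out-syncs once, on the fence of
	 * the last operation */
	for (i = 0; i < num_out_syncs; i++)
		xe_sync_entry_signal(&out_syncs[i], NULL, fence);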

>   {
>   	int err;
>   
>   	xe_vm_assert_held(vm);
>   	xe_bo_assert_held(bo);
>   
> -	if (bo) {
> +	if (bo && immediate) {
>   		err = xe_bo_validate(bo, vm, true);
>   		if (err)
>   			return err;
>   	}
>   
> -	return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence);
> +	return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence, immediate,
> +			    first_op, last_op);
>   }
>   
>   static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
>   			struct xe_engine *e, struct xe_sync_entry *syncs,
> -			u32 num_syncs, struct async_op_fence *afence)
> +			u32 num_syncs, struct async_op_fence *afence,
> +			bool first_op, bool last_op)
>   {
>   	struct dma_fence *fence;
>   
>   	xe_vm_assert_held(vm);
> -	xe_bo_assert_held(vma->bo);
> +	xe_bo_assert_held(xe_vma_bo(vma));
>   
> -	fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
> +	fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs, first_op, last_op);
>   	if (IS_ERR(fence))
>   		return PTR_ERR(fence);
>   	if (afence)
> @@ -1946,26 +1954,27 @@ static const u32 region_to_mem_type[] = {
>   static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
>   			  struct xe_engine *e, u32 region,
>   			  struct xe_sync_entry *syncs, u32 num_syncs,
> -			  struct async_op_fence *afence)
> +			  struct async_op_fence *afence, bool first_op,
> +			  bool last_op)
>   {
>   	int err;
>   
>   	XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type));
>   
>   	if (!xe_vma_is_userptr(vma)) {
> -		err = xe_bo_migrate(vma->bo, region_to_mem_type[region]);
> +		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
>   		if (err)
>   			return err;
>   	}
>   
>   	if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) {
> -		return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs,
> -				  afence);
> +		return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs, num_syncs,
> +				  afence, true, first_op, last_op);
>   	} else {
>   		int i;
>   
>   		/* Nothing to do, signal fences now */
> -		for (i = 0; i < num_syncs; i++)
> +		for (i = 0; last_op && i < num_syncs; i++)

Here is another instance of that construct.

>   			xe_sync_entry_signal(&syncs[i], NULL,
>   					     dma_fence_get_stub());
>   		if (afence)
> @@ -1976,29 +1985,6 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
>   
>   #define VM_BIND_OP(op)	(op & 0xffff)
>   
> -static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
> -			   struct xe_engine *e, struct xe_bo *bo, u32 op,
> -			   u32 region, struct xe_sync_entry *syncs,
> -			   u32 num_syncs, struct async_op_fence *afence)
> -{
> -	switch (VM_BIND_OP(op)) {
> -	case XE_VM_BIND_OP_MAP:
> -		return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence);
> -	case XE_VM_BIND_OP_UNMAP:
> -	case XE_VM_BIND_OP_UNMAP_ALL:
> -		return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence);
> -	case XE_VM_BIND_OP_MAP_USERPTR:
> -		return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence);
> -	case XE_VM_BIND_OP_PREFETCH:
> -		return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs,
> -				      afence);
> -		break;
> -	default:
> -		XE_BUG_ON("NOT POSSIBLE");
> -		return -EINVAL;
> -	}
> -}
> -

Will continue on Monday.



