[Intel-xe] [PATCH v5 4/8] drm/xe: Port Xe to GPUVA

Thomas Hellström thomas.hellstrom at linux.intel.com
Fri Apr 21 12:52:54 UTC 2023


On 4/4/23 03:42, Matthew Brost wrote:
> Rather than open coding VM binds and VMA tracking, use the GPUVA
> library. GPUVA provides a common infrastructure for VM binds to use mmap
> / munmap semantics and support for VK sparse bindings.
>
> The concepts are:
>
> 1) xe_vm inherits from drm_gpuva_manager
> 2) xe_vma inherits from drm_gpuva
> 3) xe_vma_op inherits from drm_gpuva_op
> 4) VM bind operations (MAP, UNMAP, PREFETCH, UNMAP_ALL) call into the
> GPUVA code to generate a VMA operations list which is parsed, committed,
> and executed.
>
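
A note for readers new to the GPUVA layering: "inherits" above means struct
embedding plus thin accessors, so most of the diff below is mechanical
(vma->start becomes xe_vma_start(vma), vma->bo becomes xe_vma_bo(vma), and so
on). A minimal sketch of the pattern, assuming the drm_gpuva layout from the
GPUVA manager series (va.addr / va.range, gem.obj / gem.offset, mgr, flags);
the real definitions live in xe_vm_types.h / xe_vm.h and may differ in detail:

/*
 * Illustrative sketch only -- not the exact code from this patch.
 * Assumes struct xe_vm embeds the manager in a member called "mgr".
 */
struct xe_vma {
	struct drm_gpuva gpuva;	/* base "class" */
	/* ... driver-private state: userptr, gt_present, rebind links, ... */
};

static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva)
{
	return container_of(gpuva, struct xe_vma, gpuva);
}

static inline u64 xe_vma_start(struct xe_vma *vma)
{
	return vma->gpuva.va.addr;
}

static inline u64 xe_vma_size(struct xe_vma *vma)
{
	return vma->gpuva.va.range;
}

/* Exclusive end, which is why callers below print xe_vma_end(vma) - 1. */
static inline u64 xe_vma_end(struct xe_vma *vma)
{
	return xe_vma_start(vma) + xe_vma_size(vma);
}

static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma)
{
	return vma->gpuva.gem.obj ?
		gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
}

static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma)
{
	return container_of(vma->gpuva.mgr, struct xe_vm, mgr);
}

With that in place, drm_gem_for_each_gpuva() + gpuva_to_vma() replaces the old
driver-private bo->vmas list walk, as in the xe_bo.c hunk below.
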
> v2 (CI): Add break after default in case statement.
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_bo.c                  |   10 +-
>   drivers/gpu/drm/xe/xe_device.c              |    2 +-
>   drivers/gpu/drm/xe/xe_exec.c                |    2 +-
>   drivers/gpu/drm/xe/xe_gt_pagefault.c        |   23 +-
>   drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |   14 +-
>   drivers/gpu/drm/xe/xe_guc_ct.c              |    6 +-
>   drivers/gpu/drm/xe/xe_migrate.c             |    8 +-
>   drivers/gpu/drm/xe/xe_pt.c                  |  106 +-
>   drivers/gpu/drm/xe/xe_trace.h               |   10 +-
>   drivers/gpu/drm/xe/xe_vm.c                  | 1799 +++++++++----------
>   drivers/gpu/drm/xe/xe_vm.h                  |   66 +-
>   drivers/gpu/drm/xe/xe_vm_madvise.c          |   87 +-
>   drivers/gpu/drm/xe/xe_vm_types.h            |  165 +-
>   13 files changed, 1126 insertions(+), 1172 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 5460e6fe3c1f..3a482c61c3ec 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -391,7 +391,8 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
>   {
>   	struct dma_resv_iter cursor;
>   	struct dma_fence *fence;
> -	struct xe_vma *vma;
> +	struct drm_gpuva *gpuva;
> +	struct drm_gem_object *obj = &bo->ttm.base;
>   	int ret = 0;
>   
>   	dma_resv_assert_held(bo->ttm.base.resv);
> @@ -404,8 +405,9 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
>   		dma_resv_iter_end(&cursor);
>   	}
>   
> -	list_for_each_entry(vma, &bo->vmas, bo_link) {
> -		struct xe_vm *vm = vma->vm;
> +	drm_gem_for_each_gpuva(gpuva, obj) {
> +		struct xe_vma *vma = gpuva_to_vma(gpuva);
> +		struct xe_vm *vm = xe_vma_vm(vma);
>   
>   		trace_xe_vma_evict(vma);
>   
> @@ -430,10 +432,8 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
>   			} else {
>   				ret = timeout;
>   			}
> -

Unrelated whitespace-only change.


>   		} else {
>   			bool vm_resv_locked = false;
> -			struct xe_vm *vm = vma->vm;
>   
>   			/*
>   			 * We need to put the vma on the vm's rebind_list,
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index a79f934e3d2d..d0d70adedba6 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -130,7 +130,7 @@ static struct drm_driver driver = {
>   	.driver_features =
>   	    DRIVER_GEM |
>   	    DRIVER_RENDER | DRIVER_SYNCOBJ |
> -	    DRIVER_SYNCOBJ_TIMELINE,
> +	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
>   	.open = xe_file_open,
>   	.postclose = xe_file_close,
>   
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index ea869f2452ef..214d82bc906b 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -118,7 +118,7 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
>   		if (xe_vma_is_userptr(vma))
>   			continue;
>   
> -		err = xe_bo_validate(vma->bo, vm, false);
> +		err = xe_bo_validate(xe_vma_bo(vma), vm, false);
>   		if (err) {
>   			xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
>   			*tv = NULL;
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index 1677640e1075..f7a066090a13 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -75,9 +75,10 @@ static bool vma_is_valid(struct xe_gt *gt, struct xe_vma *vma)
>   		!(BIT(gt->info.id) & vma->usm.gt_invalidated);
>   }
>   
> -static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
> +static bool vma_matches(struct xe_vma *vma, u64 page_addr)
>   {
> -	if (lookup->start > vma->end || lookup->end < vma->start)
> +	if (page_addr > xe_vma_end(vma) - 1 ||
> +	    page_addr + SZ_4K < xe_vma_start(vma))
>   		return false;
>   
>   	return true;
> @@ -90,16 +91,14 @@ static bool only_needs_bo_lock(struct xe_bo *bo)
>   
>   static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
>   {
> -	struct xe_vma *vma = NULL, lookup;
> +	struct xe_vma *vma = NULL;
>   
> -	lookup.start = page_addr;
> -	lookup.end = lookup.start + SZ_4K - 1;
>   	if (vm->usm.last_fault_vma) {   /* Fast lookup */
> -		if (vma_matches(vm->usm.last_fault_vma, &lookup))
> +		if (vma_matches(vm->usm.last_fault_vma, page_addr))
>   			vma = vm->usm.last_fault_vma;
>   	}
>   	if (!vma)
> -		vma = xe_vm_find_overlapping_vma(vm, &lookup);
> +		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
>   
>   	return vma;
>   }
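
Just to make sure I read the new lookup right: xe_vm_find_overlapping_vma(vm,
addr, size) presumably ends up being a thin wrapper around the GPUVA manager's
interval lookup, something like the sketch below (the drm_gpuva_find_first()
name/signature is taken from the GPUVA manager series, and the "mgr" member
name is my assumption):

static struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
{
	struct drm_gpuva *gpuva;

	lockdep_assert_held(&vm->lock);

	/* First GPUVA overlapping [start, start + range) in this VM, if any */
	gpuva = drm_gpuva_find_first(&vm->mgr, start, range);

	return gpuva ? gpuva_to_vma(gpuva) : NULL;
}

which is a nice simplification over the old on-stack "lookup" VMA.
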
> @@ -170,7 +169,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
>   	}
>   
>   	/* Lock VM and BOs dma-resv */
> -	bo = vma->bo;
> +	bo = xe_vma_bo(vma);
>   	if (only_needs_bo_lock(bo)) {
>   		/* This path ensures the BO's LRU is updated */
>   		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
> @@ -487,12 +486,8 @@ static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
>   {
>   	u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
>   		sub_granularity_in_byte(acc->granularity);
> -	struct xe_vma lookup;
> -
> -	lookup.start = page_va;
> -	lookup.end = lookup.start + SZ_4K - 1;
>   
> -	return xe_vm_find_overlapping_vma(vm, &lookup);
> +	return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
>   }
>   
>   static int handle_acc(struct xe_gt *gt, struct acc *acc)
> @@ -536,7 +531,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
>   		goto unlock_vm;
>   
>   	/* Lock VM and BOs dma-resv */
> -	bo = vma->bo;
> +	bo = xe_vma_bo(vma);
>   	if (only_needs_bo_lock(bo)) {
>   		/* This path ensures the BO's LRU is updated */
>   		ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index f279e21300aa..155f37aaf31c 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -201,8 +201,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
>   	if (!xe->info.has_range_tlb_invalidation) {
>   		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
>   	} else {
> -		u64 start = vma->start;
> -		u64 length = vma->end - vma->start + 1;
> +		u64 start = xe_vma_start(vma);
> +		u64 length = xe_vma_size(vma);
>   		u64 align, end;
>   
>   		if (length < SZ_4K)
> @@ -215,12 +215,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
>   		 * address mask covering the required range.
>   		 */
>   		align = roundup_pow_of_two(length);
> -		start = ALIGN_DOWN(vma->start, align);
> -		end = ALIGN(vma->start + length, align);
> +		start = ALIGN_DOWN(xe_vma_start(vma), align);
> +		end = ALIGN(xe_vma_start(vma) + length, align);
>   		length = align;
>   		while (start + length < end) {
>   			length <<= 1;
> -			start = ALIGN_DOWN(vma->start, length);
> +			start = ALIGN_DOWN(xe_vma_start(vma), length);
>   		}
>   
>   		/*
> @@ -229,7 +229,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
>   		 */
>   		if (length >= SZ_2M) {
>   			length = max_t(u64, SZ_16M, length);
> -			start = ALIGN_DOWN(vma->start, length);
> +			start = ALIGN_DOWN(xe_vma_start(vma), length);
>   		}
>   
>   		XE_BUG_ON(length < SZ_4K);
> @@ -238,7 +238,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
>   		XE_BUG_ON(!IS_ALIGNED(start, length));
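
For anyone else following the range math above, a worked example with made-up
numbers: a VMA at 0x13000 of size 0x2000 gives align = 0x2000, start = 0x12000,
end = 0x16000; the loop then grows length to 0x4000 (start 0x10000, which still
ends short of 0x16000) and then to 0x8000 (start 0x10000, and 0x10000 + 0x8000
>= 0x16000), so the selective invalidation covers [0x10000, 0x18000) -- the
smallest naturally aligned power-of-two range containing the VMA, which is
exactly what the assertions above check. The SZ_2M clamp doesn't kick in at
this size.
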
>   
>   		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
> -		action[len++] = vma->vm->usm.asid;
> +		action[len++] = xe_vma_vm(vma)->usm.asid;
>   		action[len++] = lower_32_bits(start);
>   		action[len++] = upper_32_bits(start);
>   		action[len++] = ilog2(length) - ilog2(SZ_4K);
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 5e00b75d3ca2..e5ed9022a0a2 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -783,13 +783,13 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
>   	if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
>   		g2h_fence->fail = true;
>   		g2h_fence->error =
> -			FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
> +			FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]);
>   		g2h_fence->hint =
> -			FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);
> +			FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]);
>   	} else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
>   		g2h_fence->retry = true;
>   		g2h_fence->reason =
> -			FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[0]);
> +			FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]);
>   	} else if (g2h_fence->response_buffer) {
>   		g2h_fence->response_len = response_len;
>   		memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN,
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index e8978440c725..fee4c0028a2f 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -1049,8 +1049,10 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
>   		return ERR_PTR(-ETIME);
>   
>   	if (wait_vm && !dma_resv_test_signaled(&vm->resv,
> -					       DMA_RESV_USAGE_BOOKKEEP))
> +					       DMA_RESV_USAGE_BOOKKEEP)) {
> +		vm_dbg(&vm->xe->drm, "wait on VM for munmap");
>   		return ERR_PTR(-ETIME);
> +	}
>   
>   	if (ops->pre_commit) {
>   		err = ops->pre_commit(pt_update);
> @@ -1138,7 +1140,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
>   	u64 addr;
>   	int err = 0;
>   	bool usm = !eng && xe->info.supports_usm;
> -	bool first_munmap_rebind = vma && vma->first_munmap_rebind;
> +	bool first_munmap_rebind = vma &&
> +		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
>   
>   	/* Use the CPU if no in syncs and engine is idle */
>   	if (no_in_syncs(syncs, num_syncs) && (!eng || xe_engine_is_idle(eng))) {
> @@ -1259,6 +1262,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
>   	 * trigger preempts before moving forward
>   	 */
>   	if (first_munmap_rebind) {
> +		vm_dbg(&vm->xe->drm, "wait on first_munmap_rebind");
>   		err = job_add_deps(job, &vm->resv,
>   				   DMA_RESV_USAGE_BOOKKEEP);
>   		if (err)
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 6b2943efcdbc..37a1ce6f62a3 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -94,7 +94,7 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
>   				&cur);
>   		return xe_res_dma(&cur) + offset;
>   	} else {
> -		return xe_bo_addr(vma->bo, offset, page_size, is_vram);
> +		return xe_bo_addr(xe_vma_bo(vma), offset, page_size, is_vram);
>   	}
>   }
>   
> @@ -159,7 +159,7 @@ u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
>   
>   	if (is_vram) {
>   		pte |= GEN12_PPGTT_PTE_LM;
> -		if (vma && vma->use_atomic_access_pte_bit)
> +		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
>   			pte |= GEN12_USM_PPGTT_PTE_AE;
>   	}
>   
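
Since several hunks in this patch test driver-private bits in gpuva.flags
(XE_VMA_ATOMIC_PTE_BIT here, XE_VMA_FIRST_REBIND / XE_VMA_LAST_REBIND in
xe_migrate.c and xe_pt.c, and xe_vma_read_only() below), I assume they are
carved out of the GPUVA user-flag space, roughly like this sketch (the exact
bit assignments are my guess, not necessarily what xe_vm_types.h does):

/* Sketch only: driver-private VMA state folded into drm_gpuva.flags.
 * DRM_GPUVA_USERBITS is the first flag bit the GPUVA manager leaves
 * free for driver use.
 */
#define XE_VMA_READ_ONLY	DRM_GPUVA_USERBITS
#define XE_VMA_ATOMIC_PTE_BIT	(DRM_GPUVA_USERBITS << 1)
#define XE_VMA_FIRST_REBIND	(DRM_GPUVA_USERBITS << 2)
#define XE_VMA_LAST_REBIND	(DRM_GPUVA_USERBITS << 3)

static inline bool xe_vma_read_only(struct xe_vma *vma)
{
	return vma->gpuva.flags & XE_VMA_READ_ONLY;
}

That keeps the old bools (use_atomic_access_pte_bit, first/last_munmap_rebind,
the read-only pte_flags bit) out of struct xe_vma entirely.
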
> @@ -738,7 +738,7 @@ static int
>   xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
>   		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
>   {
> -	struct xe_bo *bo = vma->bo;
> +	struct xe_bo *bo = xe_vma_bo(vma);
>   	bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo);
>   	struct xe_res_cursor curs;
>   	struct xe_pt_stage_bind_walk xe_walk = {
> @@ -747,22 +747,23 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
>   			.shifts = xe_normal_pt_shifts,
>   			.max_level = XE_PT_HIGHEST_LEVEL,
>   		},
> -		.vm = vma->vm,
> +		.vm = xe_vma_vm(vma),
>   		.gt = gt,
>   		.curs = &curs,
> -		.va_curs_start = vma->start,
> -		.pte_flags = vma->pte_flags,
> +		.va_curs_start = xe_vma_start(vma),
> +		.pte_flags = xe_vma_read_only(vma) ? PTE_READ_ONLY : 0,
>   		.wupd.entries = entries,
> -		.needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram,
> +		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAGS_64K) &&
> +			is_vram,
>   	};
> -	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
> +	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[gt->info.id];
>   	int ret;
>   
>   	if (is_vram) {
>   		struct xe_gt *bo_gt = xe_bo_to_gt(bo);
>   
>   		xe_walk.default_pte = GEN12_PPGTT_PTE_LM;
> -		if (vma && vma->use_atomic_access_pte_bit)
> +		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
>   			xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE;
>   		xe_walk.dma_offset = bo_gt->mem.vram.io_start -
>   			gt_to_xe(gt)->mem.vram.io_start;
> @@ -778,17 +779,16 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
>   
>   	xe_bo_assert_held(bo);
>   	if (xe_vma_is_userptr(vma))
> -		xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1,
> -				&curs);
> +		xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma), &curs);
>   	else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
> -		xe_res_first(bo->ttm.resource, vma->bo_offset,
> -			     vma->end - vma->start + 1, &curs);
> +		xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
> +			     xe_vma_size(vma), &curs);
>   	else
> -		xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset,
> -				vma->end - vma->start + 1, &curs);
> +		xe_res_first_sg(xe_bo_get_sg(bo), xe_vma_bo_offset(vma),
> +				xe_vma_size(vma), &curs);
>   
> -	ret = drm_pt_walk_range(&pt->drm, pt->level, vma->start, vma->end + 1,
> -				&xe_walk.drm);
> +	ret = drm_pt_walk_range(&pt->drm, pt->level, xe_vma_start(vma),
> +				xe_vma_end(vma), &xe_walk.drm);
>   
>   	*num_entries = xe_walk.wupd.num_used_entries;
>   	return ret;
> @@ -923,13 +923,13 @@ bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma)
>   		},
>   		.gt = gt,
>   	};
> -	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
> +	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[gt->info.id];
>   
>   	if (!(vma->gt_present & BIT(gt->info.id)))
>   		return false;
>   
> -	(void)drm_pt_walk_shared(&pt->drm, pt->level, vma->start, vma->end + 1,
> -				 &xe_walk.drm);
> +	(void)drm_pt_walk_shared(&pt->drm, pt->level, xe_vma_start(vma),
> +				 xe_vma_end(vma), &xe_walk.drm);
>   
>   	return xe_walk.needs_invalidate;
>   }
> @@ -966,21 +966,21 @@ static void xe_pt_abort_bind(struct xe_vma *vma,
>   			continue;
>   
>   		for (j = 0; j < entries[i].qwords; j++)
> -			xe_pt_destroy(entries[i].pt_entries[j].pt, vma->vm->flags, NULL);
> +			xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL);
>   		kfree(entries[i].pt_entries);
>   	}
>   }
>   
>   static void xe_pt_commit_locks_assert(struct xe_vma *vma)
>   {
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   
>   	lockdep_assert_held(&vm->lock);
>   
>   	if (xe_vma_is_userptr(vma))
>   		lockdep_assert_held_read(&vm->userptr.notifier_lock);
>   	else
> -		dma_resv_assert_held(vma->bo->ttm.base.resv);
> +		dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);
>   
>   	dma_resv_assert_held(&vm->resv);
>   }
> @@ -1013,7 +1013,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
>   
>   			if (xe_pt_entry(pt_dir, j_))
>   				xe_pt_destroy(xe_pt_entry(pt_dir, j_),
> -					      vma->vm->flags, deferred);
> +					      xe_vma_vm(vma)->flags, deferred);
>   
>   			pt_dir->dir.entries[j_] = &newpte->drm;
>   		}
> @@ -1074,7 +1074,7 @@ static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
>   	static u32 count;
>   
>   	if (count++ % divisor == divisor - 1) {
> -		struct xe_vm *vm = vma->vm;
> +		struct xe_vm *vm = xe_vma_vm(vma);
>   
>   		vma->userptr.divisor = divisor << 1;
>   		spin_lock(&vm->userptr.invalidated_lock);
> @@ -1117,7 +1117,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
>   		container_of(pt_update, typeof(*userptr_update), base);
>   	struct xe_vma *vma = pt_update->vma;
>   	unsigned long notifier_seq = vma->userptr.notifier_seq;
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   
>   	userptr_update->locked = false;
>   
> @@ -1288,20 +1288,20 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>   		},
>   		.bind = true,
>   	};
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   	u32 num_entries;
>   	struct dma_fence *fence;
>   	struct invalidation_fence *ifence = NULL;
>   	int err;
>   
>   	bind_pt_update.locked = false;
> -	xe_bo_assert_held(vma->bo);
> +	xe_bo_assert_held(xe_vma_bo(vma));
>   	xe_vm_assert_held(vm);
>   	XE_BUG_ON(xe_gt_is_media_type(gt));
>   
> -	vm_dbg(&vma->vm->xe->drm,
> +	vm_dbg(&xe_vma_vm(vma)->xe->drm,
>   	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
> -	       vma->start, vma->end, e);
> +	       xe_vma_start(vma), xe_vma_end(vma) - 1, e);
>   
>   	err = xe_pt_prepare_bind(gt, vma, entries, &num_entries, rebind);
>   	if (err)
> @@ -1310,23 +1310,28 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>   
>   	xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
>   
> -	if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
> +	if (rebind && !xe_vm_no_dma_fences(xe_vma_vm(vma))) {
>   		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
>   		if (!ifence)
>   			return ERR_PTR(-ENOMEM);
>   	}
>   
>   	fence = xe_migrate_update_pgtables(gt->migrate,
> -					   vm, vma->bo,
> +					   vm, xe_vma_bo(vma),
>   					   e ? e : vm->eng[gt->info.id],
>   					   entries, num_entries,
>   					   syncs, num_syncs,
>   					   &bind_pt_update.base);
>   	if (!IS_ERR(fence)) {
> +		bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND;
>   		LLIST_HEAD(deferred);
>   
> +
> +		if (last_munmap_rebind)
> +			vm_dbg(&vm->xe->drm, "last_munmap_rebind");
> +
>   		/* TLB invalidation must be done before signaling rebind */
> -		if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
> +		if (rebind && !xe_vm_no_dma_fences(xe_vma_vm(vma))) {
>   			int err = invalidation_fence_init(gt, ifence, fence,
>   							  vma);
>   			if (err) {
> @@ -1339,12 +1344,12 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>   
>   		/* add shared fence now for pagetable delayed destroy */
>   		dma_resv_add_fence(&vm->resv, fence, !rebind &&
> -				   vma->last_munmap_rebind ?
> +				   last_munmap_rebind ?
>   				   DMA_RESV_USAGE_KERNEL :
>   				   DMA_RESV_USAGE_BOOKKEEP);
>   
> -		if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
> -			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
> +		if (!xe_vma_is_userptr(vma) && !xe_vma_bo(vma)->vm)
> +			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
>   					   DMA_RESV_USAGE_BOOKKEEP);
>   		xe_pt_commit_bind(vma, entries, num_entries, rebind,
>   				  bind_pt_update.locked ? &deferred : NULL);
> @@ -1357,8 +1362,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>   			up_read(&vm->userptr.notifier_lock);
>   			xe_bo_put_commit(&deferred);
>   		}
> -		if (!rebind && vma->last_munmap_rebind &&
> -		    xe_vm_in_compute_mode(vm))
> +		if (!rebind && last_munmap_rebind && xe_vm_in_compute_mode(vm))
>   			queue_work(vm->xe->ordered_wq,
>   				   &vm->preempt.rebind_work);
>   	} else {
> @@ -1506,14 +1510,14 @@ static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma,
>   			.max_level = XE_PT_HIGHEST_LEVEL,
>   		},
>   		.gt = gt,
> -		.modified_start = vma->start,
> -		.modified_end = vma->end + 1,
> +		.modified_start = xe_vma_start(vma),
> +		.modified_end = xe_vma_end(vma),
>   		.wupd.entries = entries,
>   	};
> -	struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
> +	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[gt->info.id];
>   
> -	(void)drm_pt_walk_shared(&pt->drm, pt->level, vma->start, vma->end + 1,
> -				 &xe_walk.drm);
> +	(void)drm_pt_walk_shared(&pt->drm, pt->level, xe_vma_start(vma),
> +				 xe_vma_end(vma), &xe_walk.drm);
>   
>   	return xe_walk.wupd.num_used_entries;
>   }
> @@ -1525,7 +1529,7 @@ xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
>   				  const struct xe_vm_pgtable_update *update)
>   {
>   	struct xe_vma *vma = pt_update->vma;
> -	u64 empty = __xe_pt_empty_pte(gt, vma->vm, update->pt->level);
> +	u64 empty = __xe_pt_empty_pte(gt, xe_vma_vm(vma), update->pt->level);
>   	int i;
>   
>   	XE_BUG_ON(xe_gt_is_media_type(gt));
> @@ -1563,7 +1567,7 @@ xe_pt_commit_unbind(struct xe_vma *vma,
>   			     i++) {
>   				if (xe_pt_entry(pt_dir, i))
>   					xe_pt_destroy(xe_pt_entry(pt_dir, i),
> -						      vma->vm->flags, deferred);
> +						      xe_vma_vm(vma)->flags, deferred);
>   
>   				pt_dir->dir.entries[i] = NULL;
>   			}
> @@ -1612,19 +1616,19 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>   			.vma = vma,
>   		},
>   	};
> -	struct xe_vm *vm = vma->vm;
> +	struct xe_vm *vm = xe_vma_vm(vma);
>   	u32 num_entries;
>   	struct dma_fence *fence = NULL;
>   	struct invalidation_fence *ifence;
>   	LLIST_HEAD(deferred);
>   
> -	xe_bo_assert_held(vma->bo);
> +	xe_bo_assert_held(xe_vma_bo(vma));
>   	xe_vm_assert_held(vm);
>   	XE_BUG_ON(xe_gt_is_media_type(gt));
>   
> -	vm_dbg(&vma->vm->xe->drm,
> +	vm_dbg(&xe_vma_vm(vma)->xe->drm,
>   	       "Preparing unbind, with range [%llx...%llx) engine %p.\n",
> -	       vma->start, vma->end, e);
> +	       xe_vma_start(vma), xe_vma_end(vma) - 1, e);
>   
>   	num_entries = xe_pt_stage_unbind(gt, vma, entries);
>   	XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
> @@ -1663,8 +1667,8 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>   				   DMA_RESV_USAGE_BOOKKEEP);
>   
>   		/* This fence will be installed by caller when doing eviction */
> -		if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
> -			dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
> +		if (!xe_vma_is_userptr(vma) && !xe_vma_bo(vma)->vm)
> +			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
>   					   DMA_RESV_USAGE_BOOKKEEP);
>   		xe_pt_commit_unbind(vma, entries, num_entries,
>   				    unbind_pt_update.locked ? &deferred : NULL);
> diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
> index 2f8eb7ebe9a7..12e12673fc91 100644
> --- a/drivers/gpu/drm/xe/xe_trace.h
> +++ b/drivers/gpu/drm/xe/xe_trace.h
> @@ -18,7 +18,7 @@
>   #include "xe_gt_types.h"
>   #include "xe_guc_engine_types.h"
>   #include "xe_sched_job.h"
> -#include "xe_vm_types.h"
> +#include "xe_vm.h"
>   
>   DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
>   		    TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
> @@ -368,10 +368,10 @@ DECLARE_EVENT_CLASS(xe_vma,
>   
>   		    TP_fast_assign(
>   			   __entry->vma = (unsigned long)vma;
> -			   __entry->asid = vma->vm->usm.asid;
> -			   __entry->start = vma->start;
> -			   __entry->end = vma->end;
> -			   __entry->ptr = (u64)vma->userptr.ptr;
> +			   __entry->asid = xe_vma_vm(vma)->usm.asid;
> +			   __entry->start = xe_vma_start(vma);
> +			   __entry->end = xe_vma_end(vma) - 1;
> +			   __entry->ptr = xe_vma_userptr(vma);
>   			   ),
>   
>   		    TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,",

Would it be possible to split this patch (both for review and for possible
regression-debugging purposes later): introduce the new accessor macros first
in one patch, and then do the actual xe_vm conversion to GPUVA in a separate
patch?

Anyway, I'll continue reviewing the patch as-is.

/Thomas



