[Intel-xe] [PATCH v4 8/9] drm/xe: Port Xe to GPUVA

Thomas Hellström thomas.hellstrom at linux.intel.com
Fri Jul 7 15:38:50 UTC 2023


On Fri, 2023-06-30 at 10:58 -0700, Matthew Brost wrote:
> Rather than open coding VM binds and VMA tracking, use the GPUVA
> library. GPUVA provides a common infrastructure for VM binds to use
> mmap / munmap semantics and support for VK sparse bindings.
> 
> The concepts are:
> 
> 1) xe_vm inherits from drm_gpuva_manager
> 2) xe_vma inherits from drm_gpuva
> 3) xe_vma_op inherits from drm_gpuva_op
> 4) VM bind operations (MAP, UNMAP, PREFETCH, UNMAP_ALL) call into the
> GPUVA code to generate a VMA operations list which is parsed,
> committed, and executed.
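
For anyone reading along who has not looked at GPUVA yet: "inherits" above
is the usual kernel embed-plus-container_of idiom rather than anything
language level. A minimal sketch of the idea, with field names simplified
relative to the real xe structures (container_of() from
<linux/container_of.h>):

	/* The driver VMA embeds the generic GPUVA node as its "base class". */
	struct xe_vma {
		struct drm_gpuva gpuva;	/* node tracked by the GPUVA manager */
		/* driver-private state (userptr, rebind links, ...) follows */
	};

	/* Recover the driver type from a node handed back by the core,
	 * along the lines of the gpuva_to_vma() helper this patch uses
	 * (presumably a container_of() wrapper). */
	static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva)
	{
		return container_of(gpuva, struct xe_vma, gpuva);
	}

The same idiom covers xe_vm / drm_gpuva_manager and xe_vma_op /
drm_gpuva_op; a sketch of the operations-list flow from point 4 follows
the diffstat below.
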
> 
> v2 (CI): Add break after default in case statement.
> v3: Rebase
> v4: Fix some error handling
> v5: Use unlocked version VMA in error paths
> v6: Rebase, address some review feedback mainly Thomas H
> v7: Fix compile error in xe_vma_op_unwind, address checkpatch
> 
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>


> ---
>  drivers/gpu/drm/xe/tests/xe_migrate.c |    2 +-
>  drivers/gpu/drm/xe/xe_bo.c            |    7 +-
>  drivers/gpu/drm/xe/xe_device.c        |    2 +-
>  drivers/gpu/drm/xe/xe_gt_pagefault.c  |   20 +-
>  drivers/gpu/drm/xe/xe_migrate.c       |   10 +-
>  drivers/gpu/drm/xe/xe_pt.c            |   40 +-
>  drivers/gpu/drm/xe/xe_pt.h            |    2 +-
>  drivers/gpu/drm/xe/xe_vm.c            | 1794 ++++++++++++-------------
>  drivers/gpu/drm/xe/xe_vm.h            |   35 +-
>  drivers/gpu/drm/xe/xe_vm_madvise.c    |   77 +-
>  drivers/gpu/drm/xe/xe_vm_types.h      |  173 ++-
>  11 files changed, 1069 insertions(+), 1093 deletions(-)
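
Since the xe_vm.c hunk below is large, here is the operations-list flow
from point 4 condensed into one place, stitched together from the hunks
that follow (argument handling and error paths trimmed, so read it as a
sketch of this version of the GPUVA API rather than standalone code):

	/* Allocation hook so the GPUVA core hands back driver-sized ops
	 * (struct xe_vma_op embeds struct drm_gpuva_op as its base). */
	static struct drm_gpuva_op *xe_vm_op_alloc(void)
	{
		struct xe_vma_op *op = kzalloc(sizeof(*op), GFP_KERNEL);

		return op ? &op->base : NULL;
	}

	static struct drm_gpuva_fn_ops gpuva_ops = {
		.op_alloc = xe_vm_op_alloc,
	};

	/* One manager per VM, spanning the whole VA range (xe_vm_create()). */
	drm_gpuva_manager_init(&vm->mgr, "Xe VM", 0, vm->size, 0, 0, &gpuva_ops);

	/* Each bind IOCTL asks the core for a split/merge operation list and
	 * fills in driver-specific fields before the parse / commit / execute
	 * steps (vm_bind_ioctl_ops_create() and friends). */
	ops = drm_gpuva_sm_map_ops_create(&vm->mgr, addr, range, obj,
					  bo_offset_or_userptr);
	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

		op->tile_mask = tile_mask;
		/* op->map.immediate / read_only / is_null come from the flags */
	}
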
> 
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c
> b/drivers/gpu/drm/xe/tests/xe_migrate.c
> index 4c79c1dfa772..aedfb3dd559e 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> @@ -300,7 +300,7 @@ static void xe_migrate_sanity_test(struct
> xe_migrate *m, struct kunit *test)
>         /* First part of the test, are we updating our pagetable bo
> with a new entry? */
>         xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1),
> u64,
>                   0xdeaddeadbeefbeef);
> -       expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
> +       expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0);
>         if (m->eng->vm->flags & XE_VM_FLAGS_64K)
>                 expected |= XE_PTE_PS64;
>         if (xe_bo_is_vram(pt))
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 0cd179ba41a5..3ce959a2ee91 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -397,7 +397,8 @@ static int xe_bo_trigger_rebind(struct xe_device
> *xe, struct xe_bo *bo,
>  {
>         struct dma_resv_iter cursor;
>         struct dma_fence *fence;
> -       struct xe_vma *vma;
> +       struct drm_gpuva *gpuva;
> +       struct drm_gem_object *obj = &bo->ttm.base;
>         int ret = 0;
>  
>         dma_resv_assert_held(bo->ttm.base.resv);
> @@ -410,7 +411,8 @@ static int xe_bo_trigger_rebind(struct xe_device
> *xe, struct xe_bo *bo,
>                 dma_resv_iter_end(&cursor);
>         }
>  
> -       list_for_each_entry(vma, &bo->vmas, bo_link) {
> +       drm_gem_for_each_gpuva(gpuva, obj) {
> +               struct xe_vma *vma = gpuva_to_vma(gpuva);
>                 struct xe_vm *vm = xe_vma_vm(vma);
>  
>                 trace_xe_vma_evict(vma);
> @@ -439,7 +441,6 @@ static int xe_bo_trigger_rebind(struct xe_device
> *xe, struct xe_bo *bo,
>  
>                 } else {
>                         bool vm_resv_locked = false;
> -                       struct xe_vm *vm = xe_vma_vm(vma);
>  
>                         /*
>                          * We need to put the vma on the vm's
> rebind_list,
> diff --git a/drivers/gpu/drm/xe/xe_device.c
> b/drivers/gpu/drm/xe/xe_device.c
> index c7985af85a53..5d5859c87041 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -135,7 +135,7 @@ static struct drm_driver driver = {
>         .driver_features =
>             DRIVER_GEM |
>             DRIVER_RENDER | DRIVER_SYNCOBJ |
> -           DRIVER_SYNCOBJ_TIMELINE,
> +           DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
>         .open = xe_file_open,
>         .postclose = xe_file_close,
>  
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index 0e91ab67d617..125e4744fa38 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -75,10 +75,10 @@ static bool vma_is_valid(struct xe_gt *gt, struct
> xe_vma *vma)
>                 !(BIT(gt->info.id) & vma->usm.tile_invalidated);
>  }
>  
> -static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
> +static bool vma_matches(struct xe_vma *vma, u64 page_addr)
>  {
> -       if (xe_vma_start(lookup) > xe_vma_end(vma) - 1 ||
> -           xe_vma_end(lookup) - 1 < xe_vma_start(vma))
> +       if (page_addr > xe_vma_end(vma) - 1 ||
> +           page_addr + SZ_4K - 1 < xe_vma_start(vma))
>                 return false;
>  
>         return true;
> @@ -91,16 +91,14 @@ static bool only_needs_bo_lock(struct xe_bo *bo)
>  
>  static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
>  {
> -       struct xe_vma *vma = NULL, lookup;
> +       struct xe_vma *vma = NULL;
>  
> -       lookup.start = page_addr;
> -       lookup.end = lookup.start + SZ_4K - 1;
>         if (vm->usm.last_fault_vma) {   /* Fast lookup */
> -               if (vma_matches(vm->usm.last_fault_vma, &lookup))
> +               if (vma_matches(vm->usm.last_fault_vma, page_addr))
>                         vma = vm->usm.last_fault_vma;
>         }
>         if (!vma)
> -               vma = xe_vm_find_overlapping_vma(vm, &lookup);
> +               vma = xe_vm_find_overlapping_vma(vm, page_addr,
> SZ_4K);
>  
>         return vma;
>  }
> @@ -489,12 +487,8 @@ static struct xe_vma *get_acc_vma(struct xe_vm
> *vm, struct acc *acc)
>  {
>         u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity)
> - 1) *
>                 sub_granularity_in_byte(acc->granularity);
> -       struct xe_vma lookup;
> -
> -       lookup.start = page_va;
> -       lookup.end = lookup.start + SZ_4K - 1;
>  
> -       return xe_vm_find_overlapping_vma(vm, &lookup);
> +       return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
>  }
>  
>  static int handle_acc(struct xe_gt *gt, struct acc *acc)
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c
> b/drivers/gpu/drm/xe/xe_migrate.c
> index 55e113dd7e82..2755a02473cf 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -197,7 +197,7 @@ static int xe_migrate_prepare_vm(struct xe_tile
> *tile, struct xe_migrate *m,
>         /* Map the entire BO in our level 0 pt */
>         for (i = 0, level = 0; i < num_entries; level++) {
>                 entry = xe_pte_encode(NULL, bo, i * XE_PAGE_SIZE,
> -                                     XE_CACHE_WB, 0, 0);
> +                                     XE_CACHE_WB, 0);
>  
>                 xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
> entry);
>  
> @@ -216,7 +216,7 @@ static int xe_migrate_prepare_vm(struct xe_tile
> *tile, struct xe_migrate *m,
>                      i += vm->flags & XE_VM_FLAGS_64K ?
> XE_64K_PAGE_SIZE :
>                      XE_PAGE_SIZE) {
>                         entry = xe_pte_encode(NULL, batch, i,
> -                                             XE_CACHE_WB, 0, 0);
> +                                             XE_CACHE_WB, 0);
>  
>                         xe_map_wr(xe, &bo->vmap, map_ofs + level * 8,
> u64,
>                                   entry);
> @@ -1159,7 +1159,8 @@ xe_migrate_update_pgtables(struct xe_migrate
> *m,
>         u64 addr;
>         int err = 0;
>         bool usm = !eng && xe->info.supports_usm;
> -       bool first_munmap_rebind = vma && vma->first_munmap_rebind;
> +       bool first_munmap_rebind = vma &&
> +               vma->gpuva.flags & XE_VMA_FIRST_REBIND;
>  
>         /* Use the CPU if no in syncs and engine is idle */
>         if (no_in_syncs(syncs, num_syncs) && (!eng ||
> xe_engine_is_idle(eng))) {
> @@ -1231,8 +1232,7 @@ xe_migrate_update_pgtables(struct xe_migrate
> *m,
>  
>                         BUG_ON(pt_bo->size != SZ_4K);
>  
> -                       addr = xe_pte_encode(NULL, pt_bo, 0,
> XE_CACHE_WB,
> -                                            0, 0);
> +                       addr = xe_pte_encode(NULL, pt_bo, 0,
> XE_CACHE_WB, 0);
>                         bb->cs[bb->len++] = lower_32_bits(addr);
>                         bb->cs[bb->len++] = upper_32_bits(addr);
>                 }
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index a697d43ec293..00855681c0d5 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -100,15 +100,15 @@ static dma_addr_t vma_addr(struct xe_vma *vma,
> u64 offset,
>         }
>  }
>  
> -static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32
> flags,
> -                       u32 pt_level)
> +static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
> +                       struct xe_vma *vma, u32 pt_level)
>  {
>         pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
>  
> -       if (unlikely(flags & XE_PTE_FLAG_READ_ONLY))
> +       if (unlikely(vma && xe_vma_read_only(vma)))
>                 pte &= ~XE_PAGE_RW;
>  
> -       if (unlikely(flags & XE_PTE_FLAG_NULL))
> +       if (unlikely(vma && xe_vma_is_null(vma)))
>                 pte |= XE_PTE_NULL;
>  
>         /* FIXME: I don't think the PPAT handling is correct for MTL
> */
> @@ -142,7 +142,6 @@ static u64 __pte_encode(u64 pte, enum
> xe_cache_level cache, u32 flags,
>   * @bo: If @vma is NULL, representing the memory to point to.
>   * @offset: The offset into @vma or @bo.
>   * @cache: The cache level indicating
> - * @flags: Currently only supports PTE_READ_ONLY for read-only
> access.
>   * @pt_level: The page-table level of the page-table into which the
> entry
>   * is to be inserted.
>   *
> @@ -150,7 +149,7 @@ static u64 __pte_encode(u64 pte, enum
> xe_cache_level cache, u32 flags,
>   */
>  u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
>                   u64 offset, enum xe_cache_level cache,
> -                 u32 flags, u32 pt_level)
> +                 u32 pt_level)
>  {
>         u64 pte;
>         bool is_vram;
> @@ -162,11 +161,11 @@ u64 xe_pte_encode(struct xe_vma *vma, struct
> xe_bo *bo,
>  
>         if (is_vram) {
>                 pte |= XE_PPGTT_PTE_LM;
> -               if (vma && vma->use_atomic_access_pte_bit)
> +               if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
>                         pte |= XE_USM_PPGTT_PTE_AE;
>         }
>  
> -       return __pte_encode(pte, cache, flags, pt_level);
> +       return __pte_encode(pte, cache, vma, pt_level);
>  }
>  
>  static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
> @@ -179,7 +178,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile
> *tile, struct xe_vm *vm,
>  
>         if (level == 0) {
>                 u64 empty = xe_pte_encode(NULL, vm->scratch_bo[id],
> 0,
> -                                         XE_CACHE_WB, 0, 0);
> +                                         XE_CACHE_WB, 0);
>  
>                 return empty;
>         } else {
> @@ -424,10 +423,9 @@ struct xe_pt_stage_bind_walk {
>          */
>         bool needs_64K;
>         /**
> -        * @pte_flags: Flags determining PTE setup. These are not
> flags
> -        * encoded directly in the PTE. See @default_pte for those.
> +        * @vma: VMA being mapped
>          */
> -       u32 pte_flags;
> +       struct xe_vma *vma;
>  
>         /* Also input, but is updated during the walk*/
>         /** @curs: The DMA address cursor. */
> @@ -564,7 +562,7 @@ static bool xe_pt_hugepte_possible(u64 addr, u64
> next, unsigned int level,
>                 return false;
>  
>         /* null VMA's do not have dma addresses */
> -       if (xe_walk->pte_flags & XE_PTE_FLAG_NULL)
> +       if (xe_vma_is_null(xe_walk->vma))
>                 return true;
>  
>         /* Is the DMA address huge PTE size aligned? */
> @@ -590,7 +588,7 @@ xe_pt_scan_64K(u64 addr, u64 next, struct
> xe_pt_stage_bind_walk *xe_walk)
>                 return false;
>  
>         /* null VMA's do not have dma addresses */
> -       if (xe_walk->pte_flags & XE_PTE_FLAG_NULL)
> +       if (xe_vma_is_null(xe_walk->vma))
>                 return true;
>  
>         xe_res_next(&curs, addr - xe_walk->va_curs_start);
> @@ -643,14 +641,13 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent,
> pgoff_t offset,
>         /* Is this a leaf entry ?*/
>         if (level == 0 || xe_pt_hugepte_possible(addr, next, level,
> xe_walk)) {
>                 struct xe_res_cursor *curs = xe_walk->curs;
> -               bool is_null = xe_walk->pte_flags & XE_PTE_FLAG_NULL;
> +               bool is_null = xe_vma_is_null(xe_walk->vma);
>  
>                 XE_WARN_ON(xe_walk->va_curs_start != addr);
>  
>                 pte = __pte_encode(is_null ? 0 :
>                                    xe_res_dma(curs) + xe_walk-
> >dma_offset,
> -                                  xe_walk->cache, xe_walk-
> >pte_flags,
> -                                  level);
> +                                  xe_walk->cache, xe_walk->vma,
> level);
>                 pte |= xe_walk->default_pte;
>  
>                 /*
> @@ -762,7 +759,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct
> xe_vma *vma,
>                 .tile = tile,
>                 .curs = &curs,
>                 .va_curs_start = xe_vma_start(vma),
> -               .pte_flags = vma->pte_flags,
> +               .vma = vma,
>                 .wupd.entries = entries,
>                 .needs_64K = (xe_vma_vm(vma)->flags &
> XE_VM_FLAGS_64K) && is_vram,
>         };
> @@ -771,7 +768,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct
> xe_vma *vma,
>  
>         if (is_vram) {
>                 xe_walk.default_pte = XE_PPGTT_PTE_LM;
> -               if (vma && vma->use_atomic_access_pte_bit)
> +               if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
>                         xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
>                 xe_walk.dma_offset = vram_region_gpu_offset(bo-
> >ttm.resource);
>                 xe_walk.cache = XE_CACHE_WB;
> @@ -1343,6 +1340,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct
> xe_vma *vma, struct xe_engine *e,
>                                            syncs, num_syncs,
>                                            &bind_pt_update.base);
>         if (!IS_ERR(fence)) {
> +               bool last_munmap_rebind = vma->gpuva.flags &
> XE_VMA_LAST_REBIND;
>                 LLIST_HEAD(deferred);
>  
>                 /* TLB invalidation must be done before signaling
> rebind */
> @@ -1359,7 +1357,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct
> xe_vma *vma, struct xe_engine *e,
>  
>                 /* add shared fence now for pagetable delayed destroy
> */
>                 dma_resv_add_fence(&vm->resv, fence, !rebind &&
> -                                  vma->last_munmap_rebind ?
> +                                  last_munmap_rebind ?
>                                    DMA_RESV_USAGE_KERNEL :
>                                    DMA_RESV_USAGE_BOOKKEEP);
>  
> @@ -1377,7 +1375,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct
> xe_vma *vma, struct xe_engine *e,
>                         up_read(&vm->userptr.notifier_lock);
>                         xe_bo_put_commit(&deferred);
>                 }
> -               if (!rebind && vma->last_munmap_rebind &&
> +               if (!rebind && last_munmap_rebind &&
>                     xe_vm_in_compute_mode(vm))
>                         queue_work(vm->xe->ordered_wq,
>                                    &vm->preempt.rebind_work);
> diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
> index 54e8a043d353..aaf4b7b851e2 100644
> --- a/drivers/gpu/drm/xe/xe_pt.h
> +++ b/drivers/gpu/drm/xe/xe_pt.h
> @@ -50,5 +50,5 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
>  
>  u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
>                   u64 offset, enum xe_cache_level cache,
> -                 u32 flags, u32 pt_level);
> +                 u32 pt_level);
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 3ea872857b9e..1da61beb1765 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -65,7 +65,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
>         lockdep_assert_held(&vm->lock);
>         XE_BUG_ON(!xe_vma_is_userptr(vma));
>  retry:
> -       if (vma->destroyed)
> +       if (vma->gpuva.flags & XE_VMA_DESTROYED)
>                 return 0;
>  
>         notifier_seq = mmu_interval_read_begin(&vma-
> >userptr.notifier);
> @@ -466,7 +466,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct
> ww_acquire_ctx *ww,
>                 xe_bo_assert_held(xe_vma_bo(vma));
>  
>                 list_del_init(&vma->notifier.rebind_link);
> -               if (vma->tile_present && !vma->destroyed)
> +               if (vma->tile_present && !(vma->gpuva.flags &
> XE_VMA_DESTROYED))
>                         list_move_tail(&vma->rebind_link, &vm-
> >rebind_list);
>         }
>         spin_unlock(&vm->notifier.list_lock);
> @@ -609,7 +609,8 @@ static void preempt_rebind_work_func(struct
> work_struct *w)
>                 goto out_unlock;
>  
>         list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
> -               if (xe_vma_has_no_bo(vma) || vma->destroyed)
> +               if (xe_vma_has_no_bo(vma) ||
> +                   vma->gpuva.flags & XE_VMA_DESTROYED)
>                         continue;
>  
>                 err = xe_bo_validate(xe_vma_bo(vma), vm, false);
> @@ -723,7 +724,8 @@ static bool vma_userptr_invalidate(struct
> mmu_interval_notifier *mni,
>          * Tell exec and rebind worker they need to repin and rebind
> this
>          * userptr.
>          */
> -       if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma-
> >tile_present) {
> +       if (!xe_vm_in_fault_mode(vm) &&
> +           !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma-
> >tile_present) {
>                 spin_lock(&vm->userptr.invalidated_lock);
>                 list_move_tail(&vma->userptr.invalidate_link,
>                                &vm->userptr.invalidated);
> @@ -828,7 +830,8 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
>  
>  static struct dma_fence *
>  xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
> -              struct xe_sync_entry *syncs, u32 num_syncs);
> +              struct xe_sync_entry *syncs, u32 num_syncs,
> +              bool first_op, bool last_op);
>  
>  struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>  {
> @@ -849,7 +852,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm,
> bool rebind_worker)
>                         trace_xe_vma_rebind_worker(vma);
>                 else
>                         trace_xe_vma_rebind_exec(vma);
> -               fence = xe_vm_bind_vma(vma, NULL, NULL, 0);
> +               fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false,
> false);
>                 if (IS_ERR(fence))
>                         return fence;
>         }
> @@ -885,14 +888,14 @@ static struct xe_vma *xe_vma_create(struct
> xe_vm *vm,
>         INIT_LIST_HEAD(&vma->notifier.rebind_link);
>         INIT_LIST_HEAD(&vma->extobj.link);
>  
> -       vma->vm = vm;
> -       vma->start = start;
> -       vma->end = end;
> -       vma->pte_flags = 0;
> +       INIT_LIST_HEAD(&vma->gpuva.gem.entry);
> +       vma->gpuva.mgr = &vm->mgr;
> +       vma->gpuva.va.addr = start;
> +       vma->gpuva.va.range = end - start + 1;
>         if (read_only)
> -               vma->pte_flags |= XE_PTE_FLAG_READ_ONLY;
> +               vma->gpuva.flags |= XE_VMA_READ_ONLY;
>         if (is_null)
> -               vma->pte_flags |= XE_PTE_FLAG_NULL;
> +               vma->gpuva.flags |= DRM_GPUVA_SPARSE;
>  
>         if (tile_mask) {
>                 vma->tile_mask = tile_mask;
> @@ -902,19 +905,21 @@ static struct xe_vma *xe_vma_create(struct
> xe_vm *vm,
>         }
>  
>         if (vm->xe->info.platform == XE_PVC)
> -               vma->use_atomic_access_pte_bit = true;
> +               vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
>  
>         if (bo) {
>                 xe_bo_assert_held(bo);
> -               vma->bo_offset = bo_offset_or_userptr;
> -               vma->bo = xe_bo_get(bo);
> -               list_add_tail(&vma->bo_link, &bo->vmas);
> +
> +               drm_gem_object_get(&bo->ttm.base);
> +               vma->gpuva.gem.obj = &bo->ttm.base;
> +               vma->gpuva.gem.offset = bo_offset_or_userptr;
> +               drm_gpuva_link(&vma->gpuva);
>         } else /* userptr or null */ {
>                 if (!is_null) {
>                         u64 size = end - start + 1;
>                         int err;
>  
> -                       vma->userptr.ptr = bo_offset_or_userptr;
> +                       vma->gpuva.gem.offset = bo_offset_or_userptr;
>  
>                         err = mmu_interval_notifier_insert(&vma-
> >userptr.notifier,
>                                                            current-
> >mm,
> @@ -989,9 +994,14 @@ static struct xe_vma *
>  bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
>                             struct xe_vma *ignore)
>  {
> -       struct xe_vma *vma;
> +       struct drm_gpuva *gpuva;
> +       struct drm_gem_object *obj = &bo->ttm.base;
> +
> +       xe_bo_assert_held(bo);
> +
> +       drm_gem_for_each_gpuva(gpuva, obj) {
> +               struct xe_vma *vma = gpuva_to_vma(gpuva);
>  
> -       list_for_each_entry(vma, &bo->vmas, bo_link) {
>                 if (vma != ignore && xe_vma_vm(vma) == vm)
>                         return vma;
>         }
> @@ -1014,6 +1024,8 @@ static bool bo_has_vm_references(struct xe_bo
> *bo, struct xe_vm *vm,
>  
>  static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
>  {
> +       lockdep_assert_held_write(&vm->lock);
> +
>         list_add(&vma->extobj.link, &vm->extobj.list);
>         vm->extobj.entries++;
>  }
> @@ -1047,19 +1059,21 @@ static void xe_vma_destroy(struct xe_vma
> *vma, struct dma_fence *fence)
>         XE_BUG_ON(!list_empty(&vma->unbind_link));
>  
>         if (xe_vma_is_userptr(vma)) {
> -               XE_WARN_ON(!vma->destroyed);
> +               XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
> +
>                 spin_lock(&vm->userptr.invalidated_lock);
>                 list_del_init(&vma->userptr.invalidate_link);
>                 spin_unlock(&vm->userptr.invalidated_lock);
>                 list_del(&vma->userptr_link);
>         } else if (!xe_vma_is_null(vma)) {
>                 xe_bo_assert_held(xe_vma_bo(vma));
> -               list_del(&vma->bo_link);
>  
>                 spin_lock(&vm->notifier.list_lock);
>                 list_del(&vma->notifier.rebind_link);
>                 spin_unlock(&vm->notifier.list_lock);
>  
> +               drm_gpuva_unlink(&vma->gpuva);
> +
>                 if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) {
>                         struct xe_vma *other;
>  
> @@ -1114,78 +1128,64 @@ static void xe_vma_destroy_unlocked(struct
> xe_vma *vma)
>                 xe_bo_put(bo);
>  }
>  
> -static struct xe_vma *to_xe_vma(const struct rb_node *node)
> -{
> -       BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
> -       return (struct xe_vma *)node;
> -}
> -
> -static int xe_vma_cmp(struct xe_vma *a, struct xe_vma *b)
> -{
> -       if (xe_vma_end(a) - 1 < xe_vma_start(b)) {
> -               return -1;
> -       } else if (xe_vma_end(b) - 1 < xe_vma_start(a)) {
> -               return 1;
> -       } else {
> -               return 0;
> -       }
> -}
> -
> -static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node
> *b)
> -{
> -       return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0;
> -}
> -
> -int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
> -{
> -       struct xe_vma *cmp = to_xe_vma(node);
> -       struct xe_vma *own = (struct xe_vma *)key;
> -
> -       if (xe_vma_start(own) > xe_vma_end(cmp) - 1)
> -               return 1;
> -
> -       if (xe_vma_end(own) - 1 < xe_vma_start(cmp))
> -               return -1;
> -
> -       return 0;
> -}
> -
>  struct xe_vma *
> -xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma)
> +xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
>  {
> -       struct rb_node *node;
> +       struct drm_gpuva *gpuva;
>  
>         lockdep_assert_held(&vm->lock);
>  
>         if (xe_vm_is_closed_or_banned(vm))
>                 return NULL;
>  
> -       XE_BUG_ON(xe_vma_end(vma) > vm->size);
> +       XE_BUG_ON(start + range > vm->size);
>  
> -       node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
> +       gpuva = drm_gpuva_find_first(&vm->mgr, start, range);
>  
> -       return node ? to_xe_vma(node) : NULL;
> +       return gpuva ? gpuva_to_vma(gpuva) : NULL;
>  }
>  
> -static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
> +static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
>  {
> +       int err;
> +
>         XE_BUG_ON(xe_vma_vm(vma) != vm);
>         lockdep_assert_held(&vm->lock);
>  
> -       rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
> +       err = drm_gpuva_insert(&vm->mgr, &vma->gpuva);
> +       XE_WARN_ON(err);        /* Shouldn't be possible */
> +
> +       return err;
>  }
>  
> -static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
> +static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma,
> bool remove)
>  {
>         XE_BUG_ON(xe_vma_vm(vma) != vm);
>         lockdep_assert_held(&vm->lock);
>  
> -       rb_erase(&vma->vm_node, &vm->vmas);
> +       if (remove)
> +               drm_gpuva_remove(&vma->gpuva);
>         if (vm->usm.last_fault_vma == vma)
>                 vm->usm.last_fault_vma = NULL;
>  }
>  
> -static void async_op_work_func(struct work_struct *w);
> +static struct drm_gpuva_op *xe_vm_op_alloc(void)
> +{
> +       struct xe_vma_op *op;
> +
> +       op = kzalloc(sizeof(*op), GFP_KERNEL);
> +
> +       if (unlikely(!op))
> +               return NULL;
> +
> +       return &op->base;
> +}
> +
> +static struct drm_gpuva_fn_ops gpuva_ops = {
> +       .op_alloc = xe_vm_op_alloc,
> +};
> +
> +static void xe_vma_op_work_func(struct work_struct *w);
>  static void vm_destroy_work_func(struct work_struct *w);
>  
>  struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> @@ -1205,7 +1205,6 @@ struct xe_vm *xe_vm_create(struct xe_device
> *xe, u32 flags)
>  
>         vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);
>  
> -       vm->vmas = RB_ROOT;
>         vm->flags = flags;
>  
>         init_rwsem(&vm->lock);
> @@ -1221,7 +1220,7 @@ struct xe_vm *xe_vm_create(struct xe_device
> *xe, u32 flags)
>         spin_lock_init(&vm->notifier.list_lock);
>  
>         INIT_LIST_HEAD(&vm->async_ops.pending);
> -       INIT_WORK(&vm->async_ops.work, async_op_work_func);
> +       INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func);
>         spin_lock_init(&vm->async_ops.lock);
>  
>         INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
> @@ -1241,6 +1240,8 @@ struct xe_vm *xe_vm_create(struct xe_device
> *xe, u32 flags)
>         if (err)
>                 goto err_put;
>  
> +       drm_gpuva_manager_init(&vm->mgr, "Xe VM", 0, vm->size, 0, 0,
> +                              &gpuva_ops);
>         if (IS_DGFX(xe) && xe->info.vram_flags &
> XE_VRAM_FLAGS_NEED64K)
>                 vm->flags |= XE_VM_FLAGS_64K;
>  
> @@ -1346,6 +1347,7 @@ struct xe_vm *xe_vm_create(struct xe_device
> *xe, u32 flags)
>                         xe_pt_destroy(vm->pt_root[id], vm->flags,
> NULL);
>         }
>         dma_resv_unlock(&vm->resv);
> +       drm_gpuva_manager_destroy(&vm->mgr);
>  err_put:
>         dma_resv_fini(&vm->resv);
>         kfree(vm);
> @@ -1395,14 +1397,19 @@ static void vm_error_capture(struct xe_vm
> *vm, int err,
>  
>  void xe_vm_close_and_put(struct xe_vm *vm)
>  {
> -       struct rb_root contested = RB_ROOT;
> +       struct list_head contested;
>         struct ww_acquire_ctx ww;
>         struct xe_device *xe = vm->xe;
>         struct xe_tile *tile;
> +       struct xe_vma *vma, *next_vma;
> +       struct drm_gpuva *gpuva;
>         u8 id;
> +       DRM_GPUVA_ITER(it, &vm->mgr, 0);
>  
>         XE_BUG_ON(vm->preempt.num_engines);
>  
> +       INIT_LIST_HEAD(&contested);
> +
>         vm->size = 0;
>         smp_mb();
>         flush_async_ops(vm);
> @@ -1419,16 +1426,17 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>  
>         down_write(&vm->lock);
>         xe_vm_lock(vm, &ww, 0, false);
> -       while (vm->vmas.rb_node) {
> -               struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);
> +       drm_gpuva_iter_for_each(gpuva, it) {
> +               vma = gpuva_to_vma(gpuva);
>  
>                 if (xe_vma_has_no_bo(vma)) {
>                         down_read(&vm->userptr.notifier_lock);
> -                       vma->destroyed = true;
> +                       vma->gpuva.flags |= XE_VMA_DESTROYED;
>                         up_read(&vm->userptr.notifier_lock);
>                 }
>  
> -               rb_erase(&vma->vm_node, &vm->vmas);
> +               xe_vm_remove_vma(vm, vma, false);
> +               drm_gpuva_iter_remove(&it);
>  
>                 /* easy case, remove from VMA? */
>                 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
> @@ -1436,7 +1444,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>                         continue;
>                 }
>  
> -               rb_add(&vma->vm_node, &contested, xe_vma_less_cb);
> +               list_add_tail(&vma->unbind_link, &contested);
>         }
>  
>         /*
> @@ -1459,19 +1467,14 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>         }
>         xe_vm_unlock(vm, &ww);
>  
> -       if (contested.rb_node) {
> -
> -               /*
> -                * VM is now dead, cannot re-add nodes to vm->vmas if
> it's NULL
> -                * Since we hold a refcount to the bo, we can remove
> and free
> -                * the members safely without locking.
> -                */
> -               while (contested.rb_node) {
> -                       struct xe_vma *vma =
> to_xe_vma(contested.rb_node);
> -
> -                       rb_erase(&vma->vm_node, &contested);
> -                       xe_vma_destroy_unlocked(vma);
> -               }
> +       /*
> +        * VM is now dead, cannot re-add nodes to vm->vmas if it's
> NULL
> +        * Since we hold a refcount to the bo, we can remove and free
> +        * the members safely without locking.
> +        */
> +       list_for_each_entry_safe(vma, next_vma, &contested,
> unbind_link) {
> +               list_del_init(&vma->unbind_link);
> +               xe_vma_destroy_unlocked(vma);
>         }
>  
>         if (vm->async_ops.error_capture.addr)
> @@ -1480,6 +1483,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>         XE_WARN_ON(!list_empty(&vm->extobj.list));
>         up_write(&vm->lock);
>  
> +       drm_gpuva_manager_destroy(&vm->mgr);
> +
>         mutex_lock(&xe->usm.lock);
>         if (vm->flags & XE_VM_FLAG_FAULT_MODE)
>                 xe->usm.num_vm_in_fault_mode--;
> @@ -1565,7 +1570,8 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm,
> struct xe_tile *tile)
>  
>  static struct dma_fence *
>  xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
> -                struct xe_sync_entry *syncs, u32 num_syncs)
> +                struct xe_sync_entry *syncs, u32 num_syncs,
> +                bool first_op, bool last_op)
>  {
>         struct xe_tile *tile;
>         struct dma_fence *fence = NULL;
> @@ -1590,7 +1596,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> xe_engine *e,
>                 if (!(vma->tile_present & BIT(id)))
>                         goto next;
>  
> -               fence = __xe_pt_unbind_vma(tile, vma, e, syncs,
> num_syncs);
> +               fence = __xe_pt_unbind_vma(tile, vma, e, first_op ?
> syncs : NULL,
> +                                          first_op ? num_syncs : 0);
>                 if (IS_ERR(fence)) {
>                         err = PTR_ERR(fence);
>                         goto err_fences;
> @@ -1616,8 +1623,11 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> xe_engine *e,
>                 }
>         }
>  
> -       for (i = 0; i < num_syncs; i++)
> -               xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base
> : fence);
> +       if (last_op) {
> +               for (i = 0; i < num_syncs; i++)
> +                       xe_sync_entry_signal(&syncs[i], NULL,
> +                                            cf ? &cf->base : fence);
> +       }
>  
>         return cf ? &cf->base : !fence ? dma_fence_get_stub() :
> fence;
>  
> @@ -1635,7 +1645,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> xe_engine *e,
>  
>  static struct dma_fence *
>  xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
> -              struct xe_sync_entry *syncs, u32 num_syncs)
> +              struct xe_sync_entry *syncs, u32 num_syncs,
> +              bool first_op, bool last_op)
>  {
>         struct xe_tile *tile;
>         struct dma_fence *fence;
> @@ -1660,7 +1671,8 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> xe_engine *e,
>                 if (!(vma->tile_mask & BIT(id)))
>                         goto next;
>  
> -               fence = __xe_pt_bind_vma(tile, vma, e, syncs,
> num_syncs,
> +               fence = __xe_pt_bind_vma(tile, vma, e, first_op ?
> syncs : NULL,
> +                                        first_op ? num_syncs : 0,
>                                          vma->tile_present &
> BIT(id));
>                 if (IS_ERR(fence)) {
>                         err = PTR_ERR(fence);
> @@ -1687,8 +1699,11 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> xe_engine *e,
>                 }
>         }
>  
> -       for (i = 0; i < num_syncs; i++)
> -               xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base
> : fence);
> +       if (last_op) {
> +               for (i = 0; i < num_syncs; i++)
> +                       xe_sync_entry_signal(&syncs[i], NULL,
> +                                            cf ? &cf->base : fence);
> +       }
>  
>         return cf ? &cf->base : fence;
>  
> @@ -1786,15 +1801,29 @@ int xe_vm_async_fence_wait_start(struct
> dma_fence *fence)
>  
>  static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
>                         struct xe_engine *e, struct xe_sync_entry
> *syncs,
> -                       u32 num_syncs, struct async_op_fence *afence)
> +                       u32 num_syncs, struct async_op_fence *afence,
> +                       bool immediate, bool first_op, bool last_op)
>  {
>         struct dma_fence *fence;
>  
>         xe_vm_assert_held(vm);
>  
> -       fence = xe_vm_bind_vma(vma, e, syncs, num_syncs);
> -       if (IS_ERR(fence))
> -               return PTR_ERR(fence);
> +       if (immediate) {
> +               fence = xe_vm_bind_vma(vma, e, syncs, num_syncs,
> first_op,
> +                                      last_op);
> +               if (IS_ERR(fence))
> +                       return PTR_ERR(fence);
> +       } else {
> +               int i;
> +
> +               XE_BUG_ON(!xe_vm_in_fault_mode(vm));
> +
> +               fence = dma_fence_get_stub();
> +               if (last_op) {
> +                       for (i = 0; i < num_syncs; i++)
> +                               xe_sync_entry_signal(&syncs[i], NULL,
> fence);
> +               }
> +       }
>         if (afence)
>                 add_async_op_fence_cb(vm, fence, afence);
>  
> @@ -1804,32 +1833,35 @@ static int __xe_vm_bind(struct xe_vm *vm,
> struct xe_vma *vma,
>  
>  static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> xe_engine *e,
>                       struct xe_bo *bo, struct xe_sync_entry *syncs,
> -                     u32 num_syncs, struct async_op_fence *afence)
> +                     u32 num_syncs, struct async_op_fence *afence,
> +                     bool immediate, bool first_op, bool last_op)
>  {
>         int err;
>  
>         xe_vm_assert_held(vm);
>         xe_bo_assert_held(bo);
>  
> -       if (bo) {
> +       if (bo && immediate) {
>                 err = xe_bo_validate(bo, vm, true);
>                 if (err)
>                         return err;
>         }
>  
> -       return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence);
> +       return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence,
> immediate,
> +                           first_op, last_op);
>  }
>  
>  static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
>                         struct xe_engine *e, struct xe_sync_entry
> *syncs,
> -                       u32 num_syncs, struct async_op_fence *afence)
> +                       u32 num_syncs, struct async_op_fence *afence,
> +                       bool first_op, bool last_op)
>  {
>         struct dma_fence *fence;
>  
>         xe_vm_assert_held(vm);
>         xe_bo_assert_held(xe_vma_bo(vma));
>  
> -       fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
> +       fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs, first_op,
> last_op);
>         if (IS_ERR(fence))
>                 return PTR_ERR(fence);
>         if (afence)
> @@ -2059,7 +2091,8 @@ static const u32 region_to_mem_type[] = {
>  static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
>                           struct xe_engine *e, u32 region,
>                           struct xe_sync_entry *syncs, u32 num_syncs,
> -                         struct async_op_fence *afence)
> +                         struct async_op_fence *afence, bool
> first_op,
> +                         bool last_op)
>  {
>         int err;
>  
> @@ -2073,14 +2106,16 @@ static int xe_vm_prefetch(struct xe_vm *vm,
> struct xe_vma *vma,
>  
>         if (vma->tile_mask != (vma->tile_present & ~vma-
> >usm.tile_invalidated)) {
>                 return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs,
> num_syncs,
> -                                 afence);
> +                                 afence, true, first_op, last_op);
>         } else {
>                 int i;
>  
>                 /* Nothing to do, signal fences now */
> -               for (i = 0; i < num_syncs; i++)
> -                       xe_sync_entry_signal(&syncs[i], NULL,
> -                                            dma_fence_get_stub());
> +               if (last_op) {
> +                       for (i = 0; i < num_syncs; i++)
> +                               xe_sync_entry_signal(&syncs[i], NULL,
> +                                                   
> dma_fence_get_stub());
> +               }
>                 if (afence)
>                         dma_fence_signal(&afence->fence);
>                 return 0;
> @@ -2089,29 +2124,6 @@ static int xe_vm_prefetch(struct xe_vm *vm,
> struct xe_vma *vma,
>  
>  #define VM_BIND_OP(op) (op & 0xffff)
>  
> -static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
> -                          struct xe_engine *e, struct xe_bo *bo, u32
> op,
> -                          u32 region, struct xe_sync_entry *syncs,
> -                          u32 num_syncs, struct async_op_fence
> *afence)
> -{
> -       switch (VM_BIND_OP(op)) {
> -       case XE_VM_BIND_OP_MAP:
> -               return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs,
> afence);
> -       case XE_VM_BIND_OP_UNMAP:
> -       case XE_VM_BIND_OP_UNMAP_ALL:
> -               return xe_vm_unbind(vm, vma, e, syncs, num_syncs,
> afence);
> -       case XE_VM_BIND_OP_MAP_USERPTR:
> -               return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs,
> afence);
> -       case XE_VM_BIND_OP_PREFETCH:
> -               return xe_vm_prefetch(vm, vma, e, region, syncs,
> num_syncs,
> -                                     afence);
> -               break;
> -       default:
> -               XE_BUG_ON("NOT POSSIBLE");
> -               return -EINVAL;
> -       }
> -}
> -
>  struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
>  {
>         int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
> @@ -2127,810 +2139,840 @@ static void xe_vm_tv_populate(struct xe_vm
> *vm, struct ttm_validate_buffer *tv)
>         tv->bo = xe_vm_ttm_bo(vm);
>  }
>  
> -static bool is_map_op(u32 op)
> -{
> -       return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP ||
> -               VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR;
> -}
> -
> -static bool is_unmap_op(u32 op)
> +static void vm_set_async_error(struct xe_vm *vm, int err)
>  {
> -       return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP ||
> -               VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL;
> +       lockdep_assert_held(&vm->lock);
> +       vm->async_ops.error = err;
>  }
>  
> -static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
> -                        struct xe_engine *e, struct xe_bo *bo,
> -                        struct drm_xe_vm_bind_op *bind_op,
> -                        struct xe_sync_entry *syncs, u32 num_syncs,
> -                        struct async_op_fence *afence)
> +static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo
> *bo,
> +                                   u64 addr, u64 range, u32 op)
>  {
> -       LIST_HEAD(objs);
> -       LIST_HEAD(dups);
> -       struct ttm_validate_buffer tv_bo, tv_vm;
> -       struct ww_acquire_ctx ww;
> -       struct xe_bo *vbo;
> -       int err, i;
> +       struct xe_device *xe = vm->xe;
> +       struct xe_vma *vma;
> +       bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
>  
>         lockdep_assert_held(&vm->lock);
> -       XE_BUG_ON(!list_empty(&vma->unbind_link));
> -
> -       /* Binds deferred to faults, signal fences now */
> -       if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) &&
> -           !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) {
> -               for (i = 0; i < num_syncs; i++)
> -                       xe_sync_entry_signal(&syncs[i], NULL,
> -                                            dma_fence_get_stub());
> -               if (afence)
> -                       dma_fence_signal(&afence->fence);
> -               return 0;
> -       }
> -
> -       xe_vm_tv_populate(vm, &tv_vm);
> -       list_add_tail(&tv_vm.head, &objs);
> -       vbo = xe_vma_bo(vma);
> -       if (vbo) {
> -               /*
> -                * An unbind can drop the last reference to the BO
> and
> -                * the BO is needed for ttm_eu_backoff_reservation so
> -                * take a reference here.
> -                */
> -               xe_bo_get(vbo);
> -
> -               if (!vbo->vm) {
> -                       tv_bo.bo = &vbo->ttm;
> -                       tv_bo.num_shared = 1;
> -                       list_add(&tv_bo.head, &objs);
> -               }
> -       }
>  
> -again:
> -       err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
> -       if (!err) {
> -               err = __vm_bind_ioctl(vm, vma, e, bo,
> -                                     bind_op->op, bind_op->region,
> syncs,
> -                                     num_syncs, afence);
> -               ttm_eu_backoff_reservation(&ww, &objs);
> -               if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
> -                       lockdep_assert_held_write(&vm->lock);
> -                       err = xe_vma_userptr_pin_pages(vma);
> -                       if (!err)
> -                               goto again;
> -               }
> +       switch (VM_BIND_OP(op)) {
> +       case XE_VM_BIND_OP_MAP:
> +       case XE_VM_BIND_OP_MAP_USERPTR:
> +               vma = xe_vm_find_overlapping_vma(vm, addr, range);
> +               if (XE_IOCTL_ERR(xe, vma && !async))
> +                       return -EBUSY;
> +               break;
> +       case XE_VM_BIND_OP_UNMAP:
> +       case XE_VM_BIND_OP_PREFETCH:
> +               vma = xe_vm_find_overlapping_vma(vm, addr, range);
> +               if (XE_IOCTL_ERR(xe, !vma))
> +                       return -ENODATA;        /* Not an actual
> error, IOCTL
> +                                                  cleans up returns
> and 0 */
> +               if (XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr ||
> +                                xe_vma_end(vma) != addr + range) &&
> !async))
> +                       return -EINVAL;
> +               break;
> +       case XE_VM_BIND_OP_UNMAP_ALL:
> +               if (XE_IOCTL_ERR(xe, list_empty(&bo-
> >ttm.base.gpuva.list)))
> +                       return -ENODATA;        /* Not an actual
> error, IOCTL
> +                                                  cleans up returns
> and 0 */
> +               break;
> +       default:
> +               XE_BUG_ON("NOT POSSIBLE");
> +               return -EINVAL;
>         }
> -       xe_bo_put(vbo);
>  
> -       return err;
> +       return 0;
>  }
>  
> -struct async_op {
> -       struct xe_vma *vma;
> -       struct xe_engine *engine;
> -       struct xe_bo *bo;
> -       struct drm_xe_vm_bind_op bind_op;
> -       struct xe_sync_entry *syncs;
> -       u32 num_syncs;
> -       struct list_head link;
> -       struct async_op_fence *fence;
> -};
> -
> -static void async_op_cleanup(struct xe_vm *vm, struct async_op *op)
> +static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
> +                            bool post_commit)
>  {
> -       while (op->num_syncs--)
> -               xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
> -       kfree(op->syncs);
> -       xe_bo_put(op->bo);
> -       if (op->engine)
> -               xe_engine_put(op->engine);
> -       xe_vm_put(vm);
> -       if (op->fence)
> -               dma_fence_put(&op->fence->fence);
> -       kfree(op);
> +       down_read(&vm->userptr.notifier_lock);
> +       vma->gpuva.flags |= XE_VMA_DESTROYED;
> +       up_read(&vm->userptr.notifier_lock);
> +       if (post_commit)
> +               xe_vm_remove_vma(vm, vma, true);
>  }
>  
> -static struct async_op *next_async_op(struct xe_vm *vm)
> +#undef ULL
> +#define ULL    unsigned long long
> +
> +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
> +static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
>  {
> -       return list_first_entry_or_null(&vm->async_ops.pending,
> -                                       struct async_op, link);
> -}
> +       struct xe_vma *vma;
>  
> -static void vm_set_async_error(struct xe_vm *vm, int err)
> +       switch (op->op) {
> +       case DRM_GPUVA_OP_MAP:
> +               vm_dbg(&xe->drm, "MAP: addr=0x%016llx,
> range=0x%016llx",
> +                      (ULL)op->map.va.addr, (ULL)op->map.va.range);
> +               break;
> +       case DRM_GPUVA_OP_REMAP:
> +               vma = gpuva_to_vma(op->remap.unmap->va);
> +               vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx,
> range=0x%016llx, keep=%d",
> +                      (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
> +                      op->unmap.keep ? 1 : 0);
> +               if (op->remap.prev)
> +                       vm_dbg(&xe->drm,
> +                              "REMAP:PREV: addr=0x%016llx,
> range=0x%016llx",
> +                              (ULL)op->remap.prev->va.addr,
> +                              (ULL)op->remap.prev->va.range);
> +               if (op->remap.next)
> +                       vm_dbg(&xe->drm,
> +                              "REMAP:NEXT: addr=0x%016llx,
> range=0x%016llx",
> +                              (ULL)op->remap.next->va.addr,
> +                              (ULL)op->remap.next->va.range);
> +               break;
> +       case DRM_GPUVA_OP_UNMAP:
> +               vma = gpuva_to_vma(op->unmap.va);
> +               vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx,
> range=0x%016llx, keep=%d",
> +                      (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
> +                      op->unmap.keep ? 1 : 0);
> +               break;
> +       default:
> +               XE_BUG_ON("NOT POSSIBLE");
> +       }
> +}
> +#else
> +static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
>  {
> -       lockdep_assert_held(&vm->lock);
> -       vm->async_ops.error = err;
>  }
> +#endif
>  
> -static void async_op_work_func(struct work_struct *w)
> +/*
> + * Create operations list from IOCTL arguments, setup operations
> fields so parse
> + * and commit steps are decoupled from IOCTL arguments. This step
> can fail.
> + */
> +static struct drm_gpuva_ops *
> +vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
> +                        u64 bo_offset_or_userptr, u64 addr, u64
> range,
> +                        u32 operation, u64 tile_mask, u32 region)
>  {
> -       struct xe_vm *vm = container_of(w, struct xe_vm,
> async_ops.work);
> +       struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
> +       struct ww_acquire_ctx ww;
> +       struct drm_gpuva_ops *ops;
> +       struct drm_gpuva_op *__op;
> +       struct xe_vma_op *op;
> +       int err;
>  
> -       for (;;) {
> -               struct async_op *op;
> -               int err;
> +       lockdep_assert_held_write(&vm->lock);
>  
> -               if (vm->async_ops.error && !xe_vm_is_closed(vm))
> -                       break;
> +       vm_dbg(&vm->xe->drm,
> +              "op=%d, addr=0x%016llx, range=0x%016llx,
> bo_offset_or_userptr=0x%016llx",
> +              VM_BIND_OP(operation), (ULL)addr, (ULL)range,
> +              (ULL)bo_offset_or_userptr);
>  
> -               spin_lock_irq(&vm->async_ops.lock);
> -               op = next_async_op(vm);
> -               if (op)
> -                       list_del_init(&op->link);
> -               spin_unlock_irq(&vm->async_ops.lock);
> +       switch (VM_BIND_OP(operation)) {
> +       case XE_VM_BIND_OP_MAP:
> +       case XE_VM_BIND_OP_MAP_USERPTR:
> +               ops = drm_gpuva_sm_map_ops_create(&vm->mgr, addr,
> range,
> +                                                 obj,
> bo_offset_or_userptr);
> +               if (IS_ERR(ops))
> +                       return ops;
>  
> -               if (!op)
> -                       break;
> +               drm_gpuva_for_each_op(__op, ops) {
> +                       struct xe_vma_op *op =
> gpuva_op_to_vma_op(__op);
>  
> -               if (!xe_vm_is_closed(vm)) {
> -                       bool first, last;
> +                       op->tile_mask = tile_mask;
> +                       op->map.immediate =
> +                               operation &
> XE_VM_BIND_FLAG_IMMEDIATE;
> +                       op->map.read_only =
> +                               operation & XE_VM_BIND_FLAG_READONLY;
> +                       op->map.is_null = operation &
> XE_VM_BIND_FLAG_NULL;
> +               }
> +               break;
> +       case XE_VM_BIND_OP_UNMAP:
> +               ops = drm_gpuva_sm_unmap_ops_create(&vm->mgr, addr,
> range);
> +               if (IS_ERR(ops))
> +                       return ops;
>  
> -                       down_write(&vm->lock);
> -again:
> -                       first = op->vma->first_munmap_rebind;
> -                       last = op->vma->last_munmap_rebind;
> -#ifdef TEST_VM_ASYNC_OPS_ERROR
> -#define FORCE_ASYNC_OP_ERROR   BIT(31)
> -                       if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR))
> {
> -                               err = vm_bind_ioctl(vm, op->vma, op-
> >engine,
> -                                                   op->bo, &op-
> >bind_op,
> -                                                   op->syncs, op-
> >num_syncs,
> -                                                   op->fence);
> -                       } else {
> -                               err = -ENOMEM;
> -                               op->bind_op.op &=
> ~FORCE_ASYNC_OP_ERROR;
> -                       }
> -#else
> -                       err = vm_bind_ioctl(vm, op->vma, op->engine,
> op->bo,
> -                                           &op->bind_op, op->syncs,
> -                                           op->num_syncs, op-
> >fence);
> -#endif
> -                       /*
> -                        * In order for the fencing to work (stall
> behind
> -                        * existing jobs / prevent new jobs from
> running) all
> -                        * the dma-resv slots need to be programmed
> in a batch
> -                        * relative to execs / the rebind worker. The
> vm->lock
> -                        * ensure this.
> -                        */
> -                       if (!err && ((first && VM_BIND_OP(op-
> >bind_op.op) ==
> -                                     XE_VM_BIND_OP_UNMAP) ||
> -                                    vm-
> >async_ops.munmap_rebind_inflight)) {
> -                               if (last) {
> -                                       op->vma->last_munmap_rebind =
> false;
> -                                       vm-
> >async_ops.munmap_rebind_inflight =
> -                                               false;
> -                               } else {
> -                                       vm-
> >async_ops.munmap_rebind_inflight =
> -                                               true;
> -
> -                                       async_op_cleanup(vm, op);
> -
> -                                       spin_lock_irq(&vm-
> >async_ops.lock);
> -                                       op = next_async_op(vm);
> -                                       XE_BUG_ON(!op);
> -                                       list_del_init(&op->link);
> -                                       spin_unlock_irq(&vm-
> >async_ops.lock);
> -
> -                                       goto again;
> -                               }
> -                       }
> -                       if (err) {
> -                               trace_xe_vma_fail(op->vma);
> -                               drm_warn(&vm->xe->drm, "Async VM
> op(%d) failed with %d",
> -                                        VM_BIND_OP(op->bind_op.op),
> -                                        err);
> +               drm_gpuva_for_each_op(__op, ops) {
> +                       struct xe_vma_op *op =
> gpuva_op_to_vma_op(__op);
>  
> -                               spin_lock_irq(&vm->async_ops.lock);
> -                               list_add(&op->link, &vm-
> >async_ops.pending);
> -                               spin_unlock_irq(&vm->async_ops.lock);
> +                       op->tile_mask = tile_mask;
> +               }
> +               break;
> +       case XE_VM_BIND_OP_PREFETCH:
> +               ops = drm_gpuva_prefetch_ops_create(&vm->mgr, addr,
> range);
> +               if (IS_ERR(ops))
> +                       return ops;
>  
> -                               vm_set_async_error(vm, err);
> -                               up_write(&vm->lock);
> +               drm_gpuva_for_each_op(__op, ops) {
> +                       struct xe_vma_op *op =
> gpuva_op_to_vma_op(__op);
>  
> -                               if (vm->async_ops.error_capture.addr)
> -                                       vm_error_capture(vm, err,
> -                                                        op-
> >bind_op.op,
> -                                                        op-
> >bind_op.addr,
> -                                                        op-
> >bind_op.range);
> -                               break;
> -                       }
> -                       up_write(&vm->lock);
> -               } else {
> -                       trace_xe_vma_flush(op->vma);
> +                       op->tile_mask = tile_mask;
> +                       op->prefetch.region = region;
> +               }
> +               break;
> +       case XE_VM_BIND_OP_UNMAP_ALL:
> +               XE_BUG_ON(!bo);
>  
> -                       if (is_unmap_op(op->bind_op.op)) {
> -                               down_write(&vm->lock);
> -                               xe_vma_destroy_unlocked(op->vma);
> -                               up_write(&vm->lock);
> -                       }
> +               err = xe_bo_lock(bo, &ww, 0, true);
> +               if (err)
> +                       return ERR_PTR(err);
> +               ops = drm_gpuva_gem_unmap_ops_create(&vm->mgr, obj);
> +               xe_bo_unlock(bo, &ww);
> +               if (IS_ERR(ops))
> +                       return ops;
>  
> -                       if (op->fence &&
> !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> -                                                  &op->fence-
> >fence.flags)) {
> -                               if (!xe_vm_no_dma_fences(vm)) {
> -                                       op->fence->started = true;
> -                                       smp_wmb();
> -                                       wake_up_all(&op->fence->wq);
> -                               }
> -                               dma_fence_signal(&op->fence->fence);
> -                       }
> +               drm_gpuva_for_each_op(__op, ops) {
> +                       struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> +
> +                       op->tile_mask = tile_mask;
>                 }
> +               break;
> +       default:
> +               XE_BUG_ON("NOT POSSIBLE");
> +               ops = ERR_PTR(-EINVAL);
> +       }
>  
> -               async_op_cleanup(vm, op);
> +#ifdef TEST_VM_ASYNC_OPS_ERROR
> +       if (operation & FORCE_ASYNC_OP_ERROR) {
> +               op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
> +                                             base.entry);
> +               if (op)
> +                       op->inject_error = true;
>         }
> +#endif
> +
> +       if (!IS_ERR(ops))
> +               drm_gpuva_for_each_op(__op, ops)
> +                       print_op(vm->xe, __op);
> +
> +       return ops;
>  }
>  
> -static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma
> *vma,
> -                                struct xe_engine *e, struct xe_bo
> *bo,
> -                                struct drm_xe_vm_bind_op *bind_op,
> -                                struct xe_sync_entry *syncs, u32
> num_syncs)
> +static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
> +                             u64 tile_mask, bool read_only, bool is_null)
>  {
> -       struct async_op *op;
> -       bool installed = false;
> -       u64 seqno;
> -       int i;
> +       struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
> +       struct xe_vma *vma;
> +       struct ww_acquire_ctx ww;
> +       int err;
>  
> -       lockdep_assert_held(&vm->lock);
> +       lockdep_assert_held_write(&vm->lock);
>  
> -       op = kmalloc(sizeof(*op), GFP_KERNEL);
> -       if (!op) {
> -               return -ENOMEM;
> +       if (bo) {
> +               err = xe_bo_lock(bo, &ww, 0, true);
> +               if (err)
> +                       return ERR_PTR(err);
>         }
> +       vma = xe_vma_create(vm, bo, op->gem.offset,
> +                           op->va.addr, op->va.addr +
> +                           op->va.range - 1, read_only, is_null,
> +                           tile_mask);
> +       if (bo)
> +               xe_bo_unlock(bo, &ww);
>  
> -       if (num_syncs) {
> -               op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL);
> -               if (!op->fence) {
> -                       kfree(op);
> -                       return -ENOMEM;
> -               }
> +       if (xe_vma_is_userptr(vma)) {
> +               err = xe_vma_userptr_pin_pages(vma);
> +               if (err) {
> +                       prep_vma_destroy(vm, vma, false);
> +                       xe_vma_destroy_unlocked(vma);
> +                       return ERR_PTR(err);
> +               }
> +       } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
> +               vm_insert_extobj(vm, vma);
> +               err = add_preempt_fences(vm, bo);
> +               if (err) {
> +                       prep_vma_destroy(vm, vma, false);
> +                       xe_vma_destroy_unlocked(vma);
> +                       return ERR_PTR(err);
> +               }
> +       }
> +
> +       return vma;
> +}
> +
> +/*
> + * Parse operations list and create any resources needed for the
> operations
> + * prior to fully committing to the operations. This setup can fail.
> + */
> +static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
> +                                  struct drm_gpuva_ops **ops, int num_ops_list,
> +                                  struct xe_sync_entry *syncs, u32 num_syncs,
> +                                  struct list_head *ops_list, bool async)
> +{
> +       struct xe_vma_op *last_op = NULL;
> +       struct list_head *async_list = NULL;
> +       struct async_op_fence *fence = NULL;
> +       int err, i;
> +
> +       lockdep_assert_held_write(&vm->lock);
> +       XE_BUG_ON(num_ops_list > 1 && !async);
> +
> +       if (num_syncs && async) {
> +               u64 seqno;
> +
> +               fence = kmalloc(sizeof(*fence), GFP_KERNEL);
> +               if (!fence)
> +                       return -ENOMEM;
>  
>                 seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
> -               dma_fence_init(&op->fence->fence,
> &async_op_fence_ops,
> +               dma_fence_init(&fence->fence, &async_op_fence_ops,
>                                &vm->async_ops.lock, e ? e->bind.fence_ctx :
>                                vm->async_ops.fence.context, seqno);
>  
>                 if (!xe_vm_no_dma_fences(vm)) {
> -                       op->fence->vm = vm;
> -                       op->fence->started = false;
> -                       init_waitqueue_head(&op->fence->wq);
> +                       fence->vm = vm;
> +                       fence->started = false;
> +                       init_waitqueue_head(&fence->wq);
>                 }
> -       } else {
> -               op->fence = NULL;
>         }
> -       op->vma = vma;
> -       op->engine = e;
> -       op->bo = bo;
> -       op->bind_op = *bind_op;
> -       op->syncs = syncs;
> -       op->num_syncs = num_syncs;
> -       INIT_LIST_HEAD(&op->link);
> -
> -       for (i = 0; i < num_syncs; i++)
> -               installed |= xe_sync_entry_signal(&syncs[i], NULL,
> -                                                 &op->fence->fence);
>  
> -       if (!installed && op->fence)
> -               dma_fence_signal(&op->fence->fence);
> +       for (i = 0; i < num_ops_list; ++i) {
> +               struct drm_gpuva_ops *__ops = ops[i];
> +               struct drm_gpuva_op *__op;
>  
> -       spin_lock_irq(&vm->async_ops.lock);
> -       list_add_tail(&op->link, &vm->async_ops.pending);
> -       spin_unlock_irq(&vm->async_ops.lock);
> +               drm_gpuva_for_each_op(__op, __ops) {
> +                       struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> +                       bool first = !async_list;
>  
> -       if (!vm->async_ops.error)
> -               queue_work(system_unbound_wq, &vm->async_ops.work);
> +                       XE_BUG_ON(!first && !async);
>  
> -       return 0;
> -}
> +                       INIT_LIST_HEAD(&op->link);
> +                       if (first)
> +                               async_list = ops_list;
> +                       list_add_tail(&op->link, async_list);
>  
> -static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
> -                              struct xe_engine *e, struct xe_bo *bo,
> -                              struct drm_xe_vm_bind_op *bind_op,
> -                              struct xe_sync_entry *syncs, u32
> num_syncs)
> -{
> -       struct xe_vma *__vma, *next;
> -       struct list_head rebind_list;
> -       struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL;
> -       u32 num_in_syncs = 0, num_out_syncs = 0;
> -       bool first = true, last;
> -       int err;
> -       int i;
> +                       if (first) {
> +                               op->flags |= XE_VMA_OP_FIRST;
> +                               op->num_syncs = num_syncs;
> +                               op->syncs = syncs;
> +                       }
>  
> -       lockdep_assert_held(&vm->lock);
> +                       op->engine = e;
>  
> -       /* Not a linked list of unbinds + rebinds, easy */
> -       if (list_empty(&vma->unbind_link))
> -               return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op,
> -                                            syncs, num_syncs);
> +                       switch (op->base.op) {
> +                       case DRM_GPUVA_OP_MAP:
> +                       {
> +                               struct xe_vma *vma;
>  
> -       /*
> -        * Linked list of unbinds + rebinds, decompose syncs into 'in
> / out'
> -        * passing the 'in' to the first operation and 'out' to the
> last. Also
> -        * the reference counting is a little tricky, increment the
> VM / bind
> -        * engine ref count on all but the last operation and
> increment the BOs
> -        * ref count on each rebind.
> -        */
> +                               vma = new_vma(vm, &op->base.map,
> +                                             op->tile_mask, op->map.read_only,
> +                                             op->map.is_null);
> +                               if (IS_ERR(vma)) {
> +                                       err = PTR_ERR(vma);
> +                                       goto free_fence;
> +                               }
>  
> -       XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP &&
> -                 VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL
> &&
> -                 VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH);
> +                               op->map.vma = vma;
> +                               break;
> +                       }
> +                       case DRM_GPUVA_OP_REMAP:
> +                               if (op->base.remap.prev) {
> +                                       struct xe_vma *vma;
> +                                       bool read_only =
> +                                               op->base.remap.unmap->va->flags &
> +                                               XE_VMA_READ_ONLY;
> +                                       bool is_null =
> +                                               op->base.remap.unmap->va->flags &
> +                                               DRM_GPUVA_SPARSE;
> +
> +                                       vma = new_vma(vm, op->base.remap.prev,
> +                                                     op->tile_mask, read_only,
> +                                                     is_null);
> +                                       if (IS_ERR(vma)) {
> +                                               err = PTR_ERR(vma);
> +                                               goto free_fence;
> +                                       }
> +
> +                                       op->remap.prev = vma;
> +                               }
>  
> -       /* Decompose syncs */
> -       if (num_syncs) {
> -               in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs,
> GFP_KERNEL);
> -               out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs,
> GFP_KERNEL);
> -               if (!in_syncs || !out_syncs) {
> -                       err = -ENOMEM;
> -                       goto out_error;
> -               }
> +                               if (op->base.remap.next) {
> +                                       struct xe_vma *vma;
> +                                       bool read_only =
> +                                               op->base.remap.unmap->va->flags &
> +                                               XE_VMA_READ_ONLY;
> +
> +                                       bool is_null =
> +                                               op->base.remap.unmap->va->flags &
> +                                               DRM_GPUVA_SPARSE;
> +
> +                                       vma = new_vma(vm, op->base.remap.next,
> +                                                     op->tile_mask, read_only,
> +                                                     is_null);
> +                                       if (IS_ERR(vma)) {
> +                                               err = PTR_ERR(vma);
> +                                               goto free_fence;
> +                                       }
> +
> +                                       op->remap.next = vma;
> +                               }
>  
> -               for (i = 0; i < num_syncs; ++i) {
> -                       bool signal = syncs[i].flags &
> DRM_XE_SYNC_SIGNAL;
> +                               /* XXX: Support not doing remaps */
> +                               op->remap.start =
> +                                       xe_vma_start(gpuva_to_vma(op->base.remap.unmap->va));
> +                               op->remap.range =
> +                                       xe_vma_size(gpuva_to_vma(op->base.remap.unmap->va));
> +                               break;
> +                       case DRM_GPUVA_OP_UNMAP:
> +                               op->unmap.start =
> +                                       xe_vma_start(gpuva_to_vma(op->base.unmap.va));
> +                               op->unmap.range =
> +                                       xe_vma_size(gpuva_to_vma(op->base.unmap.va));
> +                               break;
> +                       case DRM_GPUVA_OP_PREFETCH:
> +                               /* Nothing to do */
> +                               break;
> +                       default:
> +                               XE_BUG_ON("NOT POSSIBLE");
> +                       }
>  
> -                       if (signal)
> -                               out_syncs[num_out_syncs++] =
> syncs[i];
> -                       else
> -                               in_syncs[num_in_syncs++] = syncs[i];
> +                       last_op = op;
>                 }
> -       }
>  
> -       /* Do unbinds + move rebinds to new list */
> -       INIT_LIST_HEAD(&rebind_list);
> -       list_for_each_entry_safe(__vma, next, &vma->unbind_link,
> unbind_link) {
> -               if (__vma->destroyed ||
> -                   VM_BIND_OP(bind_op->op) ==
> XE_VM_BIND_OP_PREFETCH) {
> -                       list_del_init(&__vma->unbind_link);
> -                       xe_bo_get(bo);
> -                       err = __vm_bind_ioctl_async(xe_vm_get(vm),
> __vma,
> -                                                   e ?
> xe_engine_get(e) : NULL,
> -                                                   bo, bind_op,
> first ?
> -                                                   in_syncs : NULL,
> -                                                   first ?
> num_in_syncs : 0);
> -                       if (err) {
> -                               xe_bo_put(bo);
> -                               xe_vm_put(vm);
> -                               if (e)
> -                                       xe_engine_put(e);
> -                               goto out_error;
> -                       }
> -                       in_syncs = NULL;
> -                       first = false;
> -               } else {
> -                       list_move_tail(&__vma->unbind_link,
> &rebind_list);
> -               }
> -       }
> -       last = list_empty(&rebind_list);
> -       if (!last) {
> -               xe_vm_get(vm);
> -               if (e)
> -                       xe_engine_get(e);
> -       }
> -       err = __vm_bind_ioctl_async(vm, vma, e,
> -                                   bo, bind_op,
> -                                   first ? in_syncs :
> -                                   last ? out_syncs : NULL,
> -                                   first ? num_in_syncs :
> -                                   last ? num_out_syncs : 0);
> -       if (err) {
> -               if (!last) {
> -                       xe_vm_put(vm);
> -                       if (e)
> -                               xe_engine_put(e);
> -               }
> -               goto out_error;
> +               last_op->ops = __ops;
>         }
> -       in_syncs = NULL;
> -
> -       /* Do rebinds */
> -       list_for_each_entry_safe(__vma, next, &rebind_list,
> unbind_link) {
> -               list_del_init(&__vma->unbind_link);
> -               last = list_empty(&rebind_list);
> -
> -               if (xe_vma_is_userptr(__vma)) {
> -                       bind_op->op = XE_VM_BIND_FLAG_ASYNC |
> -                               XE_VM_BIND_OP_MAP_USERPTR;
> -               } else {
> -                       bind_op->op = XE_VM_BIND_FLAG_ASYNC |
> -                               XE_VM_BIND_OP_MAP;
> -                       xe_bo_get(xe_vma_bo(__vma));
> -               }
>  
> -               if (!last) {
> -                       xe_vm_get(vm);
> -                       if (e)
> -                               xe_engine_get(e);
> -               }
> +       if (!last_op)
> +               return -ENODATA;
>  
> -               err = __vm_bind_ioctl_async(vm, __vma, e,
> -                                           xe_vma_bo(__vma),
> bind_op, last ?
> -                                           out_syncs : NULL,
> -                                           last ? num_out_syncs :
> 0);
> -               if (err) {
> -                       if (!last) {
> -                               xe_vm_put(vm);
> -                               if (e)
> -                                       xe_engine_put(e);
> -                       }
> -                       goto out_error;
> -               }
> -       }
> +       last_op->flags |= XE_VMA_OP_LAST;
> +       last_op->num_syncs = num_syncs;
> +       last_op->syncs = syncs;
> +       last_op->fence = fence;
>  
> -       kfree(syncs);
>         return 0;
>  
> -out_error:
> -       kfree(in_syncs);
> -       kfree(out_syncs);
> -       kfree(syncs);
> -
> +free_fence:
> +       kfree(fence);
>         return err;
>  }
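
One thing that took me a second pass here is how the user syncs end up
distributed once the bind is decomposed into an ops list: as far as I can
tell the first op across all the lists carries the syncs so the in-fences
are waited on up front, and the last op carries them again together with
the async fence so signalling happens only once everything is done. A
stripped-down user-space model of that tagging (toy types, not the driver
structures) that I used to sanity-check the FIRST/LAST handling:

#include <stdio.h>

#define OP_FIRST (1 << 0)
#define OP_LAST  (1 << 1)

struct toy_op {
	int flags;
	int num_syncs;	/* stands in for op->syncs / op->num_syncs */
	int has_fence;	/* stands in for op->fence */
};

int main(void)
{
	struct toy_op ops[4] = {{ 0 }};
	int num_syncs = 2, n = 4, i;

	/* first op over all ops lists: waits on the user's in-syncs */
	ops[0].flags |= OP_FIRST;
	ops[0].num_syncs = num_syncs;

	/* last op: carries the syncs again plus the async fence to signal */
	ops[n - 1].flags |= OP_LAST;
	ops[n - 1].num_syncs = num_syncs;
	ops[n - 1].has_fence = 1;

	for (i = 0; i < n; i++)
		printf("op%d first=%d last=%d syncs=%d fence=%d\n", i,
		       !!(ops[i].flags & OP_FIRST), !!(ops[i].flags & OP_LAST),
		       ops[i].num_syncs, ops[i].has_fence);
	return 0;
}
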
>  
> -static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo
> *bo,
> -                                     u64 addr, u64 range, u32 op)
> +static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
>  {
> -       struct xe_device *xe = vm->xe;
> -       struct xe_vma *vma, lookup;
> -       bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
> -
> -       lockdep_assert_held(&vm->lock);
> +       int err = 0;
>  
> -       lookup.start = addr;
> -       lookup.end = addr + range - 1;
> +       lockdep_assert_held_write(&vm->lock);
>  
> -       switch (VM_BIND_OP(op)) {
> -       case XE_VM_BIND_OP_MAP:
> -       case XE_VM_BIND_OP_MAP_USERPTR:
> -               vma = xe_vm_find_overlapping_vma(vm, &lookup);
> -               if (XE_IOCTL_ERR(xe, vma))
> -                       return -EBUSY;
> +       switch (op->base.op) {
> +       case DRM_GPUVA_OP_MAP:
> +               err |= xe_vm_insert_vma(vm, op->map.vma);
>                 break;
> -       case XE_VM_BIND_OP_UNMAP:
> -       case XE_VM_BIND_OP_PREFETCH:
> -               vma = xe_vm_find_overlapping_vma(vm, &lookup);
> -               if (XE_IOCTL_ERR(xe, !vma) ||
> -                   XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr ||
> -                                xe_vma_end(vma) != addr + range) &&
> !async))
> -                       return -EINVAL;
> +       case DRM_GPUVA_OP_REMAP:
> +               prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
> +                                true);
> +               if (op->remap.prev)
> +                       err |= xe_vm_insert_vma(vm, op->remap.prev);
> +               if (op->remap.next)
> +                       err |= xe_vm_insert_vma(vm, op->remap.next);
>                 break;
> -       case XE_VM_BIND_OP_UNMAP_ALL:
> +       case DRM_GPUVA_OP_UNMAP:
> +               prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
> +               break;
> +       case DRM_GPUVA_OP_PREFETCH:
> +               /* Nothing to do */
>                 break;
>         default:
>                 XE_BUG_ON("NOT POSSIBLE");
> -               return -EINVAL;
>         }
>  
> -       return 0;
> -}
> -
> -static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma)
> -{
> -       down_read(&vm->userptr.notifier_lock);
> -       vma->destroyed = true;
> -       up_read(&vm->userptr.notifier_lock);
> -       xe_vm_remove_vma(vm, vma);
> +       op->flags |= XE_VMA_OP_COMMITTED;
> +       return err;
>  }
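
For the REMAP commit it helped me to write down the prev/next arithmetic
the GPUVA library is doing underneath: punching an unmap into the middle of
an existing VMA tears the whole old VMA down and re-creates the surviving
pieces, which is exactly what op->remap.prev / op->remap.next are. A tiny
stand-alone model with invented addresses (plain C, not the drm_gpuva
code):

#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	uint64_t start = 0x100000, end = 0x500000;   /* old VMA */
	uint64_t ustart = 0x200000, uend = 0x300000; /* range being unmapped */

	if (ustart > start)
		printf("prev: [0x%" PRIx64 ", 0x%" PRIx64 ")\n", start, ustart);
	if (uend < end)
		printf("next: [0x%" PRIx64 ", 0x%" PRIx64 ")\n", uend, end);
	/* the old VMA is unbound in full and prev/next are bound as new
	 * VMAs, which is why op->remap.start/range span the original VMA */
	printf("unmapped old VMA: [0x%" PRIx64 ", 0x%" PRIx64 ")\n", start, end);
	return 0;
}
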
>  
> -static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma
> *vma)
> +static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> +                              struct xe_vma_op *op)
>  {
> +       LIST_HEAD(objs);
> +       LIST_HEAD(dups);
> +       struct ttm_validate_buffer tv_bo, tv_vm;
> +       struct ww_acquire_ctx ww;
> +       struct xe_bo *vbo;
>         int err;
>  
> -       if (xe_vma_bo(vma) && !xe_vma_bo(vma)->vm) {
> -               vm_insert_extobj(vm, vma);
> -               err = add_preempt_fences(vm, xe_vma_bo(vma));
> -               if (err)
> -                       return err;
> -       }
> -
> -       return 0;
> -}
> +       lockdep_assert_held_write(&vm->lock);
>  
> -/*
> - * Find all overlapping VMAs in lookup range and add to a list in
> the returned
> - * VMA, all of VMAs found will be unbound. Also possibly add 2 new
> VMAs that
> - * need to be bound if first / last VMAs are not fully unbound. This
> is akin to
> - * how munmap works.
> - */
> -static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
> -                                           struct xe_vma *lookup)
> -{
> -       struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup);
> -       struct rb_node *node;
> -       struct xe_vma *first = vma, *last = vma, *new_first = NULL,
> -                     *new_last = NULL, *__vma, *next;
> -       int err = 0;
> -       bool first_munmap_rebind = false;
> +       xe_vm_tv_populate(vm, &tv_vm);
> +       list_add_tail(&tv_vm.head, &objs);
> +       vbo = xe_vma_bo(vma);
> +       if (vbo) {
> +               /*
> +                * An unbind can drop the last reference to the BO and
> +                * the BO is needed for ttm_eu_backoff_reservation so
> +                * take a reference here.
> +                */
> +               xe_bo_get(vbo);
>  
> -       lockdep_assert_held(&vm->lock);
> -       XE_BUG_ON(!vma);
> -
> -       node = &vma->vm_node;
> -       while ((node = rb_next(node))) {
> -               if (!xe_vma_cmp_vma_cb(lookup, node)) {
> -                       __vma = to_xe_vma(node);
> -                       list_add_tail(&__vma->unbind_link, &vma-
> >unbind_link);
> -                       last = __vma;
> -               } else {
> -                       break;
> +               if (!vbo->vm) {
> +                       tv_bo.bo = &vbo->ttm;
> +                       tv_bo.num_shared = 1;
> +                       list_add(&tv_bo.head, &objs);
>                 }
>         }
>  
> -       node = &vma->vm_node;
> -       while ((node = rb_prev(node))) {
> -               if (!xe_vma_cmp_vma_cb(lookup, node)) {
> -                       __vma = to_xe_vma(node);
> -                       list_add(&__vma->unbind_link, &vma-
> >unbind_link);
> -                       first = __vma;
> -               } else {
> -                       break;
> -               }
> +again:
> +       err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
> +       if (err) {
> +               xe_bo_put(vbo);
> +               return err;
>         }
>  
> -       if (xe_vma_start(first) != xe_vma_start(lookup)) {
> -               struct ww_acquire_ctx ww;
> +       xe_vm_assert_held(vm);
> +       xe_bo_assert_held(xe_vma_bo(vma));
>  
> -               if (xe_vma_bo(first))
> -                       err = xe_bo_lock(xe_vma_bo(first), &ww, 0,
> true);
> -               if (err)
> -                       goto unwind;
> -               new_first = xe_vma_create(xe_vma_vm(first),
> xe_vma_bo(first),
> -                                         xe_vma_bo(first) ?
> -                                         xe_vma_bo_offset(first) :
> -                                         xe_vma_userptr(first),
> -                                         xe_vma_start(first),
> -                                         xe_vma_start(lookup) - 1,
> -                                         xe_vma_read_only(first),
> -                                         (first->pte_flags &
> -                                          XE_PTE_FLAG_NULL),
> -                                         first->tile_mask);
> -               if (xe_vma_bo(first))
> -                       xe_bo_unlock(xe_vma_bo(first), &ww);
> -               if (!new_first) {
> -                       err = -ENOMEM;
> -                       goto unwind;
> -               }
> -               if (xe_vma_is_userptr(first)) {
> -                       err = xe_vma_userptr_pin_pages(new_first);
> +       switch (op->base.op) {
> +       case DRM_GPUVA_OP_MAP:
> +               err = xe_vm_bind(vm, vma, op->engine, xe_vma_bo(vma),
> +                                op->syncs, op->num_syncs, op->fence,
> +                                op->map.immediate || !xe_vm_in_fault_mode(vm),
> +                                op->flags & XE_VMA_OP_FIRST,
> +                                op->flags & XE_VMA_OP_LAST);
> +               break;
> +       case DRM_GPUVA_OP_REMAP:
> +       {
> +               bool prev = !!op->remap.prev;
> +               bool next = !!op->remap.next;
> +
> +               if (!op->remap.unmap_done) {
> +                       vm->async_ops.munmap_rebind_inflight = true;
> +                       if (prev || next)
> +                               vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
> +                       err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
> +                                          op->num_syncs,
> +                                          !prev && !next ? op->fence : NULL,
> +                                          op->flags & XE_VMA_OP_FIRST,
> +                                          op->flags & XE_VMA_OP_LAST && !prev &&
> +                                          !next);
>                         if (err)
> -                               goto unwind;
> +                               break;
> +                       op->remap.unmap_done = true;
>                 }
> -               err = prep_replacement_vma(vm, new_first);
> -               if (err)
> -                       goto unwind;
> -       }
>  
> -       if (xe_vma_end(last) != xe_vma_end(lookup)) {
> -               struct ww_acquire_ctx ww;
> -               u64 chunk = xe_vma_end(lookup) - xe_vma_start(last);
> -
> -               if (xe_vma_bo(last))
> -                       err = xe_bo_lock(xe_vma_bo(last), &ww, 0,
> true);
> -               if (err)
> -                       goto unwind;
> -               new_last = xe_vma_create(xe_vma_vm(last),
> xe_vma_bo(last),
> -                                        xe_vma_bo(last) ?
> -                                        xe_vma_bo_offset(last) +
> chunk :
> -                                        xe_vma_userptr(last) +
> chunk,
> -                                        xe_vma_start(last) + chunk,
> -                                        xe_vma_end(last) - 1,
> -                                        xe_vma_read_only(last),
> -                                        (last->pte_flags &
> XE_PTE_FLAG_NULL),
> -                                        last->tile_mask);
> -               if (xe_vma_bo(last))
> -                       xe_bo_unlock(xe_vma_bo(last), &ww);
> -               if (!new_last) {
> -                       err = -ENOMEM;
> -                       goto unwind;
> -               }
> -               if (xe_vma_is_userptr(last)) {
> -                       err = xe_vma_userptr_pin_pages(new_last);
> +               if (prev) {
> +                       op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
> +                       err = xe_vm_bind(vm, op->remap.prev, op->engine,
> +                                        xe_vma_bo(op->remap.prev), op->syncs,
> +                                        op->num_syncs,
> +                                        !next ? op->fence : NULL, true, false,
> +                                        op->flags & XE_VMA_OP_LAST && !next);
> +                       op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
>                         if (err)
> -                               goto unwind;
> +                               break;
> +                       op->remap.prev = NULL;
>                 }
> -               err = prep_replacement_vma(vm, new_last);
> -               if (err)
> -                       goto unwind;
> -       }
>  
> -       prep_vma_destroy(vm, vma);
> -       if (list_empty(&vma->unbind_link) && (new_first || new_last))
> -               vma->first_munmap_rebind = true;
> -       list_for_each_entry(__vma, &vma->unbind_link, unbind_link) {
> -               if ((new_first || new_last) && !first_munmap_rebind)
> {
> -                       __vma->first_munmap_rebind = true;
> -                       first_munmap_rebind = true;
> +               if (next) {
> +                       op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
> +                       err = xe_vm_bind(vm, op->remap.next, op->engine,
> +                                        xe_vma_bo(op->remap.next),
> +                                        op->syncs, op->num_syncs,
> +                                        op->fence, true, false,
> +                                        op->flags & XE_VMA_OP_LAST);
> +                       op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
> +                       if (err)
> +                               break;
> +                       op->remap.next = NULL;
>                 }
> -               prep_vma_destroy(vm, __vma);
> -       }
> -       if (new_first) {
> -               xe_vm_insert_vma(vm, new_first);
> -               list_add_tail(&new_first->unbind_link, &vma-
> >unbind_link);
> -               if (!new_last)
> -                       new_first->last_munmap_rebind = true;
> +               vm->async_ops.munmap_rebind_inflight = false;
> +
> +               break;
>         }
> -       if (new_last) {
> -               xe_vm_insert_vma(vm, new_last);
> -               list_add_tail(&new_last->unbind_link, &vma-
> >unbind_link);
> -               new_last->last_munmap_rebind = true;
> +       case DRM_GPUVA_OP_UNMAP:
> +               err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
> +                                  op->num_syncs, op->fence,
> +                                  op->flags & XE_VMA_OP_FIRST,
> +                                  op->flags & XE_VMA_OP_LAST);
> +               break;
> +       case DRM_GPUVA_OP_PREFETCH:
> +               err = xe_vm_prefetch(vm, vma, op->engine, op->prefetch.region,
> +                                    op->syncs, op->num_syncs, op->fence,
> +                                    op->flags & XE_VMA_OP_FIRST,
> +                                    op->flags & XE_VMA_OP_LAST);
> +               break;
> +       default:
> +               XE_BUG_ON("NOT POSSIBLE");
>         }
>  
> -       return vma;
> -
> -unwind:
> -       list_for_each_entry_safe(__vma, next, &vma->unbind_link,
> unbind_link)
> -               list_del_init(&__vma->unbind_link);
> -       if (new_last) {
> -               prep_vma_destroy(vm, new_last);
> -               xe_vma_destroy_unlocked(new_last);
> -       }
> -       if (new_first) {
> -               prep_vma_destroy(vm, new_first);
> -               xe_vma_destroy_unlocked(new_first);
> +       ttm_eu_backoff_reservation(&ww, &objs);
> +       if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
> +               lockdep_assert_held_write(&vm->lock);
> +               err = xe_vma_userptr_pin_pages(vma);
> +               if (!err)
> +                       goto again;
>         }
> +       xe_bo_put(vbo);
>  
> -       return ERR_PTR(err);
> +       if (err)
> +               trace_xe_vma_fail(vma);
> +
> +       return err;
>  }
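
The reserve/execute/backoff ordering with the userptr repin retry reads
fine to me; the shape I reviewed it against is roughly the sketch below
(a schematic user-space model of the control flow with invented stub
helpers, not the TTM API):

#include <errno.h>
#include <stdio.h>
#include <stdbool.h>

static int attempts;
static bool is_userptr = true;

static int reserve(void) { return 0; }		/* ttm_eu_reserve_buffers */
static void backoff(void) { }			/* ttm_eu_backoff_reservation */
static int repin_pages(void) { return 0; }	/* xe_vma_userptr_pin_pages */

static int run_bind(void)
{
	/* first attempt races with an invalidation, second succeeds */
	return attempts++ ? 0 : -EAGAIN;
}

int main(void)
{
	int err;

again:
	err = reserve();
	if (err)
		return err;
	err = run_bind();
	backoff();
	if (err == -EAGAIN && is_userptr && !repin_pages())
		goto again;	/* retry only for a userptr that lost its pages */
	printf("bind result: %d\n", err);
	return err;
}
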
>  
> -/*
> - * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range
> to prefetch
> - */
> -static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
> -                                             struct xe_vma *lookup,
> -                                             u32 region)
> +static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
>  {
> -       struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup),
> *__vma,
> -                     *next;
> -       struct rb_node *node;
> +       int ret = 0;
>  
> -       if (!xe_vma_has_no_bo(vma)) {
> -               if (!xe_bo_can_migrate(xe_vma_bo(vma),
> region_to_mem_type[region]))
> -                       return ERR_PTR(-EINVAL);
> -       }
> +       lockdep_assert_held_write(&vm->lock);
>  
> -       node = &vma->vm_node;
> -       while ((node = rb_next(node))) {
> -               if (!xe_vma_cmp_vma_cb(lookup, node)) {
> -                       __vma = to_xe_vma(node);
> -                       if (!xe_vma_has_no_bo(__vma)) {
> -                               if
> (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region]))
> -                                       goto flush_list;
> -                       }
> -                       list_add_tail(&__vma->unbind_link, &vma-
> >unbind_link);
> -               } else {
> -                       break;
> -               }
> +#ifdef TEST_VM_ASYNC_OPS_ERROR
> +       if (op->inject_error) {
> +               op->inject_error = false;
> +               return -ENOMEM;
>         }
> +#endif
>  
> -       node = &vma->vm_node;
> -       while ((node = rb_prev(node))) {
> -               if (!xe_vma_cmp_vma_cb(lookup, node)) {
> -                       __vma = to_xe_vma(node);
> -                       if (!xe_vma_has_no_bo(__vma)) {
> -                               if
> (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region]))
> -                                       goto flush_list;
> -                       }
> -                       list_add(&__vma->unbind_link, &vma-
> >unbind_link);
> -               } else {
> -                       break;
> -               }
> +       switch (op->base.op) {
> +       case DRM_GPUVA_OP_MAP:
> +               ret = __xe_vma_op_execute(vm, op->map.vma, op);
> +               break;
> +       case DRM_GPUVA_OP_REMAP:
> +       {
> +               struct xe_vma *vma;
> +
> +               if (!op->remap.unmap_done)
> +                       vma = gpuva_to_vma(op->base.remap.unmap->va);
> +               else if (op->remap.prev)
> +                       vma = op->remap.prev;
> +               else
> +                       vma = op->remap.next;
> +
> +               ret = __xe_vma_op_execute(vm, vma, op);
> +               break;
> +       }
> +       case DRM_GPUVA_OP_UNMAP:
> +               ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
> +                                         op);
> +               break;
> +       case DRM_GPUVA_OP_PREFETCH:
> +               ret = __xe_vma_op_execute(vm,
> +                                         gpuva_to_vma(op->base.prefetch.va),
> +                                         op);
> +               break;
> +       default:
> +               XE_BUG_ON("NOT POSSIBLE");
>         }
>  
> -       return vma;
> +       return ret;
> +}
>  
> -flush_list:
> -       list_for_each_entry_safe(__vma, next, &vma->unbind_link,
> -                                unbind_link)
> -               list_del_init(&__vma->unbind_link);
> +static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
> +{
> +       bool last = op->flags & XE_VMA_OP_LAST;
>  
> -       return ERR_PTR(-EINVAL);
> +       if (last) {
> +               while (op->num_syncs--)
> +                       xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
> +               kfree(op->syncs);
> +               if (op->engine)
> +                       xe_engine_put(op->engine);
> +               if (op->fence)
> +                       dma_fence_put(&op->fence->fence);
> +       }
> +       if (!list_empty(&op->link)) {
> +               spin_lock_irq(&vm->async_ops.lock);
> +               list_del(&op->link);
> +               spin_unlock_irq(&vm->async_ops.lock);
> +       }
> +       if (op->ops)
> +               drm_gpuva_ops_free(&vm->mgr, op->ops);
> +       if (last)
> +               xe_vm_put(vm);
>  }
>  
> -static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm,
> -                                               struct xe_bo *bo)
> +static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
> +                            bool post_commit)
>  {
> -       struct xe_vma *first = NULL, *vma;
> +       lockdep_assert_held_write(&vm->lock);
>  
> -       lockdep_assert_held(&vm->lock);
> -       xe_bo_assert_held(bo);
> +       switch (op->base.op) {
> +       case DRM_GPUVA_OP_MAP:
> +               if (op->map.vma) {
> +                       prep_vma_destroy(vm, op->map.vma, post_commit);
> +                       xe_vma_destroy_unlocked(op->map.vma);
> +               }
> +               break;
> +       case DRM_GPUVA_OP_UNMAP:
> +       {
> +               struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
>  
> -       list_for_each_entry(vma, &bo->vmas, bo_link) {
> -               if (xe_vma_vm(vma) != vm)
> -                       continue;
> +               down_read(&vm->userptr.notifier_lock);
> +               vma->gpuva.flags &= ~XE_VMA_DESTROYED;
> +               up_read(&vm->userptr.notifier_lock);
> +               if (post_commit)
> +                       xe_vm_insert_vma(vm, vma);
> +               break;
> +       }
> +       case DRM_GPUVA_OP_REMAP:
> +       {
> +               struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
>  
> -               prep_vma_destroy(vm, vma);
> -               if (!first)
> -                       first = vma;
> -               else
> -                       list_add_tail(&vma->unbind_link, &first-
> >unbind_link);
> +               if (op->remap.prev) {
> +                       prep_vma_destroy(vm, op->remap.prev, post_commit);
> +                       xe_vma_destroy_unlocked(op->remap.prev);
> +               }
> +               if (op->remap.next) {
> +                       prep_vma_destroy(vm, op->remap.next, post_commit);
> +                       xe_vma_destroy_unlocked(op->remap.next);
> +               }
> +               down_read(&vm->userptr.notifier_lock);
> +               vma->gpuva.flags &= ~XE_VMA_DESTROYED;
> +               up_read(&vm->userptr.notifier_lock);
> +               if (post_commit)
> +                       xe_vm_insert_vma(vm, vma);
> +               break;
> +       }
> +       case DRM_GPUVA_OP_PREFETCH:
> +               /* Nothing to do */
> +               break;
> +       default:
> +               XE_BUG_ON("NOT POSSIBLE");
>         }
> +}
>  
> -       return first;
> +static struct xe_vma_op *next_vma_op(struct xe_vm *vm)
> +{
> +       return list_first_entry_or_null(&vm->async_ops.pending,
> +                                       struct xe_vma_op, link);
>  }
>  
> -static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
> -                                              struct xe_bo *bo,
> -                                              u64
> bo_offset_or_userptr,
> -                                              u64 addr, u64 range,
> u32 op,
> -                                              u64 tile_mask, u32
> region)
> +static void xe_vma_op_work_func(struct work_struct *w)
>  {
> -       struct ww_acquire_ctx ww;
> -       struct xe_vma *vma, lookup;
> -       int err;
> +       struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
>  
> -       lockdep_assert_held(&vm->lock);
> +       for (;;) {
> +               struct xe_vma_op *op;
> +               int err;
>  
> -       lookup.start = addr;
> -       lookup.end = addr + range - 1;
> +               if (vm->async_ops.error && !xe_vm_is_closed(vm))
> +                       break;
>  
> -       switch (VM_BIND_OP(op)) {
> -       case XE_VM_BIND_OP_MAP:
> -               if (bo) {
> -                       err = xe_bo_lock(bo, &ww, 0, true);
> -                       if (err)
> -                               return ERR_PTR(err);
> -               }
> -               vma = xe_vma_create(vm, bo, bo_offset_or_userptr,
> addr,
> -                                   addr + range - 1,
> -                                   op & XE_VM_BIND_FLAG_READONLY,
> -                                   op & XE_VM_BIND_FLAG_NULL,
> -                                   tile_mask);
> -               if (bo)
> -                       xe_bo_unlock(bo, &ww);
> -               if (!vma)
> -                       return ERR_PTR(-ENOMEM);
> +               spin_lock_irq(&vm->async_ops.lock);
> +               op = next_vma_op(vm);
> +               spin_unlock_irq(&vm->async_ops.lock);
> +
> +               if (!op)
> +                       break;
>  
> -               xe_vm_insert_vma(vm, vma);
> -               if (bo && !bo->vm) {
> -                       vm_insert_extobj(vm, vma);
> -                       err = add_preempt_fences(vm, bo);
> +               if (!xe_vm_is_closed(vm)) {
> +                       down_write(&vm->lock);
> +                       err = xe_vma_op_execute(vm, op);
>                         if (err) {
> -                               prep_vma_destroy(vm, vma);
> +                               drm_warn(&vm->xe->drm,
> +                                        "Async VM op(%d) failed with %d",
> +                                        op->base.op, err);
> +                               vm_set_async_error(vm, err);
> +                               up_write(&vm->lock);
> +
> +                               if (vm->async_ops.error_capture.addr)
> +                                       vm_error_capture(vm, err, 0, 0, 0);
> +                               break;
> +                       }
> +                       up_write(&vm->lock);
> +               } else {
> +                       struct xe_vma *vma;
> +
> +                       switch (op->base.op) {
> +                       case DRM_GPUVA_OP_REMAP:
> +                               vma = gpuva_to_vma(op->base.remap.unmap->va);
> +                               trace_xe_vma_flush(vma);
> +
> +                               down_write(&vm->lock);
> +                               xe_vma_destroy_unlocked(vma);
> +                               up_write(&vm->lock);
> +                               break;
> +                       case DRM_GPUVA_OP_UNMAP:
> +                               vma = gpuva_to_vma(op->base.unmap.va);
> +                               trace_xe_vma_flush(vma);
> +
> +                               down_write(&vm->lock);
>                                 xe_vma_destroy_unlocked(vma);
> +                               up_write(&vm->lock);
> +                               break;
> +                       default:
> +                               /* Nothing to do */
> +                               break;
> +                       }
>  
> -                               return ERR_PTR(err);
> +                       if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +                                                  &op->fence->fence.flags)) {
> +                               if (!xe_vm_no_dma_fences(vm)) {
> +                                       op->fence->started = true;
> +                                       wake_up_all(&op->fence->wq);
> +                               }
> +                               dma_fence_signal(&op->fence->fence);
>                         }
>                 }
> -               break;
> -       case XE_VM_BIND_OP_UNMAP:
> -               vma = vm_unbind_lookup_vmas(vm, &lookup);
> -               break;
> -       case XE_VM_BIND_OP_PREFETCH:
> -               vma = vm_prefetch_lookup_vmas(vm, &lookup, region);
> -               break;
> -       case XE_VM_BIND_OP_UNMAP_ALL:
> -               XE_BUG_ON(!bo);
>  
> -               err = xe_bo_lock(bo, &ww, 0, true);
> +               xe_vma_op_cleanup(vm, op);
> +       }
> +}
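
The worker loop structure (pop one op under the spinlock, run it under
vm->lock, park the queue on the first error unless the VM is closed and we
are just flushing) matches what I expected from the old worker. For
reference, the skeleton I read it as, written as a stand-alone user-space
sketch with toy helpers rather than the driver code:

#include <stdio.h>
#include <stdbool.h>

static int pending = 3;
static bool closed;
static int saved_error;

static void *next_op(void) { return pending ? &pending : NULL; }
static int execute(void *op) { (void)op; pending--; return 0; }
static void flush(void *op) { (void)op; pending--; }
static void cleanup(void *op) { (void)op; }

int main(void)
{
	for (;;) {
		void *op;

		if (saved_error && !closed)
			break;		/* parked until the error is cleared */

		op = next_op();		/* under async_ops.lock in the driver */
		if (!op)
			break;

		if (!closed) {
			int err = execute(op);	/* under vm->lock */

			if (err) {
				saved_error = err;
				break;	/* op stays pending, like the patch */
			}
		} else {
			flush(op);	/* closed VM: only drop VMAs / fences */
		}
		cleanup(op);
	}
	printf("remaining ops: %d\n", pending);
	return 0;
}
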
> +
> +static int vm_bind_ioctl_ops_commit(struct xe_vm *vm,
> +                                   struct list_head *ops_list, bool async)
> +{
> +       struct xe_vma_op *op, *last_op, *next;
> +       int err;
> +
> +       lockdep_assert_held_write(&vm->lock);
> +
> +       list_for_each_entry(op, ops_list, link) {
> +               last_op = op;
> +               err = xe_vma_op_commit(vm, op);
>                 if (err)
> -                       return ERR_PTR(err);
> -               vma = vm_unbind_all_lookup_vmas(vm, bo);
> -               if (!vma)
> -                       vma = ERR_PTR(-EINVAL);
> -               xe_bo_unlock(bo, &ww);
> -               break;
> -       case XE_VM_BIND_OP_MAP_USERPTR:
> -               XE_BUG_ON(bo);
> -
> -               vma = xe_vma_create(vm, NULL, bo_offset_or_userptr,
> addr,
> -                                   addr + range - 1,
> -                                   op & XE_VM_BIND_FLAG_READONLY,
> -                                   op & XE_VM_BIND_FLAG_NULL,
> -                                   tile_mask);
> -               if (!vma)
> -                       return ERR_PTR(-ENOMEM);
> +                       goto unwind;
> +       }
>  
> -               err = xe_vma_userptr_pin_pages(vma);
> -               if (err) {
> -                       prep_vma_destroy(vm, vma);
> -                       xe_vma_destroy_unlocked(vma);
> +       if (!async) {
> +               err = xe_vma_op_execute(vm, last_op);
> +               if (err)
> +                       goto unwind;
> +               xe_vma_op_cleanup(vm, last_op);
> +       } else {
> +               int i;
> +               bool installed = false;
>  
> -                       return ERR_PTR(err);
> -               } else {
> -                       xe_vm_insert_vma(vm, vma);
> -               }
> -               break;
> -       default:
> -               XE_BUG_ON("NOT POSSIBLE");
> -               vma = ERR_PTR(-EINVAL);
> +               for (i = 0; i < last_op->num_syncs; i++)
> +                       installed |= xe_sync_entry_signal(&last_op->syncs[i],
> +                                                         NULL,
> +                                                         &last_op->fence->fence);
> +               if (!installed && last_op->fence)
> +                       dma_fence_signal(&last_op->fence->fence);
> +
> +               spin_lock_irq(&vm->async_ops.lock);
> +               list_splice_tail(ops_list, &vm->async_ops.pending);
> +               spin_unlock_irq(&vm->async_ops.lock);
> +
> +               if (!vm->async_ops.error)
> +                       queue_work(system_unbound_wq, &vm->async_ops.work);
>         }
>  
> -       return vma;
> +       return 0;
> +
> +unwind:
> +       list_for_each_entry_reverse(op, ops_list, link)
> +               xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED);
> +       list_for_each_entry_safe(op, next, ops_list, link)
> +               xe_vma_op_cleanup(vm, op);
> +
> +       return err;
> +}
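
The commit-then-unwind ordering here is the part I would want extra test
coverage on: ops are committed in order, and on failure the whole list is
walked in reverse with the COMMITTED flag deciding how much to roll back.
A minimal stand-alone model of that transaction pattern (toy ops and an
injected failure, not the xe structures):

#include <stdio.h>
#include <stdbool.h>

#define NUM_OPS 4

struct toy_op { bool committed; };

static int commit(struct toy_op *op, int idx)
{
	if (idx == 2)		/* force a failure on the third op */
		return -1;
	op->committed = true;
	return 0;
}

static void unwind(struct toy_op *op, int idx, bool post_commit)
{
	/* post_commit mirrors op->flags & XE_VMA_OP_COMMITTED */
	printf("unwind op %d (post_commit=%d)\n", idx, post_commit);
	op->committed = false;
}

int main(void)
{
	struct toy_op ops[NUM_OPS] = {{ false }};
	int i, err = 0;

	for (i = 0; i < NUM_OPS; i++) {
		err = commit(&ops[i], i);
		if (err)
			break;
	}

	if (err)
		for (i = NUM_OPS - 1; i >= 0; i--)
			unwind(&ops[i], i, ops[i].committed);

	return err ? 1 : 0;
}
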
> +
> +/*
> + * Unwind operations list, called after a failure of vm_bind_ioctl_ops_create
> + * or vm_bind_ioctl_ops_parse.
> + */
> +static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
> +                                    struct drm_gpuva_ops **ops,
> +                                    int num_ops_list)
> +{
> +       int i;
> +
> +       for (i = 0; i < num_ops_list; ++i) {
> +               struct drm_gpuva_ops *__ops = ops[i];
> +               struct drm_gpuva_op *__op;
> +
> +               if (!__ops)
> +                       continue;
> +
> +               drm_gpuva_for_each_op(__op, __ops) {
> +                       struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> +
> +                       xe_vma_op_unwind(vm, op, false);
> +               }
> +       }
>  }
>  
>  #ifdef TEST_VM_ASYNC_OPS_ERROR
> @@ -2956,8 +2998,6 @@ static int vm_bind_ioctl_check_args(struct
> xe_device *xe,
>         int i;
>  
>         if (XE_IOCTL_ERR(xe, args->extensions) ||
> -           XE_IOCTL_ERR(xe, args->pad || args->pad2) ||
> -           XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])
> ||
>             XE_IOCTL_ERR(xe, !args->num_binds) ||
>             XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS))
>                 return -EINVAL;
> @@ -2989,14 +3029,7 @@ static int vm_bind_ioctl_check_args(struct
> xe_device *xe,
>                 u32 obj = (*bind_ops)[i].obj;
>                 u64 obj_offset = (*bind_ops)[i].obj_offset;
>                 u32 region = (*bind_ops)[i].region;
> -               bool is_null = op &  XE_VM_BIND_FLAG_NULL;
> -
> -               if (XE_IOCTL_ERR(xe, (*bind_ops)[i].pad) ||
> -                   XE_IOCTL_ERR(xe, (*bind_ops)[i].reserved[0] ||
> -                                    (*bind_ops)[i].reserved[1])) {
> -                       err = -EINVAL;
> -                       goto free_bind_ops;
> -               }
> +               bool is_null = op & XE_VM_BIND_FLAG_NULL;
>  
>                 if (i == 0) {
>                         *async = !!(op & XE_VM_BIND_FLAG_ASYNC);
> @@ -3076,15 +3109,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> void *data, struct drm_file *file)
>         struct drm_xe_vm_bind *args = data;
>         struct drm_xe_sync __user *syncs_user;
>         struct xe_bo **bos = NULL;
> -       struct xe_vma **vmas = NULL;
> +       struct drm_gpuva_ops **ops = NULL;
>         struct xe_vm *vm;
>         struct xe_engine *e = NULL;
>         u32 num_syncs;
>         struct xe_sync_entry *syncs = NULL;
>         struct drm_xe_vm_bind_op *bind_ops;
> +       LIST_HEAD(ops_list);
>         bool async;
>         int err;
> -       int i, j = 0;
> +       int i;
>  
>         err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
>         if (err)
> @@ -3174,8 +3208,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> void *data, struct drm_file *file)
>                 goto release_vm_lock;
>         }
>  
> -       vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL);
> -       if (!vmas) {
> +       ops = kzalloc(sizeof(*ops) * args->num_binds, GFP_KERNEL);
> +       if (!ops) {
>                 err = -ENOMEM;
>                 goto release_vm_lock;
>         }
> @@ -3227,7 +3261,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> void *data, struct drm_file *file)
>         for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++)
> {
>                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
>                                           &syncs_user[num_syncs], false,
> -                                         xe_vm_in_fault_mode(vm));
> +                                         xe_vm_no_dma_fences(vm));
>                 if (err)
>                         goto free_syncs;
>         }
> @@ -3238,7 +3272,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> void *data, struct drm_file *file)
>                 u64 addr = bind_ops[i].addr;
>                 u32 op = bind_ops[i].op;
>  
> -               err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr,
> range, op);
> +               err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
>                 if (err)
>                         goto free_syncs;
>         }
> @@ -3251,126 +3285,43 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> void *data, struct drm_file *file)
>                 u64 tile_mask = bind_ops[i].tile_mask;
>                 u32 region = bind_ops[i].region;
>  
> -               vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i],
> obj_offset,
> -                                                  addr, range, op,
> tile_mask,
> -                                                  region);
> -               if (IS_ERR(vmas[i])) {
> -                       err = PTR_ERR(vmas[i]);
> -                       vmas[i] = NULL;
> -                       goto destroy_vmas;
> +               ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
> +                                                 addr, range, op, tile_mask,
> +                                                 region);
> +               if (IS_ERR(ops[i])) {
> +                       err = PTR_ERR(ops[i]);
> +                       ops[i] = NULL;
> +                       goto unwind_ops;
>                 }
>         }
>  
> -       for (j = 0; j < args->num_binds; ++j) {
> -               struct xe_sync_entry *__syncs;
> -               u32 __num_syncs = 0;
> -               bool first_or_last = j == 0 || j == args->num_binds -
> 1;
> -
> -               if (args->num_binds == 1) {
> -                       __num_syncs = num_syncs;
> -                       __syncs = syncs;
> -               } else if (first_or_last && num_syncs) {
> -                       bool first = j == 0;
> -
> -                       __syncs = kmalloc(sizeof(*__syncs) *
> num_syncs,
> -                                         GFP_KERNEL);
> -                       if (!__syncs) {
> -                               err = ENOMEM;
> -                               break;
> -                       }
> -
> -                       /* in-syncs on first bind, out-syncs on last
> bind */
> -                       for (i = 0; i < num_syncs; ++i) {
> -                               bool signal = syncs[i].flags &
> -                                       DRM_XE_SYNC_SIGNAL;
> -
> -                               if ((first && !signal) || (!first &&
> signal))
> -                                       __syncs[__num_syncs++] =
> syncs[i];
> -                       }
> -               } else {
> -                       __num_syncs = 0;
> -                       __syncs = NULL;
> -               }
> -
> -               if (async) {
> -                       bool last = j == args->num_binds - 1;
> -
> -                       /*
> -                        * Each pass of async worker drops the ref,
> take a ref
> -                        * here, 1 set of refs taken above
> -                        */
> -                       if (!last) {
> -                               if (e)
> -                                       xe_engine_get(e);
> -                               xe_vm_get(vm);
> -                       }
> -
> -                       err = vm_bind_ioctl_async(vm, vmas[j], e,
> bos[j],
> -                                                 bind_ops + j,
> __syncs,
> -                                                 __num_syncs);
> -                       if (err && !last) {
> -                               if (e)
> -                                       xe_engine_put(e);
> -                               xe_vm_put(vm);
> -                       }
> -                       if (err)
> -                               break;
> -               } else {
> -                       XE_BUG_ON(j != 0);      /* Not supported */
> -                       err = vm_bind_ioctl(vm, vmas[j], e, bos[j],
> -                                           bind_ops + j, __syncs,
> -                                           __num_syncs, NULL);
> -                       break;  /* Needed so cleanup loops work */
> -               }
> -       }
> +       err = vm_bind_ioctl_ops_parse(vm, e, ops, args->num_binds,
> +                                     syncs, num_syncs, &ops_list, async);
> +       if (err)
> +               goto unwind_ops;
>  
> -       /* Most of cleanup owned by the async bind worker */
> -       if (async && !err) {
> -               up_write(&vm->lock);
> -               if (args->num_binds > 1)
> -                       kfree(syncs);
> -               goto free_objs;
> -       }
> +       err = vm_bind_ioctl_ops_commit(vm, &ops_list, async);
> +       up_write(&vm->lock);
>  
> -destroy_vmas:
> -       for (i = j; err && i < args->num_binds; ++i) {
> -               u32 op = bind_ops[i].op;
> -               struct xe_vma *vma, *next;
> +       for (i = 0; i < args->num_binds; ++i)
> +               xe_bo_put(bos[i]);
>  
> -               if (!vmas[i])
> -                       break;
> +       kfree(bos);
> +       kfree(ops);
> +       if (args->num_binds > 1)
> +               kfree(bind_ops);
>  
> -               list_for_each_entry_safe(vma, next, &vmas[i]-
> >unbind_link,
> -                                        unbind_link) {
> -                       list_del_init(&vma->unbind_link);
> -                       if (!vma->destroyed) {
> -                               prep_vma_destroy(vm, vma);
> -                               xe_vma_destroy_unlocked(vma);
> -                       }
> -               }
> +       return err;
>  
> -               switch (VM_BIND_OP(op)) {
> -               case XE_VM_BIND_OP_MAP:
> -                       prep_vma_destroy(vm, vmas[i]);
> -                       xe_vma_destroy_unlocked(vmas[i]);
> -                       break;
> -               case XE_VM_BIND_OP_MAP_USERPTR:
> -                       prep_vma_destroy(vm, vmas[i]);
> -                       xe_vma_destroy_unlocked(vmas[i]);
> -                       break;
> -               }
> -       }
> +unwind_ops:
> +       vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
>  free_syncs:
> -       while (num_syncs--) {
> -               if (async && j &&
> -                   !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL))
> -                       continue;       /* Still in async worker */
> +       while (num_syncs--)
>                 xe_sync_entry_cleanup(&syncs[num_syncs]);
> -       }
>  
>         kfree(syncs);
>  put_obj:
> -       for (i = j; i < args->num_binds; ++i)
> +       for (i = 0; i < args->num_binds; ++i)
>                 xe_bo_put(bos[i]);
>  release_vm_lock:
>         up_write(&vm->lock);
> @@ -3381,10 +3332,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>                 xe_engine_put(e);
>  free_objs:
>         kfree(bos);
> -       kfree(vmas);
> +       kfree(ops);
>         if (args->num_binds > 1)
>                 kfree(bind_ops);
> -       return err;
> +       return err == -ENODATA ? 0 : err;
>  }
>  
>  /*
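
Just to summarize for readers catching up on the series: the per-VMA
open-coded bind plumbing above collapses into three GPUVA-based stages.
A simplified sketch of the structure visible in this hunk (not a
verbatim excerpt; the labels are the ones the patch introduces):

	err = vm_bind_ioctl_ops_parse(vm, e, ops, args->num_binds,
				      syncs, num_syncs, &ops_list, async);
	if (err)
		goto unwind_ops;	/* roll the drm_gpuva state back */

	err = vm_bind_ioctl_ops_commit(vm, &ops_list, async);
	up_write(&vm->lock);
	/* drop BO refs, free bos/ops/bind_ops, return err */

unwind_ops:
	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
	/* falls through to sync cleanup, BO puts and releasing vm->lock */
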
> @@ -3479,7 +3430,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
>  
>  int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
>  {
> -       struct rb_node *node;
> +       DRM_GPUVA_ITER(it, &vm->mgr, 0);
> +       struct drm_gpuva *gpuva;
>         bool is_vram;
>         uint64_t addr;
>  
> @@ -3493,8 +3445,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
>                 drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS");
>         }
>  
> -       for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
> -               struct xe_vma *vma = to_xe_vma(node);
> +       drm_gpuva_iter_for_each(gpuva, it) {
> +               struct xe_vma *vma = gpuva_to_vma(gpuva);
>                 bool is_userptr = xe_vma_is_userptr(vma);
>                 bool is_null = xe_vma_is_null(vma);
>  
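
The rb-tree walk turning into a GPUVA iteration reads much better. For
anyone who hasn't used the manager yet, a minimal standalone sketch of
the same pattern (assuming the DRM_GPUVA_ITER / drm_gpuva_iter_for_each
API from the GPUVA series this sits on top of; dump_vmas() itself is
made up for illustration):

static void dump_vmas(struct xe_vm *vm, struct drm_printer *p)
{
	DRM_GPUVA_ITER(it, &vm->mgr, 0);	/* start iterating at VA 0 */
	struct drm_gpuva *gpuva;

	drm_gpuva_iter_for_each(gpuva, it) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		/* the accessors added in xe_vm.h hide the drm_gpuva layout */
		drm_printf(p, " [0x%016llx-0x%016llx] %s\n",
			   xe_vma_start(vma), xe_vma_end(vma),
			   xe_vma_is_userptr(vma) ? "userptr" : "bo/null");
	}
}
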
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 3d542ceb5327..962a36126169 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -6,6 +6,7 @@
>  #ifndef _XE_VM_H_
>  #define _XE_VM_H_
>  
> +#include "xe_bo_types.h"
>  #include "xe_macros.h"
>  #include "xe_map.h"
>  #include "xe_vm_types.h"
> @@ -61,7 +62,22 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
>  }
>  
>  struct xe_vma *
> -xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma);
> +xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range);
> +
> +static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva)
> +{
> +       return container_of(gpuva->mgr, struct xe_vm, mgr);
> +}
> +
> +static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva)
> +{
> +       return container_of(gpuva, struct xe_vma, gpuva);
> +}
> +
> +static inline struct xe_vma_op *gpuva_op_to_vma_op(struct drm_gpuva_op *op)
> +{
> +       return container_of(op, struct xe_vma_op, base);
> +}
>  
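
The three helpers above encode the subclassing from the commit message
(xe_vm / xe_vma / xe_vma_op embedding the GPUVA base structs). A
throwaway sketch of the round trip, only to make the direction of each
conversion explicit (example_round_trip() is obviously not part of the
patch):

static void example_round_trip(struct xe_vma *vma)
{
	struct drm_gpuva *gpuva = &vma->gpuva;	/* Xe type -> GPUVA base */
	struct xe_vm *vm = gpuva_to_vm(gpuva);	/* via the gpuva->mgr back-pointer */

	/* container_of() gets us back to the Xe types */
	WARN_ON(gpuva_to_vma(gpuva) != vma);
	WARN_ON(xe_vma_vm(vma) != vm);
}
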
>  /*
>   * Let's abstract start, size, end, bo_offset, vm, and bo as the underlying
> @@ -69,12 +85,12 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma);
>   */
>  static inline u64 xe_vma_start(struct xe_vma *vma)
>  {
> -       return vma->start;
> +       return vma->gpuva.va.addr;
>  }
>  
>  static inline u64 xe_vma_size(struct xe_vma *vma)
>  {
> -       return vma->end - vma->start + 1;
> +       return vma->gpuva.va.range;
>  }
>  
>  static inline u64 xe_vma_end(struct xe_vma *vma)
> @@ -84,32 +100,33 @@ static inline u64 xe_vma_end(struct xe_vma *vma)
>  
>  static inline u64 xe_vma_bo_offset(struct xe_vma *vma)
>  {
> -       return vma->bo_offset;
> +       return vma->gpuva.gem.offset;
>  }
>  
>  static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma)
>  {
> -       return vma->bo;
> +       return !vma->gpuva.gem.obj ? NULL :
> +               container_of(vma->gpuva.gem.obj, struct xe_bo, ttm.base);
>  }
>  
>  static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma)
>  {
> -       return vma->vm;
> +       return container_of(vma->gpuva.mgr, struct xe_vm, mgr);
>  }
>  
>  static inline bool xe_vma_read_only(struct xe_vma *vma)
>  {
> -       return vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
> +       return vma->gpuva.flags & XE_VMA_READ_ONLY;
>  }
>  
>  static inline u64 xe_vma_userptr(struct xe_vma *vma)
>  {
> -       return vma->userptr.ptr;
> +       return vma->gpuva.gem.offset;
>  }
>  
>  static inline bool xe_vma_is_null(struct xe_vma *vma)
>  {
> -       return vma->pte_flags & XE_PTE_FLAG_NULL;
> +       return vma->gpuva.flags & DRM_GPUVA_SPARSE;
>  }
>  
>  static inline bool xe_vma_has_no_bo(struct xe_vma *vma)
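
One thing worth spelling out, since two accessors now read the same
field: gpuva.gem.offset is the BO offset for BO-backed VMAs
(xe_vma_bo_offset()) and the CPU address for userptr VMAs
(xe_vma_userptr()), which only works because a userptr VMA never has a
GEM object attached. Something along these lines is the invariant being
relied on (vma_gem_offset_is_userptr() is just an illustration, not a
suggested helper):

static bool vma_gem_offset_is_userptr(struct xe_vma *vma)
{
	/* no BO and not a NULL/sparse binding => gem.offset holds a user VA */
	return !xe_vma_bo(vma) && !xe_vma_is_null(vma);
}
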
> diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
> index f29a67cb941f..1be628287190 100644
> --- a/drivers/gpu/drm/xe/xe_vm_madvise.c
> +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
> @@ -210,19 +210,13 @@ static const madvise_func madvise_funcs[] = {
>         [DRM_XE_VM_MADVISE_PIN] = madvise_pin,
>  };
>  
> -static struct xe_vma *node_to_vma(const struct rb_node *node)
> -{
> -       BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
> -       return (struct xe_vma *)node;
> -}
> -
>  static struct xe_vma **
>  get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
>  {
> -       struct xe_vma **vmas;
> -       struct xe_vma *vma, *__vma, lookup;
> +       struct xe_vma **vmas, **__vmas;
> +       struct drm_gpuva *gpuva;
>         int max_vmas = 8;
> -       struct rb_node *node;
> +       DRM_GPUVA_ITER(it, &vm->mgr, addr);
>  
>         lockdep_assert_held(&vm->lock);
>  
> @@ -230,62 +224,23 @@ get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
>         if (!vmas)
>                 return NULL;
>  
> -       lookup.start = addr;
> -       lookup.end = addr + range - 1;
> +       drm_gpuva_iter_for_each_range(gpuva, it, addr + range) {
> +               struct xe_vma *vma = gpuva_to_vma(gpuva);
>  
> -       vma = xe_vm_find_overlapping_vma(vm, &lookup);
> -       if (!vma)
> -               return vmas;
> +               if (xe_vma_is_userptr(vma))
> +                       continue;
>  
> -       if (!xe_vma_is_userptr(vma)) {
> -               vmas[*num_vmas] = vma;
> -               *num_vmas += 1;
> -       }
> -
> -       node = &vma->vm_node;
> -       while ((node = rb_next(node))) {
> -               if (!xe_vma_cmp_vma_cb(&lookup, node)) {
> -                       __vma = node_to_vma(node);
> -                       if (xe_vma_is_userptr(__vma))
> -                               continue;
> -
> -                       if (*num_vmas == max_vmas) {
> -                               struct xe_vma **__vmas =
> -                                       krealloc(vmas, max_vmas * sizeof(*vmas),
> -                                                GFP_KERNEL);
> -
> -                               if (!__vmas)
> -                                       return NULL;
> -                               vmas = __vmas;
> -                       }
> -                       vmas[*num_vmas] = __vma;
> -                       *num_vmas += 1;
> -               } else {
> -                       break;
> +               if (*num_vmas == max_vmas) {
> +                       max_vmas <<= 1;
> +                       __vmas = krealloc(vmas, max_vmas * sizeof(*vmas),
> +                                         GFP_KERNEL);
> +                       if (!__vmas)
> +                               return NULL;
> +                       vmas = __vmas;
>                 }
> -       }
>  
> -       node = &vma->vm_node;
> -       while ((node = rb_prev(node))) {
> -               if (!xe_vma_cmp_vma_cb(&lookup, node)) {
> -                       __vma = node_to_vma(node);
> -                       if (xe_vma_is_userptr(__vma))
> -                               continue;
> -
> -                       if (*num_vmas == max_vmas) {
> -                               struct xe_vma **__vmas =
> -                                       krealloc(vmas, max_vmas * sizeof(*vmas),
> -                                                GFP_KERNEL);
> -
> -                               if (!__vmas)
> -                                       return NULL;
> -                               vmas = __vmas;
> -                       }
> -                       vmas[*num_vmas] = __vma;
> -                       *num_vmas += 1;
> -               } else {
> -                       break;
> -               }
> +               vmas[*num_vmas] = vma;
> +               *num_vmas += 1;
>         }
>  
>         return vmas;
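
Not a blocker, since the code being replaced behaved the same way, but
in the grow path above a failing krealloc() returns NULL while the
original vmas array is still allocated, so it looks like it gets leaked.
If you respin for some other reason, something like the sketch below
would avoid that (grow_vmas() is only illustrative):

static struct xe_vma **grow_vmas(struct xe_vma **vmas, int *max_vmas)
{
	struct xe_vma **tmp;

	*max_vmas <<= 1;
	tmp = krealloc(vmas, *max_vmas * sizeof(*vmas), GFP_KERNEL);
	if (!tmp)
		kfree(vmas);	/* don't leak the old array on failure */
	return tmp;
}
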
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 286de52160b9..cad806bafbfc 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -6,6 +6,8 @@
>  #ifndef _XE_VM_TYPES_H_
>  #define _XE_VM_TYPES_H_
>  
> +#include <drm/drm_gpuva_mgr.h>
> +
>  #include <linux/dma-resv.h>
>  #include <linux/kref.h>
>  #include <linux/mmu_notifier.h>
> @@ -14,30 +16,23 @@
>  #include "xe_device_types.h"
>  #include "xe_pt_types.h"
>  
> +struct async_op_fence;
>  struct xe_bo;
> +struct xe_sync_entry;
>  struct xe_vm;
>  
> -struct xe_vma {
> -       struct rb_node vm_node;
> -       /** @vm: VM which this VMA belongs to */
> -       struct xe_vm *vm;
> +#define TEST_VM_ASYNC_OPS_ERROR
> +#define FORCE_ASYNC_OP_ERROR   BIT(31)
>  
> -       /**
> -        * @start: start address of this VMA within its address domain, end -
> -        * start + 1 == VMA size
> -        */
> -       u64 start;
> -       /** @end: end address of this VMA within its address domain */
> -       u64 end;
> -       /** @pte_flags: pte flags for this VMA */
> -#define XE_PTE_FLAG_READ_ONLY          BIT(0)
> -#define XE_PTE_FLAG_NULL               BIT(1)
> -       u32 pte_flags;
> -
> -       /** @bo: BO if not a userptr, must be NULL is userptr */
> -       struct xe_bo *bo;
> -       /** @bo_offset: offset into BO if not a userptr, unused for userptr */
> -       u64 bo_offset;
> +#define XE_VMA_READ_ONLY       DRM_GPUVA_USERBITS
> +#define XE_VMA_DESTROYED       (DRM_GPUVA_USERBITS << 1)
> +#define XE_VMA_ATOMIC_PTE_BIT  (DRM_GPUVA_USERBITS << 2)
> +#define XE_VMA_FIRST_REBIND    (DRM_GPUVA_USERBITS << 3)
> +#define XE_VMA_LAST_REBIND     (DRM_GPUVA_USERBITS << 4)
> +
> +struct xe_vma {
> +       /** @gpuva: Base GPUVA object */
> +       struct drm_gpuva gpuva;
>  
>         /** @tile_mask: Tile mask of where to create binding for this VMA */
>         u64 tile_mask;
> @@ -51,40 +46,8 @@ struct xe_vma {
>          */
>         u64 tile_present;
>  
> -       /**
> -        * @destroyed: VMA is destroyed, in the sense that it shouldn't be
> -        * subject to rebind anymore. This field must be written under
> -        * the vm lock in write mode and the userptr.notifier_lock in
> -        * either mode. Read under the vm lock or the userptr.notifier_lock in
> -        * write mode.
> -        */
> -       bool destroyed;
> -
> -       /**
> -        * @first_munmap_rebind: VMA is first in a sequence of ops that triggers
> -        * a rebind (munmap style VM unbinds). This indicates the operation
> -        * using this VMA must wait on all dma-resv slots (wait for pending jobs
> -        * / trigger preempt fences).
> -        */
> -       bool first_munmap_rebind;
> -
> -       /**
> -        * @last_munmap_rebind: VMA is first in a sequence of ops that triggers
> -        * a rebind (munmap style VM unbinds). This indicates the operation
> -        * using this VMA must install itself into kernel dma-resv slot (blocks
> -        * future jobs) and kick the rebind work in compute mode.
> -        */
> -       bool last_munmap_rebind;
> -
> -       /** @use_atomic_access_pte_bit: Set atomic access bit in PTE */
> -       bool use_atomic_access_pte_bit;
> -
> -       union {
> -               /** @bo_link: link into BO if not a userptr */
> -               struct list_head bo_link;
> -               /** @userptr_link: link into VM repin list if userptr */
> -               struct list_head userptr_link;
> -       };
> +       /** @userptr_link: link into VM repin list if userptr */
> +       struct list_head userptr_link;
>  
>         /**
>          * @rebind_link: link into VM if this VMA needs rebinding, and
> @@ -107,8 +70,6 @@ struct xe_vma {
>  
>         /** @userptr: user pointer state */
>         struct {
> -               /** @ptr: user pointer */
> -               uintptr_t ptr;
>                 /** @invalidate_link: Link for the
> vm::userptr.invalidated list */
>                 struct list_head invalidate_link;
>                 /**
> @@ -156,6 +117,9 @@ struct xe_device;
>  #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
>  
>  struct xe_vm {
> +       /** @mgr: base GPUVA manager used to track VMAs */
> +       struct drm_gpuva_manager mgr;
> +
>         struct xe_device *xe;
>  
>         struct kref refcount;
> @@ -170,7 +134,6 @@ struct xe_vm {
>         struct ttm_lru_bulk_move lru_bulk_move;
>  
>         u64 size;
> -       struct rb_root vmas;
>  
>         struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
>         struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE];
> @@ -348,4 +311,100 @@ struct xe_vm {
>         bool batch_invalidate_tlb;
>  };
>  
> +/** struct xe_vma_op_map - VMA map operation */
> +struct xe_vma_op_map {
> +       /** @vma: VMA to map */
> +       struct xe_vma *vma;
> +       /** @immediate: Immediate bind */
> +       bool immediate;
> +       /** @read_only: Read only */
> +       bool read_only;
> +       /** @is_null: is NULL binding */
> +       bool is_null;
> +};
> +
> +/** struct xe_vma_op_unmap - VMA unmap operation */
> +struct xe_vma_op_unmap {
> +       /** @start: start of the VMA unmap */
> +       u64 start;
> +       /** @range: range of the VMA unmap */
> +       u64 range;
> +};
> +
> +/** struct xe_vma_op_remap - VMA remap operation */
> +struct xe_vma_op_remap {
> +       /** @prev: VMA preceding part of a split mapping */
> +       struct xe_vma *prev;
> +       /** @next: VMA subsequent part of a split mapping */
> +       struct xe_vma *next;
> +       /** @start: start of the VMA unmap */
> +       u64 start;
> +       /** @range: range of the VMA unmap */
> +       u64 range;
> +       /** @unmap_done: unmap operation is done */
> +       bool unmap_done;
> +};
> +
> +/** struct xe_vma_op_prefetch - VMA prefetch operation */
> +struct xe_vma_op_prefetch {
> +       /** @region: memory region to prefetch to */
> +       u32 region;
> +};
> +
> +/** enum xe_vma_op_flags - flags for VMA operation */
> +enum xe_vma_op_flags {
> +       /** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
> +       XE_VMA_OP_FIRST         = (0x1 << 0),
> +       /** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
> +       XE_VMA_OP_LAST          = (0x1 << 1),
> +       /** @XE_VMA_OP_COMMITTED: VMA operation committed */
> +       XE_VMA_OP_COMMITTED     = (0x1 << 2),
> +};
> +
> +/** struct xe_vma_op - VMA operation */
> +struct xe_vma_op {
> +       /** @base: GPUVA base operation */
> +       struct drm_gpuva_op base;
> +       /**
> +        * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
> +        * operation is processed
> +        */
> +       struct drm_gpuva_ops *ops;
> +       /** @engine: engine for this operation */
> +       struct xe_engine *engine;
> +       /**
> +        * @syncs: syncs for this operation, only used on first and last
> +        * operation
> +        */
> +       struct xe_sync_entry *syncs;
> +       /** @num_syncs: number of syncs */
> +       u32 num_syncs;
> +       /** @link: async operation link */
> +       struct list_head link;
> +       /**
> +        * @fence: async operation fence, signaled on last operation complete
> +        */
> +       struct async_op_fence *fence;
> +       /** @tile_mask: tile mask for this operation */
> +       u64 tile_mask;
> +       /** @flags: operation flags */
> +       enum xe_vma_op_flags flags;
> +
> +#ifdef TEST_VM_ASYNC_OPS_ERROR
> +       /** @inject_error: inject error to test async op error handling */
> +       bool inject_error;
> +#endif
> +
> +       union {
> +               /** @map: VMA map operation specific data */
> +               struct xe_vma_op_map map;
> +               /** @unmap: VMA unmap operation specific data */
> +               struct xe_vma_op_unmap unmap;
> +               /** @remap: VMA remap operation specific data */
> +               struct xe_vma_op_remap remap;
> +               /** @prefetch: VMA prefetch operation specific data */
> +               struct xe_vma_op_prefetch prefetch;
> +       };
> +};
> +
>  #endif
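
The union plus base.op layout reads well. For completeness, this is
roughly how a consumer is expected to pick the right member (a sketch
assuming the DRM_GPUVA_OP_* values from the GPUVA series this sits on
top of; print_vma_op() is made up for illustration):

static void print_vma_op(struct xe_vma_op *op)
{
	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		pr_debug("map: immediate=%d read_only=%d\n",
			 op->map.immediate, op->map.read_only);
		break;
	case DRM_GPUVA_OP_REMAP:
		pr_debug("remap: 0x%llx+0x%llx unmap_done=%d\n",
			 op->remap.start, op->remap.range, op->remap.unmap_done);
		break;
	case DRM_GPUVA_OP_UNMAP:
		pr_debug("unmap: 0x%llx+0x%llx\n",
			 op->unmap.start, op->unmap.range);
		break;
	case DRM_GPUVA_OP_PREFETCH:
		pr_debug("prefetch: region=%u\n", op->prefetch.region);
		break;
	default:
		break;
	}
}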


