[Intel-xe] [PATCH 1/5] drm/xe: remove async worker, sync binds, new error handling
Zanoni, Paulo R
paulo.r.zanoni at intel.com
Tue Aug 1 00:55:12 UTC 2023
On Wed, 2023-07-26 at 18:39 -0700, Matthew Brost wrote:
> Async worker is gone, all jobs and memory allocations done in IOCTL.
I asked some questions in reply to the documentation patch a while
ago. I'd really appreciate it if you could answer those questions
there:
https://patchwork.freedesktop.org/patch/547671/?series=119672&rev=3
>
> Async vs. sync now refers to when bind operations complete relative to
> the IOCTL. Async completes when the out-syncs signal, while sync
> completes when the IOCTL returns. In-syncs and out-syncs are only
> allowed in async mode.
The documentation patch suggested that there were 2 possible failure
modes for the async binds:
1 - the ioctl returns a failure code
2 - the ioctl succeeds, but something in the async worker fails after that
The paragraph above suggests case 2 is no longer possible. Can you
confirm that?
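To make my mental model concrete, here is roughly how I picture the
two modes from userspace with this series. This is just a sketch based
on my reading of the patch: xe_bind_one() is a hypothetical helper that
fills a struct drm_xe_vm_bind with a single drm_xe_vm_bind_op and calls
the VM_BIND ioctl, returning 0 or a negative errno. Only the
engine-class and XE_VM_BIND_* names come from the patch; please correct
me if this is off:

#include <stdint.h>
#include <stddef.h>

#include "xe_drm.h"	/* XE_VM_BIND_* and struct drm_xe_sync */

/*
 * Hypothetical helper (not in this patch): fills a struct drm_xe_vm_bind
 * with a single drm_xe_vm_bind_op and calls the VM_BIND ioctl, returning
 * 0 or a negative errno.
 */
int xe_bind_one(int fd, uint32_t vm_id, uint32_t bind_engine, uint32_t bo,
		uint64_t addr, uint64_t range, uint32_t op_and_flags,
		struct drm_xe_sync *syncs, uint32_t num_syncs);

/*
 * Engine created with DRM_XE_ENGINE_CLASS_VM_BIND_SYNC: the bind has
 * completed by the time the ioctl returns; no in/out syncs allowed.
 */
int bind_sync(int fd, uint32_t vm_id, uint32_t sync_bind_engine,
	      uint32_t bo, uint64_t addr, uint64_t range)
{
	return xe_bind_one(fd, vm_id, sync_bind_engine, bo, addr, range,
			   XE_VM_BIND_OP_MAP, NULL, 0);
}

/*
 * Engine created with DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC: the ioctl only
 * queues the bind; completion is observed through the out-sync.
 */
int bind_async(int fd, uint32_t vm_id, uint32_t async_bind_engine,
	       uint32_t bo, uint64_t addr, uint64_t range,
	       struct drm_xe_sync *out_sync)
{
	return xe_bind_one(fd, vm_id, async_bind_engine, bo, addr, range,
			   XE_VM_BIND_OP_MAP | XE_VM_BIND_FLAG_ASYNC,
			   out_sync, 1);
}

In particular, I'm assuming the ASYNC bit in op has to match the
async/sync mode of the bind engine (or of the VM, if no engine is
passed), since the ioctl now rejects a mismatch with -EINVAL.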
>
> The error handling is similar to before: on memory allocation errors
> binds are paused, the VM is put in an error state, and the bind IOCTL
> returns -ENOSPC. The user is allowed to issue sync unbinds, with the
> reclaim bit set, while in the error state. Bind operations without the
> reclaim bit set are rejected with -EALREADY until the VM exits the
> error state. To exit the error state, issue a restart bind operation,
> which will pick up where the original failure left off.
In case errors can't happen after the ioctl returns success:
- Do errors mean "nothing happened" or is it possible that an error may
mean "some stuff was bound but not all"? Is there a way to say "if an
error happens, give me an error code and leave me in the exact same
state we were in before"?
- Corollary: if I only ever submit 1 bind per ioctl, do I get that
behavior?
- If errors always mean nothing happened, then why do we even have
this "error state" thing? Shouldn't the -ENOSPC be enough to signal
user space that they may have to fix things? Why add all the
complication of the error state and reclaim bit stuff?
- Is there a way to *query* if we're in an error state?
- How do I "get out" of the error state so I can retry binds again?
In case errors can happen after the ioctl returns success:
- How can I figure out that an error happened without attempting to
bind another buffer? Is there a way to receive some kind of signal?
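And, regardless of the answers above, here is the recovery loop I think
the commit message describes. Again just a sketch of my understanding,
with hypothetical wrapper functions around the VM_BIND ioctl (assumed
to return 0 or a negative errno); only the XE_VM_BIND_* names and the
-ENOSPC/-EALREADY behavior come from this patch. I'm also assuming the
restart op needs the reclaim bit set, since the -EALREADY check seems
to apply to it too. This is exactly the kind of flow I'd love to see
spelled out in the documentation:

#include <errno.h>
#include <stdint.h>

/*
 * Hypothetical wrappers (not in this patch) around the VM_BIND ioctl,
 * all assumed to return 0 or a negative errno:
 *   do_binds()       - submits the application's queued bind operations
 *   reclaim_unbind() - one sync XE_VM_BIND_OP_UNMAP with
 *                      XE_VM_BIND_FLAG_RECLAIM set
 *   restart_binds()  - one XE_VM_BIND_OP_RESTART, also with the reclaim
 *                      bit set since the VM is still in the error state
 */
int do_binds(int fd, uint32_t vm_id, uint32_t bind_engine);
int reclaim_unbind(int fd, uint32_t vm_id, uint64_t addr, uint64_t range);
int restart_binds(int fd, uint32_t vm_id);

int bind_with_recovery(int fd, uint32_t vm_id, uint32_t bind_engine,
		       uint64_t victim_addr, uint64_t victim_range)
{
	int err = do_binds(fd, vm_id, bind_engine);

	while (err == -ENOSPC) {
		/*
		 * The VM is now in the error state: any bind without the
		 * reclaim bit is rejected with -EALREADY.
		 */
		err = reclaim_unbind(fd, vm_id, victim_addr, victim_range);
		if (err)
			return err;

		/* The restart picks up where the failed bind left off. */
		err = restart_binds(fd, vm_id);
	}

	return err;
}

If the reclaim unbinds or the restart can themselves fail in ways other
than another -ENOSPC, that would be good to document as well.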
Thanks,
Paulo
>
> Signed-off-by: Matthew Brost <mattthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_engine.c | 7 +-
> drivers/gpu/drm/xe/xe_engine_types.h | 1 +
> drivers/gpu/drm/xe/xe_exec.c | 43 --
> drivers/gpu/drm/xe/xe_sync.c | 14 +-
> drivers/gpu/drm/xe/xe_sync.h | 2 +-
> drivers/gpu/drm/xe/xe_vm.c | 692 ++++++------------------
> drivers/gpu/drm/xe/xe_vm.h | 2 -
> drivers/gpu/drm/xe/xe_vm_types.h | 37 +-
> drivers/gpu/drm/xe/xe_wait_user_fence.c | 43 +-
> include/uapi/drm/xe_drm.h | 86 +--
> 10 files changed, 203 insertions(+), 724 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
> index 0102dad16e29..1c6b84f32823 100644
> --- a/drivers/gpu/drm/xe/xe_engine.c
> +++ b/drivers/gpu/drm/xe/xe_engine.c
> @@ -542,7 +542,10 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
> if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
> return -EINVAL;
>
> - if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
> + if (eci[0].engine_class >= DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC) {
> + bool sync = eci[0].engine_class ==
> + DRM_XE_ENGINE_CLASS_VM_BIND_SYNC;
> +
> for_each_gt(gt, xe, id) {
> struct xe_engine *new;
>
> @@ -565,6 +568,8 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data,
> args->width, hwe,
> ENGINE_FLAG_PERSISTENT |
> ENGINE_FLAG_VM |
> + (sync ? 0 :
> + ENGINE_FLAG_VM_ASYNC) |
> (id ?
> ENGINE_FLAG_BIND_ENGINE_CHILD :
> 0));
> diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
> index 36bfaeec23f4..4949edfa0980 100644
> --- a/drivers/gpu/drm/xe/xe_engine_types.h
> +++ b/drivers/gpu/drm/xe/xe_engine_types.h
> @@ -59,6 +59,7 @@ struct xe_engine {
> #define ENGINE_FLAG_VM BIT(4)
> #define ENGINE_FLAG_BIND_ENGINE_CHILD BIT(5)
> #define ENGINE_FLAG_WA BIT(6)
> +#define ENGINE_FLAG_VM_ASYNC BIT(7)
>
> /**
> * @flags: flags for this engine, should statically setup aside from ban
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index fff4a9d9d12a..58af1e0ef4d1 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -231,27 +231,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> }
> }
>
> - /*
> - * We can't install a job into the VM dma-resv shared slot before an
> - * async VM bind passed in as a fence without the risk of deadlocking as
> - * the bind can trigger an eviction which in turn depends on anything in
> - * the VM dma-resv shared slots. Not an ideal solution, but we wait for
> - * all dependent async VM binds to start (install correct fences into
> - * dma-resv slots) before moving forward.
> - */
> - if (!xe_vm_no_dma_fences(vm) &&
> - vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
> - for (i = 0; i < args->num_syncs; i++) {
> - struct dma_fence *fence = syncs[i].fence;
> -
> - if (fence) {
> - err = xe_vm_async_fence_wait_start(fence);
> - if (err)
> - goto err_syncs;
> - }
> - }
> - }
> -
> retry:
> if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
> err = down_write_killable(&vm->lock);
> @@ -264,28 +243,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> if (err)
> goto err_syncs;
>
> - /* We don't allow execs while the VM is in error state */
> - if (vm->async_ops.error) {
> - err = vm->async_ops.error;
> - goto err_unlock_list;
> - }
> -
> - /*
> - * Extreme corner where we exit a VM error state with a munmap style VM
> - * unbind inflight which requires a rebind. In this case the rebind
> - * needs to install some fences into the dma-resv slots. The worker to
> - * do this queued, let that worker make progress by dropping vm->lock,
> - * flushing the worker and retrying the exec.
> - */
> - if (vm->async_ops.munmap_rebind_inflight) {
> - if (write_locked)
> - up_write(&vm->lock);
> - else
> - up_read(&vm->lock);
> - flush_work(&vm->async_ops.work);
> - goto retry;
> - }
> -
> if (write_locked) {
> err = xe_vm_userptr_pin(vm);
> downgrade_write(&vm->lock);
> diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> index 9fcd7802ba30..73ef259aa387 100644
> --- a/drivers/gpu/drm/xe/xe_sync.c
> +++ b/drivers/gpu/drm/xe/xe_sync.c
> @@ -18,7 +18,6 @@
> #include "xe_sched_job_types.h"
>
> #define SYNC_FLAGS_TYPE_MASK 0x3
> -#define SYNC_FLAGS_FENCE_INSTALLED 0x10000
>
> struct user_fence {
> struct xe_device *xe;
> @@ -223,12 +222,11 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
> return 0;
> }
>
> -bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
> +void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
> struct dma_fence *fence)
> {
> - if (!(sync->flags & DRM_XE_SYNC_SIGNAL) ||
> - sync->flags & SYNC_FLAGS_FENCE_INSTALLED)
> - return false;
> + if (!(sync->flags & DRM_XE_SYNC_SIGNAL))
> + return;
>
> if (sync->chain_fence) {
> drm_syncobj_add_point(sync->syncobj, sync->chain_fence,
> @@ -260,12 +258,6 @@ bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
> job->user_fence.addr = sync->addr;
> job->user_fence.value = sync->timeline_value;
> }
> -
> - /* TODO: external BO? */
> -
> - sync->flags |= SYNC_FLAGS_FENCE_INSTALLED;
> -
> - return true;
> }
>
> void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
> diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
> index 4cbcf7a19911..30958ddc4cdc 100644
> --- a/drivers/gpu/drm/xe/xe_sync.h
> +++ b/drivers/gpu/drm/xe/xe_sync.h
> @@ -19,7 +19,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> int xe_sync_entry_wait(struct xe_sync_entry *sync);
> int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
> struct xe_sched_job *job);
> -bool xe_sync_entry_signal(struct xe_sync_entry *sync,
> +void xe_sync_entry_signal(struct xe_sync_entry *sync,
> struct xe_sched_job *job,
> struct dma_fence *fence);
> void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index ddb1b4af6458..77723f680f37 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -545,7 +545,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
> unsigned int fence_count = 0;
> LIST_HEAD(preempt_fences);
> ktime_t end = 0;
> - int err;
> + int err = 0;
> long wait;
> int __maybe_unused tries = 0;
>
> @@ -561,22 +561,9 @@ static void preempt_rebind_work_func(struct work_struct *w)
> }
>
> retry:
> - if (vm->async_ops.error)
> + if (vm->ops_state.error || vm->ops_state.munmap_rebind_inflight)
> goto out_unlock_outer;
>
> - /*
> - * Extreme corner where we exit a VM error state with a munmap style VM
> - * unbind inflight which requires a rebind. In this case the rebind
> - * needs to install some fences into the dma-resv slots. The worker to
> - * do this queued, let that worker make progress by dropping vm->lock
> - * and trying this again.
> - */
> - if (vm->async_ops.munmap_rebind_inflight) {
> - up_write(&vm->lock);
> - flush_work(&vm->async_ops.work);
> - goto retry;
> - }
> -
> if (xe_vm_userptr_check_repin(vm)) {
> err = xe_vm_userptr_pin(vm);
> if (err)
> @@ -1187,7 +1174,6 @@ static struct drm_gpuva_fn_ops gpuva_ops = {
> .op_alloc = xe_vm_op_alloc,
> };
>
> -static void xe_vma_op_work_func(struct work_struct *w);
> static void vm_destroy_work_func(struct work_struct *w);
>
> struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> @@ -1221,9 +1207,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> INIT_LIST_HEAD(&vm->notifier.rebind_list);
> spin_lock_init(&vm->notifier.list_lock);
>
> - INIT_LIST_HEAD(&vm->async_ops.pending);
> - INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func);
> - spin_lock_init(&vm->async_ops.lock);
> + INIT_LIST_HEAD(&vm->ops_state.pending);
>
> INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
>
> @@ -1278,11 +1262,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> vm->batch_invalidate_tlb = false;
> }
>
> - if (flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
> - vm->async_ops.fence.context = dma_fence_context_alloc(1);
> - vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
> - }
> -
> /* Fill pt_root after allocating scratch tables */
> for_each_tile(tile, xe, id) {
> if (!vm->pt_root[id])
> @@ -1305,7 +1284,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> migrate_vm = xe_migrate_get_vm(tile->migrate);
> eng = xe_engine_create_class(xe, gt, migrate_vm,
> XE_ENGINE_CLASS_COPY,
> - ENGINE_FLAG_VM);
> + ENGINE_FLAG_VM |
> + ((flags & XE_VM_FLAG_ASYNC_DEFAULT) ?
> + ENGINE_FLAG_VM_ASYNC : 0));
> xe_vm_put(migrate_vm);
> if (IS_ERR(eng)) {
> xe_vm_close_and_put(vm);
> @@ -1360,42 +1341,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> return ERR_PTR(err);
> }
>
> -static void flush_async_ops(struct xe_vm *vm)
> -{
> - queue_work(system_unbound_wq, &vm->async_ops.work);
> - flush_work(&vm->async_ops.work);
> -}
> -
> -static void vm_error_capture(struct xe_vm *vm, int err,
> - u32 op, u64 addr, u64 size)
> -{
> - struct drm_xe_vm_bind_op_error_capture capture;
> - u64 __user *address =
> - u64_to_user_ptr(vm->async_ops.error_capture.addr);
> - bool in_kthread = !current->mm;
> -
> - capture.error = err;
> - capture.op = op;
> - capture.addr = addr;
> - capture.size = size;
> -
> - if (in_kthread) {
> - if (!mmget_not_zero(vm->async_ops.error_capture.mm))
> - goto mm_closed;
> - kthread_use_mm(vm->async_ops.error_capture.mm);
> - }
> -
> - if (copy_to_user(address, &capture, sizeof(capture)))
> - XE_WARN_ON("Copy to user failed");
> -
> - if (in_kthread) {
> - kthread_unuse_mm(vm->async_ops.error_capture.mm);
> - mmput(vm->async_ops.error_capture.mm);
> - }
> -
> -mm_closed:
> - wake_up_all(&vm->async_ops.error_capture.wq);
> -}
> +static void vm_bind_ioctl_ops_cleanup(struct xe_vm *vm);
>
> static void xe_vm_close(struct xe_vm *vm)
> {
> @@ -1417,7 +1363,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)
> XE_BUG_ON(vm->preempt.num_engines);
>
> xe_vm_close(vm);
> - flush_async_ops(vm);
> if (xe_vm_in_compute_mode(vm))
> flush_work(&vm->preempt.rebind_work);
>
> @@ -1430,6 +1375,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
> }
>
> down_write(&vm->lock);
> + vm_bind_ioctl_ops_cleanup(vm);
> xe_vm_lock(vm, &ww, 0, false);
> drm_gpuva_for_each_va_safe(gpuva, next, &vm->mgr) {
> vma = gpuva_to_vma(gpuva);
> @@ -1483,9 +1429,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)
> xe_vma_destroy_unlocked(vma);
> }
>
> - if (vm->async_ops.error_capture.addr)
> - wake_up_all(&vm->async_ops.error_capture.wq);
> -
> XE_WARN_ON(!list_empty(&vm->extobj.list));
> up_write(&vm->lock);
>
> @@ -1640,10 +1583,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
>
> err_fences:
> if (fences) {
> - while (cur_fence) {
> - /* FIXME: Rewind the previous binds? */
> + while (cur_fence)
> dma_fence_put(fences[--cur_fence]);
> - }
> kfree(fences);
> }
>
> @@ -1717,100 +1658,24 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
>
> err_fences:
> if (fences) {
> - while (cur_fence) {
> - /* FIXME: Rewind the previous binds? */
> + while (cur_fence)
> dma_fence_put(fences[--cur_fence]);
> - }
> kfree(fences);
> }
>
> return ERR_PTR(err);
> }
>
> -struct async_op_fence {
> - struct dma_fence fence;
> - struct dma_fence *wait_fence;
> - struct dma_fence_cb cb;
> - struct xe_vm *vm;
> - wait_queue_head_t wq;
> - bool started;
> -};
> -
> -static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
> +static bool xe_vm_sync_mode(struct xe_vm *vm, struct xe_engine *e)
> {
> - return "xe";
> -}
> -
> -static const char *
> -async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
> -{
> - return "async_op_fence";
> -}
> -
> -static const struct dma_fence_ops async_op_fence_ops = {
> - .get_driver_name = async_op_fence_get_driver_name,
> - .get_timeline_name = async_op_fence_get_timeline_name,
> -};
> -
> -static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
> -{
> - struct async_op_fence *afence =
> - container_of(cb, struct async_op_fence, cb);
> -
> - afence->fence.error = afence->wait_fence->error;
> - dma_fence_signal(&afence->fence);
> - xe_vm_put(afence->vm);
> - dma_fence_put(afence->wait_fence);
> - dma_fence_put(&afence->fence);
> -}
> -
> -static void add_async_op_fence_cb(struct xe_vm *vm,
> - struct dma_fence *fence,
> - struct async_op_fence *afence)
> -{
> - int ret;
> -
> - if (!xe_vm_no_dma_fences(vm)) {
> - afence->started = true;
> - smp_wmb();
> - wake_up_all(&afence->wq);
> - }
> -
> - afence->wait_fence = dma_fence_get(fence);
> - afence->vm = xe_vm_get(vm);
> - dma_fence_get(&afence->fence);
> - ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
> - if (ret == -ENOENT) {
> - afence->fence.error = afence->wait_fence->error;
> - dma_fence_signal(&afence->fence);
> - }
> - if (ret) {
> - xe_vm_put(vm);
> - dma_fence_put(afence->wait_fence);
> - dma_fence_put(&afence->fence);
> - }
> - XE_WARN_ON(ret && ret != -ENOENT);
> -}
> -
> -int xe_vm_async_fence_wait_start(struct dma_fence *fence)
> -{
> - if (fence->ops == &async_op_fence_ops) {
> - struct async_op_fence *afence =
> - container_of(fence, struct async_op_fence, fence);
> -
> - XE_BUG_ON(xe_vm_no_dma_fences(afence->vm));
> -
> - smp_rmb();
> - return wait_event_interruptible(afence->wq, afence->started);
> - }
> -
> - return 0;
> + return e ? !(e->flags & ENGINE_FLAG_VM_ASYNC) :
> + !(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT);
> }
>
> static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
> struct xe_engine *e, struct xe_sync_entry *syncs,
> - u32 num_syncs, struct async_op_fence *afence,
> - bool immediate, bool first_op, bool last_op)
> + u32 num_syncs, bool immediate, bool first_op,
> + bool last_op)
> {
> struct dma_fence *fence;
>
> @@ -1832,17 +1697,18 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
> xe_sync_entry_signal(&syncs[i], NULL, fence);
> }
> }
> - if (afence)
> - add_async_op_fence_cb(vm, fence, afence);
>
> + if (last_op && xe_vm_sync_mode(vm, e))
> + dma_fence_wait(fence, true);
> dma_fence_put(fence);
> +
> return 0;
> }
>
> static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
> struct xe_bo *bo, struct xe_sync_entry *syncs,
> - u32 num_syncs, struct async_op_fence *afence,
> - bool immediate, bool first_op, bool last_op)
> + u32 num_syncs, bool immediate, bool first_op,
> + bool last_op)
> {
> int err;
>
> @@ -1855,14 +1721,13 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
> return err;
> }
>
> - return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence, immediate,
> - first_op, last_op);
> + return __xe_vm_bind(vm, vma, e, syncs, num_syncs, immediate, first_op,
> + last_op);
> }
>
> static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> struct xe_engine *e, struct xe_sync_entry *syncs,
> - u32 num_syncs, struct async_op_fence *afence,
> - bool first_op, bool last_op)
> + u32 num_syncs, bool first_op, bool last_op)
> {
> struct dma_fence *fence;
>
> @@ -1872,103 +1737,18 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs, first_op, last_op);
> if (IS_ERR(fence))
> return PTR_ERR(fence);
> - if (afence)
> - add_async_op_fence_cb(vm, fence, afence);
>
> xe_vma_destroy(vma, fence);
> + if (last_op && xe_vm_sync_mode(vm, e))
> + dma_fence_wait(fence, true);
> dma_fence_put(fence);
>
> return 0;
> }
>
> -static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
> - u64 value)
> -{
> - if (XE_IOCTL_DBG(xe, !value))
> - return -EINVAL;
> -
> - if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
> - return -EOPNOTSUPP;
> -
> - if (XE_IOCTL_DBG(xe, vm->async_ops.error_capture.addr))
> - return -EOPNOTSUPP;
> -
> - vm->async_ops.error_capture.mm = current->mm;
> - vm->async_ops.error_capture.addr = value;
> - init_waitqueue_head(&vm->async_ops.error_capture.wq);
> -
> - return 0;
> -}
> -
> -typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
> - u64 value);
> -
> -static const xe_vm_set_property_fn vm_set_property_funcs[] = {
> - [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
> - vm_set_error_capture_address,
> -};
> -
> -static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
> - u64 extension)
> -{
> - u64 __user *address = u64_to_user_ptr(extension);
> - struct drm_xe_ext_vm_set_property ext;
> - int err;
> -
> - err = __copy_from_user(&ext, address, sizeof(ext));
> - if (XE_IOCTL_DBG(xe, err))
> - return -EFAULT;
> -
> - if (XE_IOCTL_DBG(xe, ext.property >=
> - ARRAY_SIZE(vm_set_property_funcs)) ||
> - XE_IOCTL_DBG(xe, ext.pad) ||
> - XE_IOCTL_DBG(xe, ext.reserved[0] || ext.reserved[1]))
> - return -EINVAL;
> -
> - return vm_set_property_funcs[ext.property](xe, vm, ext.value);
> -}
> -
> -typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
> - u64 extension);
> -
> -static const xe_vm_set_property_fn vm_user_extension_funcs[] = {
> - [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
> -};
> -
> -#define MAX_USER_EXTENSIONS 16
> -static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
> - u64 extensions, int ext_number)
> -{
> - u64 __user *address = u64_to_user_ptr(extensions);
> - struct xe_user_extension ext;
> - int err;
> -
> - if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
> - return -E2BIG;
> -
> - err = __copy_from_user(&ext, address, sizeof(ext));
> - if (XE_IOCTL_DBG(xe, err))
> - return -EFAULT;
> -
> - if (XE_IOCTL_DBG(xe, ext.pad) ||
> - XE_IOCTL_DBG(xe, ext.name >=
> - ARRAY_SIZE(vm_user_extension_funcs)))
> - return -EINVAL;
> -
> - err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
> - if (XE_IOCTL_DBG(xe, err))
> - return err;
> -
> - if (ext.next_extension)
> - return vm_user_extensions(xe, vm, ext.next_extension,
> - ++ext_number);
> -
> - return 0;
> -}
> -
> #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
> DRM_XE_VM_CREATE_COMPUTE_MODE | \
> - DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
> + DRM_XE_VM_CREATE_ASYNC_DEFAULT | \
> DRM_XE_VM_CREATE_FAULT_MODE)
>
> int xe_vm_create_ioctl(struct drm_device *dev, void *data,
> @@ -2008,12 +1788,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
> !xe->info.supports_usm))
> return -EINVAL;
>
> + if (XE_IOCTL_DBG(xe, args->extensions))
> + return -EINVAL;
> +
> if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
> flags |= XE_VM_FLAG_SCRATCH_PAGE;
> if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
> flags |= XE_VM_FLAG_COMPUTE_MODE;
> - if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
> - flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
> + if (args->flags & DRM_XE_VM_CREATE_ASYNC_DEFAULT)
> + flags |= XE_VM_FLAG_ASYNC_DEFAULT;
> if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
> flags |= XE_VM_FLAG_FAULT_MODE;
>
> @@ -2021,14 +1804,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
> if (IS_ERR(vm))
> return PTR_ERR(vm);
>
> - if (args->extensions) {
> - err = vm_user_extensions(xe, vm, args->extensions, 0);
> - if (XE_IOCTL_DBG(xe, err)) {
> - xe_vm_close_and_put(vm);
> - return err;
> - }
> - }
> -
> mutex_lock(&xef->vm.lock);
> err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
> mutex_unlock(&xef->vm.lock);
> @@ -2098,8 +1873,7 @@ static const u32 region_to_mem_type[] = {
> static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> struct xe_engine *e, u32 region,
> struct xe_sync_entry *syncs, u32 num_syncs,
> - struct async_op_fence *afence, bool first_op,
> - bool last_op)
> + bool first_op, bool last_op)
> {
> int err;
>
> @@ -2113,7 +1887,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
>
> if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
> return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs, num_syncs,
> - afence, true, first_op, last_op);
> + true, first_op, last_op);
> } else {
> int i;
>
> @@ -2123,8 +1897,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> xe_sync_entry_signal(&syncs[i], NULL,
> dma_fence_get_stub());
> }
> - if (afence)
> - dma_fence_signal(&afence->fence);
> +
> return 0;
> }
> }
> @@ -2146,49 +1919,10 @@ static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
> tv->bo = xe_vm_ttm_bo(vm);
> }
>
> -static void vm_set_async_error(struct xe_vm *vm, int err)
> +static void xe_vm_set_error(struct xe_vm *vm, int err)
> {
> lockdep_assert_held(&vm->lock);
> - vm->async_ops.error = err;
> -}
> -
> -static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
> - u64 addr, u64 range, u32 op)
> -{
> - struct xe_device *xe = vm->xe;
> - struct xe_vma *vma;
> - bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
> -
> - lockdep_assert_held(&vm->lock);
> -
> - switch (VM_BIND_OP(op)) {
> - case XE_VM_BIND_OP_MAP:
> - case XE_VM_BIND_OP_MAP_USERPTR:
> - vma = xe_vm_find_overlapping_vma(vm, addr, range);
> - if (XE_IOCTL_DBG(xe, vma && !async))
> - return -EBUSY;
> - break;
> - case XE_VM_BIND_OP_UNMAP:
> - case XE_VM_BIND_OP_PREFETCH:
> - vma = xe_vm_find_overlapping_vma(vm, addr, range);
> - if (XE_IOCTL_DBG(xe, !vma))
> - /* Not an actual error, IOCTL cleans up returns and 0 */
> - return -ENODATA;
> - if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr ||
> - xe_vma_end(vma) != addr + range) && !async))
> - return -EINVAL;
> - break;
> - case XE_VM_BIND_OP_UNMAP_ALL:
> - if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list)))
> - /* Not an actual error, IOCTL cleans up returns and 0 */
> - return -ENODATA;
> - break;
> - default:
> - XE_BUG_ON("NOT POSSIBLE");
> - return -EINVAL;
> - }
> -
> - return 0;
> + vm->ops_state.error = err;
> }
>
> static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
> @@ -2417,41 +2151,20 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
> {
> struct xe_vma_op *last_op = NULL;
> struct list_head *async_list = NULL;
> - struct async_op_fence *fence = NULL;
> - int err, i;
> + int i;
>
> lockdep_assert_held_write(&vm->lock);
> - XE_BUG_ON(num_ops_list > 1 && !async);
> -
> - if (num_syncs && async) {
> - u64 seqno;
> -
> - fence = kmalloc(sizeof(*fence), GFP_KERNEL);
> - if (!fence)
> - return -ENOMEM;
> -
> - seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
> - dma_fence_init(&fence->fence, &async_op_fence_ops,
> - &vm->async_ops.lock, e ? e->bind.fence_ctx :
> - vm->async_ops.fence.context, seqno);
> -
> - if (!xe_vm_no_dma_fences(vm)) {
> - fence->vm = vm;
> - fence->started = false;
> - init_waitqueue_head(&fence->wq);
> - }
> - }
>
> for (i = 0; i < num_ops_list; ++i) {
> struct drm_gpuva_ops *__ops = ops[i];
> struct drm_gpuva_op *__op;
> + bool has_ops = false;
>
> drm_gpuva_for_each_op(__op, __ops) {
> struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> bool first = !async_list;
>
> - XE_BUG_ON(!first && !async);
> -
> + has_ops = true;
> INIT_LIST_HEAD(&op->link);
> if (first)
> async_list = ops_list;
> @@ -2473,10 +2186,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
> vma = new_vma(vm, &op->base.map,
> op->tile_mask, op->map.read_only,
> op->map.is_null);
> - if (IS_ERR(vma)) {
> - err = PTR_ERR(vma);
> - goto free_fence;
> - }
> + if (IS_ERR(vma))
> + return PTR_ERR(vma);
>
> op->map.vma = vma;
> break;
> @@ -2501,10 +2212,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
> vma = new_vma(vm, op->base.remap.prev,
> op->tile_mask, read_only,
> is_null);
> - if (IS_ERR(vma)) {
> - err = PTR_ERR(vma);
> - goto free_fence;
> - }
> + if (IS_ERR(vma))
> + return PTR_ERR(vma);
>
> op->remap.prev = vma;
>
> @@ -2536,10 +2245,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
> vma = new_vma(vm, op->base.remap.next,
> op->tile_mask, read_only,
> is_null);
> - if (IS_ERR(vma)) {
> - err = PTR_ERR(vma);
> - goto free_fence;
> - }
> + if (IS_ERR(vma))
> + return PTR_ERR(vma);
>
> op->remap.next = vma;
>
> @@ -2566,9 +2273,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
> }
>
> last_op = op;
> +
> }
>
> - last_op->ops = __ops;
> + if (has_ops) {
> + last_op->ops = __ops;
> + } else {
> + drm_gpuva_ops_free(&vm->mgr, __ops);
> + ops[i] = NULL;
> + }
> }
>
> if (!last_op)
> @@ -2577,13 +2290,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e,
> last_op->flags |= XE_VMA_OP_LAST;
> last_op->num_syncs = num_syncs;
> last_op->syncs = syncs;
> - last_op->fence = fence;
>
> return 0;
> -
> -free_fence:
> - kfree(fence);
> - return err;
> }
>
> static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
> @@ -2674,7 +2382,7 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> switch (op->base.op) {
> case DRM_GPUVA_OP_MAP:
> err = xe_vm_bind(vm, vma, op->engine, xe_vma_bo(vma),
> - op->syncs, op->num_syncs, op->fence,
> + op->syncs, op->num_syncs,
> op->map.immediate || !xe_vm_in_fault_mode(vm),
> op->flags & XE_VMA_OP_FIRST,
> op->flags & XE_VMA_OP_LAST);
> @@ -2686,15 +2394,14 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
>
> if (!op->remap.unmap_done) {
> if (prev || next) {
> - vm->async_ops.munmap_rebind_inflight = true;
> + vm->ops_state.munmap_rebind_inflight = true;
> vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
> }
> err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
> op->num_syncs,
> - !prev && !next ? op->fence : NULL,
> op->flags & XE_VMA_OP_FIRST,
> - op->flags & XE_VMA_OP_LAST && !prev &&
> - !next);
> + op->flags & XE_VMA_OP_LAST &&
> + !prev && !next);
> if (err)
> break;
> op->remap.unmap_done = true;
> @@ -2704,8 +2411,7 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
> err = xe_vm_bind(vm, op->remap.prev, op->engine,
> xe_vma_bo(op->remap.prev), op->syncs,
> - op->num_syncs,
> - !next ? op->fence : NULL, true, false,
> + op->num_syncs, true, false,
> op->flags & XE_VMA_OP_LAST && !next);
> op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
> if (err)
> @@ -2718,26 +2424,25 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> err = xe_vm_bind(vm, op->remap.next, op->engine,
> xe_vma_bo(op->remap.next),
> op->syncs, op->num_syncs,
> - op->fence, true, false,
> + true, false,
> op->flags & XE_VMA_OP_LAST);
> op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
> if (err)
> break;
> op->remap.next = NULL;
> }
> - vm->async_ops.munmap_rebind_inflight = false;
> + vm->ops_state.munmap_rebind_inflight = false;
>
> break;
> }
> case DRM_GPUVA_OP_UNMAP:
> err = xe_vm_unbind(vm, vma, op->engine, op->syncs,
> - op->num_syncs, op->fence,
> - op->flags & XE_VMA_OP_FIRST,
> + op->num_syncs, op->flags & XE_VMA_OP_FIRST,
> op->flags & XE_VMA_OP_LAST);
> break;
> case DRM_GPUVA_OP_PREFETCH:
> err = xe_vm_prefetch(vm, vma, op->engine, op->prefetch.region,
> - op->syncs, op->num_syncs, op->fence,
> + op->syncs, op->num_syncs,
> op->flags & XE_VMA_OP_FIRST,
> op->flags & XE_VMA_OP_LAST);
> break;
> @@ -2811,20 +2516,17 @@ static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
> {
> bool last = op->flags & XE_VMA_OP_LAST;
>
> + lockdep_assert_held_write(&vm->lock);
> +
> if (last) {
> while (op->num_syncs--)
> xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
> kfree(op->syncs);
> if (op->engine)
> xe_engine_put(op->engine);
> - if (op->fence)
> - dma_fence_put(&op->fence->fence);
> }
> - if (!list_empty(&op->link)) {
> - spin_lock_irq(&vm->async_ops.lock);
> + if (!list_empty(&op->link))
> list_del(&op->link);
> - spin_unlock_irq(&vm->async_ops.lock);
> - }
> if (op->ops)
> drm_gpuva_ops_free(&vm->mgr, op->ops);
> if (last)
> @@ -2881,122 +2583,23 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
> }
> }
>
> -static struct xe_vma_op *next_vma_op(struct xe_vm *vm)
> -{
> - return list_first_entry_or_null(&vm->async_ops.pending,
> - struct xe_vma_op, link);
> -}
> -
> -static void xe_vma_op_work_func(struct work_struct *w)
> -{
> - struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
> -
> - for (;;) {
> - struct xe_vma_op *op;
> - int err;
> -
> - if (vm->async_ops.error && !xe_vm_is_closed(vm))
> - break;
> -
> - spin_lock_irq(&vm->async_ops.lock);
> - op = next_vma_op(vm);
> - spin_unlock_irq(&vm->async_ops.lock);
> -
> - if (!op)
> - break;
> -
> - if (!xe_vm_is_closed(vm)) {
> - down_write(&vm->lock);
> - err = xe_vma_op_execute(vm, op);
> - if (err) {
> - drm_warn(&vm->xe->drm,
> - "Async VM op(%d) failed with %d",
> - op->base.op, err);
> - vm_set_async_error(vm, err);
> - up_write(&vm->lock);
> -
> - if (vm->async_ops.error_capture.addr)
> - vm_error_capture(vm, err, 0, 0, 0);
> - break;
> - }
> - up_write(&vm->lock);
> - } else {
> - struct xe_vma *vma;
> -
> - switch (op->base.op) {
> - case DRM_GPUVA_OP_REMAP:
> - vma = gpuva_to_vma(op->base.remap.unmap->va);
> - trace_xe_vma_flush(vma);
> -
> - down_write(&vm->lock);
> - xe_vma_destroy_unlocked(vma);
> - up_write(&vm->lock);
> - break;
> - case DRM_GPUVA_OP_UNMAP:
> - vma = gpuva_to_vma(op->base.unmap.va);
> - trace_xe_vma_flush(vma);
> -
> - down_write(&vm->lock);
> - xe_vma_destroy_unlocked(vma);
> - up_write(&vm->lock);
> - break;
> - default:
> - /* Nothing to do */
> - break;
> - }
> -
> - if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> - &op->fence->fence.flags)) {
> - if (!xe_vm_no_dma_fences(vm)) {
> - op->fence->started = true;
> - wake_up_all(&op->fence->wq);
> - }
> - dma_fence_signal(&op->fence->fence);
> - }
> - }
> -
> - xe_vma_op_cleanup(vm, op);
> - }
> -}
> -
> static int vm_bind_ioctl_ops_commit(struct xe_vm *vm,
> - struct list_head *ops_list, bool async)
> + struct list_head *ops_list,
> + bool reclaim)
> {
> - struct xe_vma_op *op, *last_op, *next;
> + struct xe_vma_op *op, *next;
> int err;
>
> lockdep_assert_held_write(&vm->lock);
>
> list_for_each_entry(op, ops_list, link) {
> - last_op = op;
> err = xe_vma_op_commit(vm, op);
> if (err)
> goto unwind;
> }
>
> - if (!async) {
> - err = xe_vma_op_execute(vm, last_op);
> - if (err)
> - goto unwind;
> - xe_vma_op_cleanup(vm, last_op);
> - } else {
> - int i;
> - bool installed = false;
> -
> - for (i = 0; i < last_op->num_syncs; i++)
> - installed |= xe_sync_entry_signal(&last_op->syncs[i],
> - NULL,
> - &last_op->fence->fence);
> - if (!installed && last_op->fence)
> - dma_fence_signal(&last_op->fence->fence);
> -
> - spin_lock_irq(&vm->async_ops.lock);
> - list_splice_tail(ops_list, &vm->async_ops.pending);
> - spin_unlock_irq(&vm->async_ops.lock);
> -
> - if (!vm->async_ops.error)
> - queue_work(system_unbound_wq, &vm->async_ops.work);
> - }
> + if (!reclaim)
> + list_splice_tail(ops_list, &vm->ops_state.pending);
>
> return 0;
>
> @@ -3034,15 +2637,48 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
> }
> }
>
> +static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> + struct list_head *ops_list)
> +{
> + struct xe_vma_op *op, *next;
> + int err;
> +
> + lockdep_assert_held_write(&vm->lock);
> +
> + list_for_each_entry_safe(op, next, ops_list, link) {
> + err = xe_vma_op_execute(vm, op);
> + if (err) {
> + drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
> + op->base.op, err);
> + xe_vm_set_error(vm, err);
> + return -ENOSPC;
> + }
> + xe_vma_op_cleanup(vm, op);
> + }
> +
> + return 0;
> +}
> +
> +static void vm_bind_ioctl_ops_cleanup(struct xe_vm *vm)
> +{
> + struct xe_vma_op *op, *next;
> +
> + lockdep_assert_held_write(&vm->lock);
> +
> + list_for_each_entry_safe(op, next, &vm->ops_state.pending, link)
> + xe_vma_op_cleanup(vm, op);
> +}
> +
> #ifdef TEST_VM_ASYNC_OPS_ERROR
> #define SUPPORTED_FLAGS \
> (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
> XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | \
> - XE_VM_BIND_FLAG_NULL | 0xffff)
> + XE_VM_BIND_FLAG_NULL | XE_VM_BIND_FLAG_RECLAIM | 0xffff)
> #else
> #define SUPPORTED_FLAGS \
> (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
> - XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | 0xffff)
> + XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | \
> + XE_VM_BIND_FLAG_RECLAIM | 0xffff)
> #endif
> #define XE_64K_PAGE_MASK 0xffffull
>
> @@ -3051,7 +2687,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
> static int vm_bind_ioctl_check_args(struct xe_device *xe,
> struct drm_xe_vm_bind *args,
> struct drm_xe_vm_bind_op **bind_ops,
> - bool *async)
> + bool *async, bool *reclaim)
> {
> int err;
> int i;
> @@ -3092,31 +2728,31 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
>
> if (i == 0) {
> *async = !!(op & XE_VM_BIND_FLAG_ASYNC);
> - } else if (XE_IOCTL_DBG(xe, !*async) ||
> - XE_IOCTL_DBG(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
> + *reclaim = !!(op & XE_VM_BIND_FLAG_RECLAIM);
> + if (XE_IOCTL_DBG(xe, !*async && args->num_syncs) ||
> + XE_IOCTL_DBG(xe, *async && *reclaim)) {
> + err = -EINVAL;
> + goto free_bind_ops;
> + }
> + } else if (XE_IOCTL_DBG(xe, *async !=
> + !!(op & XE_VM_BIND_FLAG_ASYNC)) ||
> + XE_IOCTL_DBG(xe, *reclaim !=
> + !!(op & XE_VM_BIND_FLAG_RECLAIM)) ||
> XE_IOCTL_DBG(xe, VM_BIND_OP(op) ==
> XE_VM_BIND_OP_RESTART)) {
> err = -EINVAL;
> goto free_bind_ops;
> }
>
> - if (XE_IOCTL_DBG(xe, !*async &&
> - VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
> - err = -EINVAL;
> - goto free_bind_ops;
> - }
> -
> - if (XE_IOCTL_DBG(xe, !*async &&
> - VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
> - err = -EINVAL;
> - goto free_bind_ops;
> - }
> -
> if (XE_IOCTL_DBG(xe, VM_BIND_OP(op) >
> XE_VM_BIND_OP_PREFETCH) ||
> XE_IOCTL_DBG(xe, op & ~SUPPORTED_FLAGS) ||
> XE_IOCTL_DBG(xe, obj && is_null) ||
> XE_IOCTL_DBG(xe, obj_offset && is_null) ||
> + XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP &&
> + VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL &&
> + VM_BIND_OP(op) != XE_VM_BIND_OP_RESTART &&
> + *reclaim) ||
> XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP &&
> is_null) ||
> XE_IOCTL_DBG(xe, !obj &&
> @@ -3175,11 +2811,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> struct xe_sync_entry *syncs = NULL;
> struct drm_xe_vm_bind_op *bind_ops;
> LIST_HEAD(ops_list);
> - bool async;
> + bool async, reclaim, restart = false;
> int err;
> int i;
>
> - err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
> + err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async, &reclaim);
> if (err)
> return err;
>
> @@ -3194,6 +2830,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> err = -EINVAL;
> goto put_engine;
> }
> +
> + if (XE_IOCTL_DBG(xe, async !=
> + !!(e->flags & ENGINE_FLAG_VM_ASYNC))) {
> + err = -EINVAL;
> + goto put_engine;
> + }
> }
>
> vm = xe_vm_lookup(xef, args->vm_id);
> @@ -3202,6 +2844,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> goto put_engine;
> }
>
> + if (!args->engine_id) {
> + if (XE_IOCTL_DBG(xe, async !=
> + !!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT))) {
> + err = -EINVAL;
> + goto put_vm;
> + }
> + }
> +
> err = down_write_killable(&vm->lock);
> if (err)
> goto put_vm;
> @@ -3211,32 +2861,29 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> goto release_vm_lock;
> }
>
> + if (XE_IOCTL_DBG(xe, vm->ops_state.error && !reclaim)) {
> + err = -EALREADY;
> + goto release_vm_lock;
> + }
> +
> + if (XE_IOCTL_DBG(xe, !vm->ops_state.error && reclaim)) {
> + err = -EINVAL;
> + goto release_vm_lock;
> + }
> +
> if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
> - if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
> - err = -EOPNOTSUPP;
> - if (XE_IOCTL_DBG(xe, !err && args->num_syncs))
> + if (XE_IOCTL_DBG(xe, args->num_syncs))
> err = EINVAL;
> - if (XE_IOCTL_DBG(xe, !err && !vm->async_ops.error))
> + if (XE_IOCTL_DBG(xe, !err && !vm->ops_state.error))
> err = -EPROTO;
> + if (err)
> + goto release_vm_lock;
>
> - if (!err) {
> - trace_xe_vm_restart(vm);
> - vm_set_async_error(vm, 0);
> -
> - queue_work(system_unbound_wq, &vm->async_ops.work);
> -
> - /* Rebinds may have been blocked, give worker a kick */
> - if (xe_vm_in_compute_mode(vm))
> - xe_vm_queue_rebind_worker(vm);
> - }
> -
> - goto release_vm_lock;
> - }
> + trace_xe_vm_restart(vm);
> + xe_vm_set_error(vm, 0);
> + restart = true;
>
> - if (XE_IOCTL_DBG(xe, !vm->async_ops.error &&
> - async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
> - err = -EOPNOTSUPP;
> - goto release_vm_lock;
> + goto execute;
> }
>
> for (i = 0; i < args->num_binds; ++i) {
> @@ -3324,17 +2971,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> goto free_syncs;
> }
>
> - /* Do some error checking first to make the unwind easier */
> - for (i = 0; i < args->num_binds; ++i) {
> - u64 range = bind_ops[i].range;
> - u64 addr = bind_ops[i].addr;
> - u32 op = bind_ops[i].op;
> -
> - err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
> - if (err)
> - goto free_syncs;
> - }
> -
> for (i = 0; i < args->num_binds; ++i) {
> u64 range = bind_ops[i].range;
> u64 addr = bind_ops[i].addr;
> @@ -3358,10 +2994,28 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> if (err)
> goto unwind_ops;
>
> - err = vm_bind_ioctl_ops_commit(vm, &ops_list, async);
> + xe_vm_get(vm);
> + if (e)
> + xe_engine_get(e);
> + err = vm_bind_ioctl_ops_commit(vm, &ops_list, reclaim);
> +
> +execute:
> + if (!err)
> + err = vm_bind_ioctl_ops_execute(vm, reclaim && !restart ?
> + &ops_list :
> + &vm->ops_state.pending);
> +
> + /* Rebinds may have been blocked, give worker a kick */
> + if (!err && restart && xe_vm_in_compute_mode(vm))
> + xe_vm_queue_rebind_worker(vm);
> +
> up_write(&vm->lock);
>
> - for (i = 0; i < args->num_binds; ++i)
> + if (e)
> + xe_engine_put(e);
> + xe_vm_put(vm);
> +
> + for (i = 0; bos && i < args->num_binds; ++i)
> xe_bo_put(bos[i]);
>
> kfree(bos);
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 4db777d7e375..3d874456a9f2 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -179,8 +179,6 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
>
> int xe_vm_invalidate_vma(struct xe_vma *vma);
>
> -int xe_vm_async_fence_wait_start(struct dma_fence *fence);
> -
> extern struct ttm_device_funcs xe_ttm_funcs;
>
> struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm);
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 261a4b0e8570..ba05e7069dda 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -17,7 +17,6 @@
> #include "xe_pt_types.h"
> #include "xe_range_fence.h"
>
> -struct async_op_fence;
> struct xe_bo;
> struct xe_sync_entry;
> struct xe_vm;
> @@ -163,7 +162,7 @@ struct xe_vm {
> */
> #define XE_VM_FLAG_64K BIT(0)
> #define XE_VM_FLAG_COMPUTE_MODE BIT(1)
> -#define XE_VM_FLAG_ASYNC_BIND_OPS BIT(2)
> +#define XE_VM_FLAG_ASYNC_DEFAULT BIT(2)
> #define XE_VM_FLAG_MIGRATION BIT(3)
> #define XE_VM_FLAG_SCRATCH_PAGE BIT(4)
> #define XE_VM_FLAG_FAULT_MODE BIT(5)
> @@ -214,40 +213,18 @@ struct xe_vm {
> struct list_head list;
> } extobj;
>
> - /** @async_ops: async VM operations (bind / unbinds) */
> + /** @ops_state: VM operations (bind / unbinds) state*/
> struct {
> - /** @list: list of pending async VM ops */
> + /** @list: list of pending VM ops */
> struct list_head pending;
> - /** @work: worker to execute async VM ops */
> - struct work_struct work;
> - /** @lock: protects list of pending async VM ops and fences */
> - spinlock_t lock;
> - /** @error_capture: error capture state */
> - struct {
> - /** @mm: user MM */
> - struct mm_struct *mm;
> - /**
> - * @addr: user pointer to copy error capture state too
> - */
> - u64 addr;
> - /** @wq: user fence wait queue for VM errors */
> - wait_queue_head_t wq;
> - } error_capture;
> - /** @fence: fence state */
> - struct {
> - /** @context: context of async fence */
> - u64 context;
> - /** @seqno: seqno of async fence */
> - u32 seqno;
> - } fence;
> - /** @error: error state for async VM ops */
> + /** @error: error state for VM ops */
> int error;
> /**
> * @munmap_rebind_inflight: an munmap style VM bind is in the
> * middle of a set of ops which requires a rebind at the end.
> */
> bool munmap_rebind_inflight;
> - } async_ops;
> + } ops_state;
>
> /** @userptr: user pointer state */
> struct {
> @@ -404,10 +381,6 @@ struct xe_vma_op {
> u32 num_syncs;
> /** @link: async operation link */
> struct list_head link;
> - /**
> - * @fence: async operation fence, signaled on last operation complete
> - */
> - struct async_op_fence *fence;
> /** @tile_mask: gt mask for this operation */
> u8 tile_mask;
> /** @flags: operation flags */
> diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
> index c4202df1d4f0..9eb5320a4509 100644
> --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
> +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
> @@ -13,7 +13,6 @@
> #include "xe_device.h"
> #include "xe_gt.h"
> #include "xe_macros.h"
> -#include "xe_vm.h"
>
> static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
> {
> @@ -81,8 +80,7 @@ static int check_hw_engines(struct xe_device *xe,
> }
>
> #define VALID_FLAGS (DRM_XE_UFENCE_WAIT_SOFT_OP | \
> - DRM_XE_UFENCE_WAIT_ABSTIME | \
> - DRM_XE_UFENCE_WAIT_VM_ERROR)
> + DRM_XE_UFENCE_WAIT_ABSTIME)
> #define MAX_OP DRM_XE_UFENCE_WAIT_LTE
>
> static unsigned long to_jiffies_timeout(struct drm_xe_wait_user_fence *args)
> @@ -109,11 +107,9 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
> struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
> struct drm_xe_engine_class_instance __user *user_eci =
> u64_to_user_ptr(args->instances);
> - struct xe_vm *vm = NULL;
> u64 addr = args->addr;
> int err;
> - bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP ||
> - args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR;
> + bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP;
> unsigned long timeout;
> ktime_t start;
>
> @@ -134,8 +130,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
> if (XE_IOCTL_DBG(xe, !no_engines && !args->num_engines))
> return -EINVAL;
>
> - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) &&
> - addr & 0x7))
> + if (XE_IOCTL_DBG(xe, addr & 0x7))
> return -EINVAL;
>
> if (XE_IOCTL_DBG(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE))
> @@ -153,22 +148,6 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
> return -EINVAL;
> }
>
> - if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) {
> - if (XE_IOCTL_DBG(xe, args->vm_id >> 32))
> - return -EINVAL;
> -
> - vm = xe_vm_lookup(to_xe_file(file), args->vm_id);
> - if (XE_IOCTL_DBG(xe, !vm))
> - return -ENOENT;
> -
> - if (XE_IOCTL_DBG(xe, !vm->async_ops.error_capture.addr)) {
> - xe_vm_put(vm);
> - return -EOPNOTSUPP;
> - }
> -
> - addr = vm->async_ops.error_capture.addr;
> - }
> -
> /*
> * For negative timeout we want to wait "forever" by setting
> * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also
> @@ -188,15 +167,8 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
> * hardware engine. Open coding as 'do_compare' can sleep which doesn't
> * work with the wait_event_* macros.
> */
> - if (vm)
> - add_wait_queue(&vm->async_ops.error_capture.wq, &w_wait);
> - else
> - add_wait_queue(&xe->ufence_wq, &w_wait);
> + add_wait_queue(&xe->ufence_wq, &w_wait);
> for (;;) {
> - if (vm && xe_vm_is_closed(vm)) {
> - err = -ENODEV;
> - break;
> - }
> err = do_compare(addr, args->value, args->mask, args->op);
> if (err <= 0)
> break;
> @@ -213,12 +185,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
>
> timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout);
> }
> - if (vm) {
> - remove_wait_queue(&vm->async_ops.error_capture.wq, &w_wait);
> - xe_vm_put(vm);
> - } else {
> - remove_wait_queue(&xe->ufence_wq, &w_wait);
> - }
> + remove_wait_queue(&xe->ufence_wq, &w_wait);
>
> if (!(args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)) {
> args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start));
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 259de80376b4..93e097942563 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -475,50 +475,13 @@ struct drm_xe_gem_mmap_offset {
> __u64 reserved[2];
> };
>
> -/**
> - * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture
> - */
> -struct drm_xe_vm_bind_op_error_capture {
> - /** @error: errno that occured */
> - __s32 error;
> -
> - /** @op: operation that encounter an error */
> - __u32 op;
> -
> - /** @addr: address of bind op */
> - __u64 addr;
> -
> - /** @size: size of bind */
> - __u64 size;
> -};
> -
> -/** struct drm_xe_ext_vm_set_property - VM set property extension */
> -struct drm_xe_ext_vm_set_property {
> - /** @base: base user extension */
> - struct xe_user_extension base;
> -
> -#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS 0
> - /** @property: property to set */
> - __u32 property;
> -
> - /** @pad: MBZ */
> - __u32 pad;
> -
> - /** @value: property value */
> - __u64 value;
> -
> - /** @reserved: Reserved */
> - __u64 reserved[2];
> -};
> -
> struct drm_xe_vm_create {
> -#define XE_VM_EXTENSION_SET_PROPERTY 0
> /** @extensions: Pointer to the first extension struct, if any */
> __u64 extensions;
>
> #define DRM_XE_VM_CREATE_SCRATCH_PAGE (0x1 << 0)
> #define DRM_XE_VM_CREATE_COMPUTE_MODE (0x1 << 1)
> -#define DRM_XE_VM_CREATE_ASYNC_BIND_OPS (0x1 << 2)
> +#define DRM_XE_VM_CREATE_ASYNC_DEFAULT (0x1 << 2)
> #define DRM_XE_VM_CREATE_FAULT_MODE (0x1 << 3)
> /** @flags: Flags */
> __u32 flags;
> @@ -583,30 +546,6 @@ struct drm_xe_vm_bind_op {
> #define XE_VM_BIND_OP_PREFETCH 0x5
>
> #define XE_VM_BIND_FLAG_READONLY (0x1 << 16)
> - /*
> - * A bind ops completions are always async, hence the support for out
> - * sync. This flag indicates the allocation of the memory for new page
> - * tables and the job to program the pages tables is asynchronous
> - * relative to the IOCTL. That part of a bind operation can fail under
> - * memory pressure, the job in practice can't fail unless the system is
> - * totally shot.
> - *
> - * If this flag is clear and the IOCTL doesn't return an error, in
> - * practice the bind op is good and will complete.
> - *
> - * If this flag is set and doesn't return an error, the bind op can
> - * still fail and recovery is needed. If configured, the bind op that
> - * caused the error will be captured in drm_xe_vm_bind_op_error_capture.
> - * Once the user sees the error (via a ufence +
> - * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory
> - * via non-async unbinds, and then restart all queue'd async binds op via
> - * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the
> - * VM.
> - *
> - * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is
> - * configured in the VM and must be set if the VM is configured with
> - * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state.
> - */
> #define XE_VM_BIND_FLAG_ASYNC (0x1 << 17)
> /*
> * Valid on a faulting VM only, do the MAP operation immediately rather
> @@ -621,6 +560,7 @@ struct drm_xe_vm_bind_op {
> * intended to implement VK sparse bindings.
> */
> #define XE_VM_BIND_FLAG_NULL (0x1 << 19)
> +#define XE_VM_BIND_FLAG_RECLAIM (0x1 << 20)
> /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
> __u32 op;
>
> @@ -735,10 +675,11 @@ struct drm_xe_engine_class_instance {
> #define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3
> #define DRM_XE_ENGINE_CLASS_COMPUTE 4
> /*
> - * Kernel only class (not actual hardware engine class). Used for
> + * Kernel only classes (not actual hardware engine class). Used for
> * creating ordered queues of VM bind operations.
> */
> -#define DRM_XE_ENGINE_CLASS_VM_BIND 5
> +#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC 5
> +#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC 6
> __u16 engine_class;
>
> __u16 engine_instance;
> @@ -908,18 +849,10 @@ struct drm_xe_wait_user_fence {
> /** @extensions: Pointer to the first extension struct, if any */
> __u64 extensions;
>
> - union {
> - /**
> - * @addr: user pointer address to wait on, must qword aligned
> - */
> - __u64 addr;
> -
> - /**
> - * @vm_id: The ID of the VM which encounter an error used with
> - * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear.
> - */
> - __u64 vm_id;
> - };
> + /**
> + * @addr: user pointer address to wait on, must qword aligned
> + */
> + __u64 addr;
>
> #define DRM_XE_UFENCE_WAIT_EQ 0
> #define DRM_XE_UFENCE_WAIT_NEQ 1
> @@ -932,7 +865,6 @@ struct drm_xe_wait_user_fence {
>
> #define DRM_XE_UFENCE_WAIT_SOFT_OP (1 << 0) /* e.g. Wait on VM bind */
> #define DRM_XE_UFENCE_WAIT_ABSTIME (1 << 1)
> -#define DRM_XE_UFENCE_WAIT_VM_ERROR (1 << 2)
> /** @flags: wait flags */
> __u16 flags;
>