[RFC PATCH 7/7] drm/xe/uapi: Uniform async vs sync handling

Thomas Hellström thomas.hellstrom at linux.intel.com
Mon Dec 11 18:11:15 UTC 2023


On Mon, 2023-12-11 at 16:49 +0000, Matthew Brost wrote:
> On Mon, Dec 11, 2023 at 04:43:06PM +0100, Thomas Hellström wrote:
> > 
> > On 12/8/23 10:45, Matthew Brost wrote:
> > > On Fri, Dec 08, 2023 at 04:00:37PM +0100, Thomas Hellström wrote:
> > > > On 12/7/23 06:57, Matthew Brost wrote:
> > > > > Remove concept of async vs sync VM bind queues, rather make
> > > > > async vs
> > > > > sync a per IOCTL choice. Since this is per IOCTL, it makes
> > > > > sense to have
> > > > > a singular flag IOCTL rather than per VM bind op flag too.
> > > > > Add
> > > > > DRM_XE_SYNCS_FLAG_WAIT_FOR_OP which is an input sync flag to
> > > > > support
> > > > > this. Support this new flag for both the VM bind IOCTL and
> > > > > the exec
> > > > > IOCTL to match behavior.
> > > > > 
> > > > > Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> > > > > Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > > > Cc: Francois Dugast <francois.dugast at intel.com>
> > > > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > > > ---
> > > > >    drivers/gpu/drm/xe/xe_exec.c             |  58 ++++++++---
> > > > > -
> > > > >    drivers/gpu/drm/xe/xe_exec_queue.c       |   7 +-
> > > > >    drivers/gpu/drm/xe/xe_exec_queue_types.h |   2 -
> > > > >    drivers/gpu/drm/xe/xe_vm.c               | 110 ++++++++++-
> > > > > ------------
> > > > >    drivers/gpu/drm/xe/xe_vm_types.h         |  15 ++--
> > > > >    include/uapi/drm/xe_drm.h                |  56 +++++++----
> > > > > -
> > > > >    6 files changed, 129 insertions(+), 119 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/xe/xe_exec.c
> > > > > b/drivers/gpu/drm/xe/xe_exec.c
> > > > > index 92b0da6580e8..c62cabfaa112 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_exec.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > > > > @@ -130,12 +130,15 @@ static int xe_exec_begin(struct
> > > > > drm_exec *exec, struct xe_vm *vm)
> > > > >         return err;
> > > > >    }
> > > > > +#define ALL_DRM_XE_SYNCS_FLAGS
> > > > > (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
> > > > > +
> > > > >    int xe_exec_ioctl(struct drm_device *dev, void *data,
> > > > > struct drm_file *file)
> > > > >    {
> > > > >         struct xe_device *xe = to_xe_device(dev);
> > > > >         struct xe_file *xef = to_xe_file(file);
> > > > >         struct drm_xe_exec *args = data;
> > > > > -       struct drm_xe_sync __user *syncs_user =
> > > > > u64_to_user_ptr(args->syncs);
> > > > > +       struct drm_xe_sync __user *syncs_user =
> > > > > +               u64_to_user_ptr(args->syncs.syncs);
> > > > >         u64 __user *addresses_user = u64_to_user_ptr(args-
> > > > > >address);
> > > > >         struct xe_exec_queue *q;
> > > > >         struct xe_sync_entry *syncs = NULL;
> > > > > @@ -143,15 +146,18 @@ int xe_exec_ioctl(struct drm_device
> > > > > *dev, void *data, struct drm_file *file)
> > > > >         struct drm_exec exec;
> > > > >         u32 i, num_syncs = 0;
> > > > >         struct xe_sched_job *job;
> > > > > -       struct dma_fence *rebind_fence;
> > > > > +       struct dma_fence *rebind_fence, *job_fence;
> > > > >         struct xe_vm *vm;
> > > > > -       bool write_locked;
> > > > > +       bool write_locked, skip_job_put = false;
> > > > > +       bool wait = args->syncs.flags &
> > > > > DRM_XE_SYNCS_FLAG_WAIT_FOR_OP;
> > > > >         ktime_t end = 0;
> > > > >         int err = 0;
> > > > >         if (XE_IOCTL_DBG(xe, args->extensions) ||
> > > > > -           XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] ||
> > > > > args->pad[2]) ||
> > > > > -           XE_IOCTL_DBG(xe, args->reserved[0] || args-
> > > > > >reserved[1]))
> > > > > +           XE_IOCTL_DBG(xe, args->pad || args->pad2[0] ||
> > > > > args->pad2[1] || args->pad2[2]) ||
> > > > > +           XE_IOCTL_DBG(xe, args->reserved[0] || args-
> > > > > >reserved[1]) ||
> > > > > +           XE_IOCTL_DBG(xe, args->syncs.flags &
> > > > > ~ALL_DRM_XE_SYNCS_FLAGS) ||
> > > > > +           XE_IOCTL_DBG(xe, wait && args->syncs.num_syncs))
> > > > >                 return -EINVAL;
> > > > >         q = xe_exec_queue_lookup(xef, args->exec_queue_id);
> > > > > @@ -170,8 +176,9 @@ int xe_exec_ioctl(struct drm_device *dev,
> > > > > void *data, struct drm_file *file)
> > > > >                 goto err_exec_queue;
> > > > >         }
> > > > > -       if (args->num_syncs) {
> > > > > -               syncs = kcalloc(args->num_syncs,
> > > > > sizeof(*syncs), GFP_KERNEL);
> > > > > +       if (args->syncs.num_syncs) {
> > > > > +               syncs = kcalloc(args->syncs.num_syncs,
> > > > > sizeof(*syncs),
> > > > > +                               GFP_KERNEL);
> > > > >                 if (!syncs) {
> > > > >                         err = -ENOMEM;
> > > > >                         goto err_exec_queue;
> > > > > @@ -180,7 +187,7 @@ int xe_exec_ioctl(struct drm_device *dev,
> > > > > void *data, struct drm_file *file)
> > > > >         vm = q->vm;
> > > > > -       for (i = 0; i < args->num_syncs; i++) {
> > > > > +       for (i = 0; i < args->syncs.num_syncs; i++) {
> > > > >                 err = xe_sync_entry_parse(xe, xef,
> > > > > &syncs[num_syncs++],
> > > > >                                           &syncs_user[i],
> > > > > SYNC_PARSE_FLAG_EXEC |
> > > > >                                          
> > > > > (xe_vm_in_lr_mode(vm) ?
> > > > > @@ -245,9 +252,17 @@ int xe_exec_ioctl(struct drm_device
> > > > > *dev, void *data, struct drm_file *file)
> > > > >                                 err = PTR_ERR(fence);
> > > > >                                 goto err_exec;
> > > > >                         }
> > > > > +
> > > > >                         for (i = 0; i < num_syncs; i++)
> > > > >                                 xe_sync_entry_signal(&syncs[i
> > > > > ], NULL, fence);
> > > > > +
> > > > >                         xe_exec_queue_last_fence_set(q, vm,
> > > > > fence);
> > > > > +                       if (wait) {
> > > > > +                               long timeout =
> > > > > dma_fence_wait(fence, true);
> > > > > +
> > > > > +                               if (timeout < 0)
> > > > > +                                       err = -EINTR;
> > > > > +                       }
> > > > Here it looks like we will rerun the same IOCTL again if we
> > > > return -EINTR.
> > > > The user-space expected action on -EINTR is to just restart the
> > > > IOCTL
> > > > without any argument changes. Solution is to add an ioctl
> > > > argument cookie
> > > > (or to skip sync vm binds and have the user just use the 0
> > > > batch buffers /
> > > > vm_binds calls or wait for an out-fence). If you go for the
> > > > cookie solution
> > > > then IMO we should keep the -ERESTARTSYS returned from
> > > > dma_fence_wait()
> > > > since it's converted to -EINTR on return-to-user-space, and the
> > > > kernel
> > > > restarts the IOCTL automatically if there was no requested-for-
> > > > delivery
> > > > signal pending.
> > > > 
> > > > I think the simplest solution at this point is to skip the sync
> > > > behaviour,
> > > > in particular if we enable the 0 batch / bind possibility.
> > > > 
> > > > If we still want to provide it, we could add a cookie address
> > > > as an
> > > > extension to the ioctl and activate sync if present? (Just
> > > > throwing up ideas
> > > > here).
> > > > 
> > > Hmm, forgot about this. A cookie is fairly easy, what about
> > > something like this:
> > > 
> > >   807 /**
> > >   808  * struct drm_xe_syncs - In / out syncs for IOCTLs.
> > >   809  */
> > >   810 struct drm_xe_syncs {
> > >   811         /** @num_syncs: amount of syncs to wait on */
> > >   812         __u32 num_syncs;
> > >   813
> > >   814         /*
> > >   815          * Block in IOCTL until operation complete,
> > > num_syncs MBZ if set.
> > >   816          */
> > >   817 #define DRM_XE_SYNCS_IN_FLAG_WAIT_FOR_OP (1 << 0)
> > >   818         /** @in_flags: Input Sync flags */
> > >   819         __u16 in_flags;
> > >   820
> > >   821         /*
> > >   822          * IOCTL operation has started (no need for user to
> > > resubmit on
> > >   823          * -ERESTARTSYS)
> > >   824          */
> > >   825 #define DRM_XE_SYNCS_OUT_FLAG_OP_COMMITTED (1 << 0)
> > >   826         /** @out_flags: Output Sync flags */
> > >   827         __u16 out_flags;
> > >   828
> > >   829         /** @syncs: pointer to struct drm_xe_sync array */
> > >   830         __u64 syncs;
> > >   831
> > >   832         /** @reserved: Reserved */
> > >   833         __u64 reserved[2];
> > >   834 };
> > > 
> > > DRM_XE_SYNCS_OUT_FLAG_OP_COMMITTED gets set in the exec / bind IOCTL
> > > after the job is committed, or, in the case of zero ops, after the
> > > last-fence is updated on the queue. Note that for binds we don't yet
> > > do one job per IOCTL, but we will after landing some version of [1].
> > > 
> > > After DRM_XE_SYNCS_OUT_FLAG_OP_COMMITTED is set we return -ERESTARTSYS
> > > if the wait is interrupted, and -EINTR if DRM_XE_SYNCS_OUT_FLAG_OP_COMMITTED
> > > is still unset (interrupted before the job is committed).
> > > 
> > > I'd rather go with the patch, as we have to change the uAPI here
> > > regardless, so we might as well make this complete.
> > > 
> > > Matt
> > > 
> > > [1] https://patchwork.freedesktop.org/series/125608/
> > 
> > Yeah as we discussed in the meeting that means making the ioctl RW
> > instead
> > of W with some copying overhead.
> > 
> > I also think we should leave the EXEC ioctl out of this, meaning
> > just having
> > a single field in the VM_BIND ioctl. Basically the reason is that
> > waiting
> > like this after submission is a bit weird and does not align well
> > with how
> > -EINTR is typically used.
> > 
> 
> I kinda like uniform behavior between exec and binds with the
> behavior
> defined in a common sync structure.

Even so, I strongly think we should *not* in any way expose this for
exec. If needed, the user can just wait for an out-fence, and then we
don't need to implement code for this that will probably never get used
and whose implementation very few will understand.
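
As an illustration, here is a rough userspace sketch of the out-fence
alternative. It uses the syncobj helpers from libdrm and the sync / exec
layout proposed in this series; treat the drm_xe_sync type / flag names
as assumptions rather than settled uapi:

#include <stdint.h>
#include <xf86drm.h>
#include "xe_drm.h"

static int exec_and_wait(int fd, uint32_t exec_queue_id, uint64_t batch_addr)
{
	uint32_t syncobj;
	int err;

	err = drmSyncobjCreate(fd, 0, &syncobj);
	if (err)
		return err;

	/* Out-fence: ask the exec to signal the syncobj on completion. */
	struct drm_xe_sync sync = {
		.type = DRM_XE_SYNC_TYPE_SYNCOBJ,	/* assumed name */
		.flags = DRM_XE_SYNC_FLAG_SIGNAL,	/* assumed name */
		.handle = syncobj,
	};
	struct drm_xe_exec exec = {
		.exec_queue_id = exec_queue_id,
		.syncs = { .num_syncs = 1, .syncs = (uintptr_t)&sync },
		.address = batch_addr,
		.num_batch_buffer = 1,
	};

	err = drmIoctl(fd, DRM_IOCTL_XE_EXEC, &exec);
	if (err)
		goto out;

	/* Block here instead of inside the exec IOCTL. */
	err = drmSyncobjWait(fd, &syncobj, 1, INT64_MAX, 0, NULL);
out:
	drmSyncobjDestroy(fd, syncobj);
	return err;
}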

Furthermore, per the ASYNC VM_BIND doc, the sync VM_BIND ioctl allows
neither in-fences nor out-fences, so grouping like this becomes a bit
of overkill.

> 
> > So either a pointer to a cookie in the ioctl,
> > 
> 
> What about:
> 
> > >   807 /**
> > >   808  * struct drm_xe_syncs - In / out syncs for IOCTLs.
> > >   809  */
> > >   810 struct drm_xe_syncs {
> > >   811         /** @num_syncs: amount of syncs to wait on */
> > >   812         __u32 num_syncs;
> > >   813
> > >   814         /*
> > >   815          * Block in IOCTL until operation complete, num_syncs MBZ if set.
> > >   816          */
> > >   817 #define DRM_XE_SYNCS_IN_FLAG_WAIT_FOR_OP (1 << 0)
> > >   818         /** @flags: Sync flags */
> > >   819         __u32 in_flags;
> > >   820
> > >   826         /** @cookie: userptr cookie written back with non-zero value once operation committed, only valid when IOCTL returns -EINTR */
> > >   827         __u64 cookie;
> > >   828
> > >   829         /** @syncs: pointer to struct drm_xe_sync array */
> > >   830         __u64 syncs;
> > >   831
> > >   832         /** @reserved: Reserved */
> > >   833         __u64 reserved[2];
> > >   834 };
> 
> Also if cookie is 0, we wait uninterruptibly once the op is committed?

I'm afraid I don't follow. The *interruptible* wait after commit is
what triggers the need for a cookie in the first place? Also, @cookie
here is still read-only for the kernel, since struct drm_xe_syncs is
embedded in the ioctl arguments. And I think any cookie should be
opaque to the user, other than that it must be 0 if not calling after
an -ERESTART.
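
To make that concrete: if we do keep the interruptible wait, the cookie
has to live behind a user pointer so the kernel can actually write it
back. Hypothetical sketch only, reusing the existing user-extension
mechanism; none of these names are agreed uapi:

struct drm_xe_vm_bind_ext_sync_cookie {
	/** @base: base user extension (next_extension / name) */
	struct drm_xe_user_extension base;

	/**
	 * @cookie_ptr: user pointer to a __u64 cookie. Must point to 0 on
	 * the first call; the kernel writes a non-zero value once the
	 * operation is committed, so a restarted call after -EINTR can
	 * skip straight to the wait.
	 */
	__u64 cookie_ptr;

	/** @reserved: Reserved */
	__u64 reserved[2];
};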

> 
> > or perhaps dig up again the idea we had of mostly waiting before
> > the
> > submission:
> > 
> > 1) Pull out the last_op fence for the queue from under the relevant
> > lock.
> > 2) Wait for all dependencies without any locks.
> > 3) Lock, and (optionally) if the last_op fence changed, wait for
> > it.
> > 4) Submit
> > 5) Wait for completion uninterruptible.
> > 
> 
> We can always change the internal implementation to something like
> this
> after [1]. That series makes refactors like this quite a bit easier.

Well, the idea of the above 1) - 5) was that we wouldn't need any
cookie at all, since the wait in 5) would be short, and we therefore
could get away with making it uninterruptible. If that turned out to be
bad, we could add the cookie as an extension. The initial implementation
can even use uninterruptible waits for simplicity.
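
Roughly, in kernel terms (pseudocode sketch; the two helpers below are
placeholders for whatever the refactor in [1] ends up providing, not
existing xe functions):

static int vm_bind_sync_sketch(struct xe_vm *vm, struct xe_exec_queue *q)
{
	struct dma_fence *last, *fence;
	long err;

	/* 1) Pull out the queue's last_op fence under the relevant lock. */
	last = snapshot_last_fence_locked(vm, q);

	/* 2) Wait for all dependencies without holding any locks. */
	err = dma_fence_wait(last, true);	/* interruptible, restartable */
	dma_fence_put(last);
	if (err < 0)
		return err;			/* nothing committed yet */

	/*
	 * 3) Relock; if last_op changed in the meantime, optionally wait
	 *    for the new one.
	 * 4) Submit the bind job.
	 */
	fence = submit_bind_job_locked(vm, q);

	/*
	 * 5) The remaining wait is expected to be short, so it can be
	 *    uninterruptible and no cookie is needed.
	 */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return 0;
}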

To summarize:

* I strongly think we should not support sync exec calls.
* No in-syncs or out-syncs if SYNC.
* A flag to trigger sync binds (see the sketch below). The syncs could
be kept in a separate struct, but that's not really needed if we don't
support sync execs.
* If we go for the interruptible wait, we need a writable cookie that
is not embedded in the main struct.
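
For the bind flag, something along these lines would be enough.
Simplified sketch only; the field set and names are illustrative, not
the actual xe uapi:

struct drm_xe_vm_bind_sketch {
	/** @vm_id: VM to bind to */
	__u32 vm_id;

	/** @exec_queue_id: bind queue, 0 for the default one */
	__u32 exec_queue_id;

	/*
	 * Wait in the IOCTL for all bind ops to complete. Requires
	 * num_syncs == 0, since sync binds allow neither in- nor
	 * out-fences.
	 */
#define DRM_XE_VM_BIND_FLAG_SYNC	(1 << 0)
	/** @flags: IOCTL-level bind flags (placeholder) */
	__u32 flags;

	/** @num_syncs: number of struct drm_xe_sync entries */
	__u32 num_syncs;

	/** @syncs: pointer to struct drm_xe_sync array */
	__u64 syncs;
};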


/Thomas



> 
> Matt
> 
> [1] https://patchwork.freedesktop.org/series/125608/ 
> 
> > I actually like this last one best, but we'd recommend UMD to use
> > out-fences whenever possible.
> > 
> > Thoughts?
> > 
> > > 
> > > > >                         dma_fence_put(fence);
> > > > >                 }
> > > > > @@ -331,42 +346,51 @@ int xe_exec_ioctl(struct drm_device
> > > > > *dev, void *data, struct drm_file *file)
> > > > >          * the job and let the DRM scheduler / backend clean
> > > > > up the job.
> > > > >          */
> > > > >         xe_sched_job_arm(job);
> > > > > +       job_fence = &job->drm.s_fence->finished;
> > > > > +       if (wait)
> > > > > +               dma_fence_get(job_fence);
> > > > >         if (!xe_vm_in_lr_mode(vm)) {
> > > > >                 /* Block userptr invalidations / BO eviction
> > > > > */
> > > > > -               dma_resv_add_fence(&vm->resv,
> > > > > -                                  &job->drm.s_fence-
> > > > > >finished,
> > > > > +               dma_resv_add_fence(&vm->resv, job_fence,
> > > > >                                    DMA_RESV_USAGE_BOOKKEEP);
> > > > >                 /*
> > > > >                  * Make implicit sync work across drivers,
> > > > > assuming all external
> > > > >                  * BOs are written as we don't pass in a read
> > > > > / write list.
> > > > >                  */
> > > > > -               xe_vm_fence_all_extobjs(vm, &job-
> > > > > >drm.s_fence->finished,
> > > > > -
> > > > >                                        DMA_RESV_USAGE_WRITE);
> > > > > +               xe_vm_fence_all_extobjs(vm, job_fence,
> > > > > DMA_RESV_USAGE_WRITE);
> > > > >         }
> > > > >         for (i = 0; i < num_syncs; i++)
> > > > > -               xe_sync_entry_signal(&syncs[i], job,
> > > > > -                                    &job->drm.s_fence-
> > > > > >finished);
> > > > > +               xe_sync_entry_signal(&syncs[i], job,
> > > > > job_fence);
> > > > >         if (xe_exec_queue_is_lr(q))
> > > > >                 q->ring_ops->emit_job(job);
> > > > >         if (!xe_vm_in_lr_mode(vm))
> > > > > -               xe_exec_queue_last_fence_set(q, vm, &job-
> > > > > >drm.s_fence->finished);
> > > > > +               xe_exec_queue_last_fence_set(q, vm,
> > > > > job_fence);
> > > > >         xe_sched_job_push(job);
> > > > >         xe_vm_reactivate_rebind(vm);
> > > > > -       if (!err && !xe_vm_in_lr_mode(vm)) {
> > > > > +       if (!xe_vm_in_lr_mode(vm)) {
> > > > >                 spin_lock(&xe->ttm.lru_lock);
> > > > >                 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
> > > > >                 spin_unlock(&xe->ttm.lru_lock);
> > > > >         }
> > > > > +       skip_job_put = true;
> > > > > +       if (wait) {
> > > > > +               long timeout = dma_fence_wait(job_fence,
> > > > > true);
> > > > > +
> > > > > +               dma_fence_put(job_fence);
> > > > > +               if (timeout < 0)
> > > > > +                       err = -EINTR;
> > > > > +       }
> > > > > +
> > > > >    err_repin:
> > > > >         if (!xe_vm_in_lr_mode(vm))
> > > > >                 up_read(&vm->userptr.notifier_lock);
> > > > >    err_put_job:
> > > > > -       if (err)
> > > > > +       if (err && !skip_job_put)
> > > > >                 xe_sched_job_put(job);
> > > > >    err_exec:
> > > > >         drm_exec_fini(&exec);
> > > > > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c
> > > > > b/drivers/gpu/drm/xe/xe_exec_queue.c
> > > > > index 3911d14522ee..98776d02d634 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > > > > @@ -625,10 +625,7 @@ int xe_exec_queue_create_ioctl(struct
> > > > > drm_device *dev, void *data,
> > > > >         if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe-
> > > > > >info.gt_count))
> > > > >                 return -EINVAL;
> > > > > -       if (eci[0].engine_class >=
> > > > > DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC) {
> > > > > -               bool sync = eci[0].engine_class ==
> > > > > -                       DRM_XE_ENGINE_CLASS_VM_BIND_SYNC;
> > > > > -
> > > > > +       if (eci[0].engine_class ==
> > > > > DRM_XE_ENGINE_CLASS_VM_BIND) {
> > > > >                 for_each_gt(gt, xe, id) {
> > > > >                         struct xe_exec_queue *new;
> > > > > @@ -654,8 +651,6 @@ int xe_exec_queue_create_ioctl(struct
> > > > > drm_device *dev, void *data,
> > > > >                                                    args-
> > > > > >width, hwe,
> > > > >                                                   
> > > > > EXEC_QUEUE_FLAG_PERSISTENT |
> > > > >                                                   
> > > > > EXEC_QUEUE_FLAG_VM |
> > > > > -                                                  (sync ? 0
> > > > > :
> > > > > -                                                  
> > > > > EXEC_QUEUE_FLAG_VM_ASYNC) |
> > > > >                                                    (id ?
> > > > >                                                    
> > > > > EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
> > > > >                                                     0));
> > > > > diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > > > b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > > > index 52f0927d0d9b..c78f6e8b41c4 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > > > +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > > > @@ -74,8 +74,6 @@ struct xe_exec_queue {
> > > > >    #define EXEC_QUEUE_FLAG_VM                   BIT(4)
> > > > >    /* child of VM queue for multi-tile VM jobs */
> > > > >    #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD    BIT(5)
> > > > > -/* VM jobs for this queue are asynchronous */
> > > > > -#define EXEC_QUEUE_FLAG_VM_ASYNC               BIT(6)
> > > > >         /**
> > > > >          * @flags: flags for this exec queue, should
> > > > > statically setup aside from ban
> > > > > diff --git a/drivers/gpu/drm/xe/xe_vm.c
> > > > > b/drivers/gpu/drm/xe/xe_vm.c
> > > > > index cf2eb44a71db..4b0c976c003a 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > > > @@ -1433,9 +1433,7 @@ struct xe_vm *xe_vm_create(struct
> > > > > xe_device *xe, u32 flags)
> > > > >                         struct xe_gt *gt = tile->primary_gt;
> > > > >                         struct xe_vm *migrate_vm;
> > > > >                         struct xe_exec_queue *q;
> > > > > -                       u32 create_flags = EXEC_QUEUE_FLAG_VM
> > > > > |
> > > > > -                               ((flags &
> > > > > XE_VM_FLAG_ASYNC_DEFAULT) ?
> > > > > -                               EXEC_QUEUE_FLAG_VM_ASYNC :
> > > > > 0);
> > > > > +                       u32 create_flags =
> > > > > EXEC_QUEUE_FLAG_VM;
> > > > >                         if (!vm->pt_root[id])
> > > > >                                 continue;
> > > > > @@ -1835,16 +1833,10 @@ xe_vm_bind_vma(struct xe_vma *vma,
> > > > > struct xe_exec_queue *q,
> > > > >         return ERR_PTR(err);
> > > > >    }
> > > > > -static bool xe_vm_sync_mode(struct xe_vm *vm, struct
> > > > > xe_exec_queue *q)
> > > > > -{
> > > > > -       return q ? !(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC) :
> > > > > -               !(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT);
> > > > > -}
> > > > > -
> > > > >    static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma
> > > > > *vma,
> > > > >                         struct xe_exec_queue *q, struct
> > > > > xe_sync_entry *syncs,
> > > > >                         u32 num_syncs, bool immediate, bool
> > > > > first_op,
> > > > > -                       bool last_op)
> > > > > +                       bool last_op, bool async)
> > > > >    {
> > > > >         struct dma_fence *fence;
> > > > >         struct xe_exec_queue *wait_exec_queue =
> > > > > to_wait_exec_queue(vm, q);
> > > > > @@ -1870,7 +1862,7 @@ static int __xe_vm_bind(struct xe_vm
> > > > > *vm, struct xe_vma *vma,
> > > > >         if (last_op)
> > > > >                 xe_exec_queue_last_fence_set(wait_exec_queue,
> > > > > vm, fence);
> > > > > -       if (last_op && xe_vm_sync_mode(vm, q))
> > > > > +       if (last_op && !async)
> > > > >                 dma_fence_wait(fence, true);
> > > > >         dma_fence_put(fence);
> > > > > @@ -1880,7 +1872,7 @@ static int __xe_vm_bind(struct xe_vm
> > > > > *vm, struct xe_vma *vma,
> > > > >    static int xe_vm_bind(struct xe_vm *vm, struct xe_vma
> > > > > *vma, struct xe_exec_queue *q,
> > > > >                       struct xe_bo *bo, struct xe_sync_entry
> > > > > *syncs,
> > > > >                       u32 num_syncs, bool immediate, bool
> > > > > first_op,
> > > > > -                     bool last_op)
> > > > > +                     bool last_op, bool async)
> > > > >    {
> > > > >         int err;
> > > > > @@ -1894,12 +1886,12 @@ static int xe_vm_bind(struct xe_vm
> > > > > *vm, struct xe_vma *vma, struct xe_exec_queue
> > > > >         }
> > > > >         return __xe_vm_bind(vm, vma, q, syncs, num_syncs,
> > > > > immediate, first_op,
> > > > > -                           last_op);
> > > > > +                           last_op, async);
> > > > >    }
> > > > >    static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma
> > > > > *vma,
> > > > >                         struct xe_exec_queue *q, struct
> > > > > xe_sync_entry *syncs,
> > > > > -                       u32 num_syncs, bool first_op, bool
> > > > > last_op)
> > > > > +                       u32 num_syncs, bool first_op, bool
> > > > > last_op, bool async)
> > > > >    {
> > > > >         struct dma_fence *fence;
> > > > >         struct xe_exec_queue *wait_exec_queue =
> > > > > to_wait_exec_queue(vm, q);
> > > > > @@ -1914,7 +1906,7 @@ static int xe_vm_unbind(struct xe_vm
> > > > > *vm, struct xe_vma *vma,
> > > > >         xe_vma_destroy(vma, fence);
> > > > >         if (last_op)
> > > > >                 xe_exec_queue_last_fence_set(wait_exec_queue,
> > > > > vm, fence);
> > > > > -       if (last_op && xe_vm_sync_mode(vm, q))
> > > > > +       if (last_op && !async)
> > > > >                 dma_fence_wait(fence, true);
> > > > It looks like we're dropping the error return code here.
> > > > 
> > > > 
> > > > >         dma_fence_put(fence);
> > > > > @@ -1923,7 +1915,6 @@ static int xe_vm_unbind(struct xe_vm
> > > > > *vm, struct xe_vma *vma,
> > > > >    #define ALL_DRM_XE_VM_CREATE_FLAGS
> > > > > (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
> > > > >                                    
> > > > > DRM_XE_VM_CREATE_FLAG_LR_MODE | \
> > > > > -                                  
> > > > > DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT | \
> > > > >                                    
> > > > > DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
> > > > >    int xe_vm_create_ioctl(struct drm_device *dev, void *data,
> > > > > @@ -1977,8 +1968,6 @@ int xe_vm_create_ioctl(struct
> > > > > drm_device *dev, void *data,
> > > > >                 flags |= XE_VM_FLAG_SCRATCH_PAGE;
> > > > >         if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
> > > > >                 flags |= XE_VM_FLAG_LR_MODE;
> > > > > -       if (args->flags &
> > > > > DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT)
> > > > > -               flags |= XE_VM_FLAG_ASYNC_DEFAULT;
> > > > >         if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
> > > > >                 flags |= XE_VM_FLAG_FAULT_MODE;
> > > > > @@ -2062,7 +2051,7 @@ static const u32 region_to_mem_type[] =
> > > > > {
> > > > >    static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma
> > > > > *vma,
> > > > >                           struct xe_exec_queue *q, u32
> > > > > region,
> > > > >                           struct xe_sync_entry *syncs, u32
> > > > > num_syncs,
> > > > > -                         bool first_op, bool last_op)
> > > > > +                         bool first_op, bool last_op, bool
> > > > > async)
> > > > >    {
> > > > >         struct xe_exec_queue *wait_exec_queue =
> > > > > to_wait_exec_queue(vm, q);
> > > > >         int err;
> > > > > @@ -2077,7 +2066,7 @@ static int xe_vm_prefetch(struct xe_vm
> > > > > *vm, struct xe_vma *vma,
> > > > >         if (vma->tile_mask != (vma->tile_present & ~vma-
> > > > > >usm.tile_invalidated)) {
> > > > >                 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma),
> > > > > syncs, num_syncs,
> > > > > -                                 true, first_op, last_op);
> > > > > +                                 true, first_op, last_op,
> > > > > async);
> > > > >         } else {
> > > > >                 int i;
> > > > > @@ -2400,6 +2389,8 @@ static int
> > > > > vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct
> > > > > xe_exec_queue *q,
> > > > >                 }
> > > > >                 op->q = q;
> > > > > +               if (async)
> > > > > +                       op->flags |= XE_VMA_OP_ASYNC;
> > > > >                 switch (op->base.op) {
> > > > >                 case DRM_GPUVA_OP_MAP:
> > > > > @@ -2538,7 +2529,8 @@ static int op_execute(struct drm_exec
> > > > > *exec, struct xe_vm *vm,
> > > > >                                  op->syncs, op->num_syncs,
> > > > >                                  op->map.immediate ||
> > > > > !xe_vm_in_fault_mode(vm),
> > > > >                                  op->flags & XE_VMA_OP_FIRST,
> > > > > -                                op->flags & XE_VMA_OP_LAST);
> > > > > +                                op->flags & XE_VMA_OP_LAST,
> > > > > +                                op->flags &
> > > > > XE_VMA_OP_ASYNC);
> > > > >                 break;
> > > > >         case DRM_GPUVA_OP_REMAP:
> > > > >         {
> > > > > @@ -2552,7 +2544,8 @@ static int op_execute(struct drm_exec
> > > > > *exec, struct xe_vm *vm,
> > > > >                                            op->num_syncs,
> > > > >                                            op->flags &
> > > > > XE_VMA_OP_FIRST,
> > > > >                                            op->flags &
> > > > > XE_VMA_OP_LAST &&
> > > > > -                                          !prev && !next);
> > > > > +                                          !prev && !next,
> > > > > +                                          op->flags &
> > > > > XE_VMA_OP_ASYNC);
> > > > >                         if (err)
> > > > >                                 break;
> > > > >                         op->remap.unmap_done = true;
> > > > > @@ -2563,7 +2556,8 @@ static int op_execute(struct drm_exec
> > > > > *exec, struct xe_vm *vm,
> > > > >                         err = xe_vm_bind(vm, op->remap.prev,
> > > > > op->q,
> > > > >                                          xe_vma_bo(op-
> > > > > >remap.prev), op->syncs,
> > > > >                                          op->num_syncs, true,
> > > > > false,
> > > > > -                                        op->flags &
> > > > > XE_VMA_OP_LAST && !next);
> > > > > +                                        op->flags &
> > > > > XE_VMA_OP_LAST && !next,
> > > > > +                                        op->flags &
> > > > > XE_VMA_OP_ASYNC);
> > > > >                         op->remap.prev->gpuva.flags &=
> > > > > ~XE_VMA_LAST_REBIND;
> > > > >                         if (err)
> > > > >                                 break;
> > > > > @@ -2576,7 +2570,8 @@ static int op_execute(struct drm_exec
> > > > > *exec, struct xe_vm *vm,
> > > > >                                          xe_vma_bo(op-
> > > > > >remap.next),
> > > > >                                          op->syncs, op-
> > > > > >num_syncs,
> > > > >                                          true, false,
> > > > > -                                        op->flags &
> > > > > XE_VMA_OP_LAST);
> > > > > +                                        op->flags &
> > > > > XE_VMA_OP_LAST,
> > > > > +                                        op->flags &
> > > > > XE_VMA_OP_ASYNC);
> > > > >                         op->remap.next->gpuva.flags &=
> > > > > ~XE_VMA_LAST_REBIND;
> > > > >                         if (err)
> > > > >                                 break;
> > > > > @@ -2588,13 +2583,15 @@ static int op_execute(struct drm_exec
> > > > > *exec, struct xe_vm *vm,
> > > > >         case DRM_GPUVA_OP_UNMAP:
> > > > >                 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
> > > > >                                    op->num_syncs, op->flags &
> > > > > XE_VMA_OP_FIRST,
> > > > > -                                  op->flags &
> > > > > XE_VMA_OP_LAST);
> > > > > +                                  op->flags &
> > > > > XE_VMA_OP_LAST,
> > > > > +                                  op->flags &
> > > > > XE_VMA_OP_ASYNC);
> > > > >                 break;
> > > > >         case DRM_GPUVA_OP_PREFETCH:
> > > > >                 err = xe_vm_prefetch(vm, vma, op->q, op-
> > > > > >prefetch.region,
> > > > >                                      op->syncs, op-
> > > > > >num_syncs,
> > > > >                                      op->flags &
> > > > > XE_VMA_OP_FIRST,
> > > > > -                                    op->flags &
> > > > > XE_VMA_OP_LAST);
> > > > > +                                    op->flags &
> > > > > XE_VMA_OP_LAST,
> > > > > +                                    op->flags &
> > > > > XE_VMA_OP_ASYNC);
> > > > >                 break;
> > > > >         default:
> > > > >                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > > > > @@ -2808,16 +2805,16 @@ static int
> > > > > vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> > > > >    #ifdef TEST_VM_ASYNC_OPS_ERROR
> > > > >    #define SUPPORTED_FLAGS      \
> > > > > -       (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_ASYNC | \
> > > > > -        DRM_XE_VM_BIND_FLAG_READONLY |
> > > > > DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
> > > > > -        DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
> > > > > +       (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY
> > > > > | \
> > > > > +        DRM_XE_VM_BIND_FLAG_IMMEDIATE |
> > > > > DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
> > > > >    #else
> > > > >    #define SUPPORTED_FLAGS      \
> > > > > -       (DRM_XE_VM_BIND_FLAG_ASYNC |
> > > > > DRM_XE_VM_BIND_FLAG_READONLY | \
> > > > > +       (DRM_XE_VM_BIND_FLAG_READONLY | \
> > > > >          DRM_XE_VM_BIND_FLAG_IMMEDIATE |
> > > > > DRM_XE_VM_BIND_FLAG_NULL | \
> > > > >          0xffff)
> > > > >    #endif
> > > > >    #define XE_64K_PAGE_MASK 0xffffull
> > > > > +#define ALL_DRM_XE_SYNCS_FLAGS
> > > > > (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
> > > > >    #define MAX_BINDS    512     /* FIXME: Picking random
> > > > > upper limit */
> > > > > @@ -2829,7 +2826,7 @@ static int
> > > > > vm_bind_ioctl_check_args(struct xe_device *xe,
> > > > >         int err;
> > > > >         int i;
> > > > > -       if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
> > > > > +       if (XE_IOCTL_DBG(xe, args->pad) ||
> > > > >             XE_IOCTL_DBG(xe, args->reserved[0] || args-
> > > > > >reserved[1]))
> > > > >                 return -EINVAL;
> > > > > @@ -2857,6 +2854,14 @@ static int
> > > > > vm_bind_ioctl_check_args(struct xe_device *xe,
> > > > >                 *bind_ops = &args->bind;
> > > > >         }
> > > > > +       *async = !(args->syncs.flags &
> > > > > DRM_XE_SYNCS_FLAG_WAIT_FOR_OP);
> > > > > +
> > > > > +       if (XE_IOCTL_DBG(xe, args->syncs.flags &
> > > > > ~ALL_DRM_XE_SYNCS_FLAGS) ||
> > > > > +           XE_IOCTL_DBG(xe, !*async && args-
> > > > > >syncs.num_syncs)) {
> > > > > +               err = -EINVAL;
> > > > > +               goto free_bind_ops;
> > > > > +       }
> > > > > +
> > > > >         for (i = 0; i < args->num_binds; ++i) {
> > > > >                 u64 range = (*bind_ops)[i].range;
> > > > >                 u64 addr = (*bind_ops)[i].addr;
> > > > > @@ -2887,18 +2892,6 @@ static int
> > > > > vm_bind_ioctl_check_args(struct xe_device *xe,
> > > > >                         goto free_bind_ops;
> > > > >                 }
> > > > > -               if (i == 0) {
> > > > > -                       *async = !!(flags &
> > > > > DRM_XE_VM_BIND_FLAG_ASYNC);
> > > > > -                       if (XE_IOCTL_DBG(xe, !*async && args-
> > > > > >num_syncs)) {
> > > > > -                               err = -EINVAL;
> > > > > -                               goto free_bind_ops;
> > > > > -                       }
> > > > > -               } else if (XE_IOCTL_DBG(xe, *async !=
> > > > > -                                       !!(flags &
> > > > > DRM_XE_VM_BIND_FLAG_ASYNC))) {
> > > > > -                       err = -EINVAL;
> > > > > -                       goto free_bind_ops;
> > > > > -               }
> > > > > -
> > > > >                 if (XE_IOCTL_DBG(xe, op >
> > > > > DRM_XE_VM_BIND_OP_PREFETCH) ||
> > > > >                     XE_IOCTL_DBG(xe, flags &
> > > > > ~SUPPORTED_FLAGS) ||
> > > > >                     XE_IOCTL_DBG(xe, obj && is_null) ||
> > > > > @@ -2951,7 +2944,7 @@ static int
> > > > > vm_bind_ioctl_check_args(struct xe_device *xe,
> > > > >    static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
> > > > >                                        struct xe_exec_queue
> > > > > *q,
> > > > >                                        struct xe_sync_entry
> > > > > *syncs,
> > > > > -                                      int num_syncs)
> > > > > +                                      int num_syncs, bool
> > > > > async)
> > > > >    {
> > > > >         struct dma_fence *fence;
> > > > >         int i, err = 0;
> > > > > @@ -2967,7 +2960,7 @@ static int
> > > > > vm_bind_ioctl_signal_fences(struct xe_vm *vm,
> > > > >         xe_exec_queue_last_fence_set(to_wait_exec_queue(vm,
> > > > > q), vm,
> > > > >                                      fence);
> > > > > -       if (xe_vm_sync_mode(vm, q)) {
> > > > > +       if (!async) {
> > > > >                 long timeout = dma_fence_wait(fence, true);
> > > > >                 if (timeout < 0)
> > > > > @@ -3001,7 +2994,7 @@ int xe_vm_bind_ioctl(struct drm_device
> > > > > *dev, void *data, struct drm_file *file)
> > > > >         if (err)
> > > > >                 return err;
> > > > > -       if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
> > > > > +       if (XE_IOCTL_DBG(xe, args->pad) ||
> > > > >             XE_IOCTL_DBG(xe, args->reserved[0] || args-
> > > > > >reserved[1]))
> > > > >                 return -EINVAL;
> > > > > @@ -3016,12 +3009,6 @@ int xe_vm_bind_ioctl(struct drm_device
> > > > > *dev, void *data, struct drm_file *file)
> > > > >                         err = -EINVAL;
> > > > >                         goto put_exec_queue;
> > > > >                 }
> > > > > -
> > > > > -               if (XE_IOCTL_DBG(xe, args->num_binds && async
> > > > > !=
> > > > > -                                !!(q->flags &
> > > > > EXEC_QUEUE_FLAG_VM_ASYNC))) {
> > > > > -                       err = -EINVAL;
> > > > > -                       goto put_exec_queue;
> > > > > -               }
> > > > >         }
> > > > >         vm = xe_vm_lookup(xef, args->vm_id);
> > > > > @@ -3030,14 +3017,6 @@ int xe_vm_bind_ioctl(struct drm_device
> > > > > *dev, void *data, struct drm_file *file)
> > > > >                 goto put_exec_queue;
> > > > >         }
> > > > > -       if (!args->exec_queue_id) {
> > > > > -               if (XE_IOCTL_DBG(xe, args->num_binds && async
> > > > > !=
> > > > > -                                !!(vm->flags &
> > > > > XE_VM_FLAG_ASYNC_DEFAULT))) {
> > > > > -                       err = -EINVAL;
> > > > > -                       goto put_vm;
> > > > > -               }
> > > > > -       }
> > > > > -
> > > > >         err = down_write_killable(&vm->lock);
> > > > >         if (err)
> > > > >                 goto put_vm;
> > > > > @@ -3127,16 +3106,16 @@ int xe_vm_bind_ioctl(struct
> > > > > drm_device *dev, void *data, struct drm_file *file)
> > > > >                 }
> > > > >         }
> > > > > -       if (args->num_syncs) {
> > > > > -               syncs = kcalloc(args->num_syncs,
> > > > > sizeof(*syncs), GFP_KERNEL);
> > > > > +       if (args->syncs.num_syncs) {
> > > > > +               syncs = kcalloc(args->syncs.num_syncs,
> > > > > sizeof(*syncs), GFP_KERNEL);
> > > > >                 if (!syncs) {
> > > > >                         err = -ENOMEM;
> > > > >                         goto put_obj;
> > > > >                 }
> > > > >         }
> > > > > -       syncs_user = u64_to_user_ptr(args->syncs);
> > > > > -       for (num_syncs = 0; num_syncs < args->num_syncs;
> > > > > num_syncs++) {
> > > > > +       syncs_user = u64_to_user_ptr(args->syncs.syncs);
> > > > > +       for (num_syncs = 0; num_syncs < args-
> > > > > >syncs.num_syncs; num_syncs++) {
> > > > >                 err = xe_sync_entry_parse(xe, xef,
> > > > > &syncs[num_syncs],
> > > > >                                          
> > > > > &syncs_user[num_syncs],
> > > > >                                          
> > > > > (xe_vm_in_lr_mode(vm) ?
> > > > > @@ -3210,7 +3189,8 @@ int xe_vm_bind_ioctl(struct drm_device
> > > > > *dev, void *data, struct drm_file *file)
> > > > >         vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> > > > >    free_syncs:
> > > > >         if (err == -ENODATA)
> > > > > -               err = vm_bind_ioctl_signal_fences(vm, q,
> > > > > syncs, num_syncs);
> > > > > +               err = vm_bind_ioctl_signal_fences(vm, q,
> > > > > syncs, num_syncs,
> > > > > +                                                 async);
> > > > >         while (num_syncs--)
> > > > >                 xe_sync_entry_cleanup(&syncs[num_syncs]);
> > > > > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > b/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > index 23abdfd8622f..ce8b9bde7e9c 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > @@ -167,13 +167,12 @@ struct xe_vm {
> > > > >          */
> > > > >    #define XE_VM_FLAG_64K                       BIT(0)
> > > > >    #define XE_VM_FLAG_LR_MODE           BIT(1)
> > > > > -#define XE_VM_FLAG_ASYNC_DEFAULT       BIT(2)
> > > > > -#define XE_VM_FLAG_MIGRATION           BIT(3)
> > > > > -#define XE_VM_FLAG_SCRATCH_PAGE                BIT(4)
> > > > > -#define XE_VM_FLAG_FAULT_MODE          BIT(5)
> > > > > -#define XE_VM_FLAG_BANNED              BIT(6)
> > > > > -#define XE_VM_FLAG_TILE_ID(flags)      FIELD_GET(GENMASK(8,
> > > > > 7), flags)
> > > > > -#define XE_VM_FLAG_SET_TILE_ID(tile)   FIELD_PREP(GENMASK(8,
> > > > > 7), (tile)->id)
> > > > > +#define XE_VM_FLAG_MIGRATION           BIT(2)
> > > > > +#define XE_VM_FLAG_SCRATCH_PAGE                BIT(3)
> > > > > +#define XE_VM_FLAG_FAULT_MODE          BIT(4)
> > > > > +#define XE_VM_FLAG_BANNED              BIT(5)
> > > > > +#define XE_VM_FLAG_TILE_ID(flags)      FIELD_GET(GENMASK(7,
> > > > > 6), flags)
> > > > > +#define XE_VM_FLAG_SET_TILE_ID(tile)   FIELD_PREP(GENMASK(7,
> > > > > 6), (tile)->id)
> > > > >         unsigned long flags;
> > > > >         /** @composite_fence_ctx: context composite fence */
> > > > > @@ -385,6 +384,8 @@ enum xe_vma_op_flags {
> > > > >         XE_VMA_OP_PREV_COMMITTED        = BIT(3),
> > > > >         /** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation
> > > > > committed */
> > > > >         XE_VMA_OP_NEXT_COMMITTED        = BIT(4),
> > > > > +       /** @XE_VMA_OP_ASYNC: operation is async */
> > > > > +       XE_VMA_OP_ASYNC                 = BIT(5),
> > > > >    };
> > > > >    /** struct xe_vma_op - VMA operation */
> > > > > diff --git a/include/uapi/drm/xe_drm.h
> > > > > b/include/uapi/drm/xe_drm.h
> > > > > index eb03a49c17a1..fd8172fe2d9a 100644
> > > > > --- a/include/uapi/drm/xe_drm.h
> > > > > +++ b/include/uapi/drm/xe_drm.h
> > > > > @@ -141,8 +141,7 @@ struct drm_xe_engine_class_instance {
> > > > >          * Kernel only classes (not actual hardware engine
> > > > > class). Used for
> > > > >          * creating ordered queues of VM bind operations.
> > > > >          */
> > > > > -#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC      5
> > > > > -#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC       6
> > > > > +#define DRM_XE_ENGINE_CLASS_VM_BIND            5
> > > > >         __u16 engine_class;
> > > > >         __u16 engine_instance;
> > > > > @@ -660,7 +659,6 @@ struct drm_xe_vm_create {
> > > > >          * still enable recoverable pagefaults if supported
> > > > > by the device.
> > > > >          */
> > > > >    #define DRM_XE_VM_CREATE_FLAG_LR_MODE                (1 <<
> > > > > 1)
> > > > > -#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT    (1 << 2)
> > > > >         /*
> > > > >          * DRM_XE_VM_CREATE_FLAG_FAULT_MODE requires also
> > > > >          * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to
> > > > > be allocated
> > > > > @@ -668,7 +666,7 @@ struct drm_xe_vm_create {
> > > > >          * The xe driver internally uses recoverable
> > > > > pagefaults to implement
> > > > >          * this.
> > > > >          */
> > > > > -#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE       (1 << 3)
> > > > > +#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE       (1 << 2)
> > > > >         /** @flags: Flags */
> > > > >         __u32 flags;
> > > > > @@ -776,12 +774,11 @@ struct drm_xe_vm_bind_op {
> > > > >         __u32 op;
> > > > >    #define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
> > > > > -#define DRM_XE_VM_BIND_FLAG_ASYNC      (1 << 1)
> > > > >         /*
> > > > >          * Valid on a faulting VM only, do the MAP operation
> > > > > immediately rather
> > > > >          * than deferring the MAP to the page fault handler.
> > > > >          */
> > > > > -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE  (1 << 2)
> > > > > +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE  (1 << 1)
> > > > >         /*
> > > > >          * When the NULL flag is set, the page tables are
> > > > > setup with a special
> > > > >          * bit which indicates writes are dropped and all
> > > > > reads return zero.  In
> > > > > @@ -789,7 +786,7 @@ struct drm_xe_vm_bind_op {
> > > > >          * operations, the BO handle MBZ, and the BO offset
> > > > > MBZ. This flag is
> > > > >          * intended to implement VK sparse bindings.
> > > > >          */
> > > > > -#define DRM_XE_VM_BIND_FLAG_NULL       (1 << 3)
> > > > > +#define DRM_XE_VM_BIND_FLAG_NULL       (1 << 2)
> > > > >         /** @flags: Bind flags */
> > > > >         __u32 flags;
> > > > > @@ -807,6 +804,27 @@ struct drm_xe_vm_bind_op {
> > > > >         __u64 reserved[3];
> > > > >    };
> > > > > +/**
> > > > > + * struct drm_xe_syncs - In / out syncs for IOCTLs.
> > > > > + */
> > > > > +struct drm_xe_syncs {
> > > > > +       /** @num_syncs: amount of syncs to wait on */
> > > > > +       __u32 num_syncs;
> > > > > +
> > > > > +       /*
> > > > > +        * Block in IOCTL until operation complete, num_syncs
> > > > > MBZ if set.
> > > > > +        */
> > > > > +#define DRM_XE_SYNCS_FLAG_WAIT_FOR_OP (1 << 0)
> > > > > +       /** @flags: Sync flags */
> > > > > +       __u32 flags;
> > > > > +
> > > > > +       /** @syncs: pointer to struct drm_xe_sync array */
> > > > > +       __u64 syncs;
> > > > > +
> > > > > +       /** @reserved: Reserved */
> > > > > +       __u64 reserved[2];
> > > > > +};
> > > > > +
> > > > >    struct drm_xe_vm_bind {
> > > > >         /** @extensions: Pointer to the first extension
> > > > > struct, if any */
> > > > >         __u64 extensions;
> > > > > @@ -838,14 +856,8 @@ struct drm_xe_vm_bind {
> > > > >                 __u64 vector_of_binds;
> > > > >         };
> > > > > -       /** @pad: MBZ */
> > > > > -       __u32 pad2;
> > > > > -
> > > > > -       /** @num_syncs: amount of syncs to wait on */
> > > > > -       __u32 num_syncs;
> > > > > -
> > > > > -       /** @syncs: pointer to struct drm_xe_sync array */
> > > > > -       __u64 syncs;
> > > > > +       /** @syncs: syncs for bind */
> > > > > +       struct drm_xe_syncs syncs;
> > > > >         /** @reserved: Reserved */
> > > > >         __u64 reserved[2];
> > > > > @@ -974,14 +986,14 @@ struct drm_xe_exec {
> > > > >         /** @extensions: Pointer to the first extension
> > > > > struct, if any */
> > > > >         __u64 extensions;
> > > > > +       /** @pad: MBZ */
> > > > > +       __u32 pad;
> > > > > +
> > > > >         /** @exec_queue_id: Exec queue ID for the batch
> > > > > buffer */
> > > > >         __u32 exec_queue_id;
> > > > > -       /** @num_syncs: Amount of struct drm_xe_sync in
> > > > > array. */
> > > > > -       __u32 num_syncs;
> > > > > -
> > > > > -       /** @syncs: Pointer to struct drm_xe_sync array. */
> > > > > -       __u64 syncs;
> > > > > +       /** @syncs: syncs for exec */
> > > > > +       struct drm_xe_syncs syncs;
> > > > >         /**
> > > > >          * @address: address of batch buffer if
> > > > > num_batch_buffer == 1 or an
> > > > > @@ -995,8 +1007,8 @@ struct drm_xe_exec {
> > > > >          */
> > > > >         __u16 num_batch_buffer;
> > > > > -       /** @pad: MBZ */
> > > > > -       __u16 pad[3];
> > > > > +       /** @pad2: MBZ */
> > > > > +       __u16 pad2[3];
> > > > >         /** @reserved: Reserved */
> > > > >         __u64 reserved[2];


