[Intel-xe] [PATCH v2 02/27] drm/xe: Allow num_batch_buffer == 0 in exec IOCTL

Fri Nov 10 09:03:37 UTC 2023

On Fri, Nov 10, 2023 at 12:11:19PM +0100, Thomas Hellström wrote:
> On Mon, 2023-11-06 at 21:25 -0800, Matthew Brost wrote:
> > The idea is that out-syncs can then signal to indicate that all previous
> > operations on the exec queue are complete. An example use case would be
> > easily implementing vkQueueWaitIdle.
> > 
> > v2: Don't add last_fence for VM's that do not support dma fences
> > 
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_exec.c             | 22 +++++++++++++++++++---
> >  drivers/gpu/drm/xe/xe_exec_queue.c       |  5 ++++-
> >  drivers/gpu/drm/xe/xe_exec_queue_types.h |  5 +++--
> >  drivers/gpu/drm/xe/xe_sync.c             |  5 ++++-
> >  drivers/gpu/drm/xe/xe_sync.h             |  2 +-
> >  drivers/gpu/drm/xe/xe_vm.c               |  2 +-
> >  6 files changed, 32 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_exec.c
> > b/drivers/gpu/drm/xe/xe_exec.c
> > index 28e84a0bbeb0..4666f5b145f7 100644
> > --- a/drivers/gpu/drm/xe/xe_exec.c
> > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > @@ -161,7 +161,8 @@ int xe_exec_ioctl(struct drm_device *dev, void
> > *data, struct drm_file *file)
> >         if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
> >                 return -EINVAL;
> >  
> > -       if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer))
> > +       if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
> > +                        q->width != args->num_batch_buffer))
> >                 return -EINVAL;
> >  
> >         if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
> > @@ -182,12 +183,13 @@ int xe_exec_ioctl(struct drm_device *dev, void
> > *data, struct drm_file *file)
> >         for (i = 0; i < args->num_syncs; i++) {
> >                 err = xe_sync_entry_parse(xe, xef,
> > &syncs[num_syncs++],
> >                                           &syncs_user[i], true,
> > -                                         xe_vm_no_dma_fences(vm));
> > +                                         xe_vm_no_dma_fences(vm),
> > +                                         !args->num_batch_buffer);
> >                 if (err)
> >                         goto err_syncs;
> >         }
> >  
> > -       if (xe_exec_queue_is_parallel(q)) {
> > +       if (args->num_batch_buffer && xe_exec_queue_is_parallel(q)) {
> >                 err = __copy_from_user(addresses, addresses_user,
> > sizeof(u64) *
> >                                        q->width);
> >                 if (err) {
> > @@ -234,6 +236,18 @@ int xe_exec_ioctl(struct drm_device *dev, void
> > *data, struct drm_file *file)
> >                 goto err_exec;
> >         }
> >  
> > +       if (!args->num_batch_buffer) {
> > +               if (!xe_vm_no_dma_fences(vm)) {
> > +                       struct dma_fence *fence =
> > +                               xe_exec_queue_last_fence_get(q, vm);
> > +
> > +                       for (i = 0; i < num_syncs; i++)
> > +                               xe_sync_entry_signal(&syncs[i], NULL,
> > fence);
> > +               }
> > +
> > +               goto err_exec;
> > +       }
> > +
> >         if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
> >                 err = -EWOULDBLOCK;
> >                 goto err_exec;
> > @@ -327,6 +341,8 @@ int xe_exec_ioctl(struct drm_device *dev, void
> > *data, struct drm_file *file)
> >  
> >         if (xe_exec_queue_is_lr(q))
> >                 q->ring_ops->emit_job(job);
> > +       if (!xe_vm_no_dma_fences(vm))
> > +               xe_exec_queue_last_fence_set(q, vm, &job-
> > >drm.s_fence->finished);
> >         xe_sched_job_push(job);
> >         xe_vm_reactivate_rebind(vm);
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c
> > b/drivers/gpu/drm/xe/xe_exec_queue.c
> > index 4fd44a9203e4..35710b66e5de 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > @@ -924,7 +924,10 @@ int xe_exec_queue_set_property_ioctl(struct
> > drm_device *dev, void *data,
> >  static void xe_exec_queue_last_fence_lockdep_assert(struct
> > xe_exec_queue *q,
> >                                                     struct xe_vm *vm)
> >  {
> > -       lockdep_assert_held_write(&vm->lock);
> > +       if (q->flags & EXEC_QUEUE_FLAG_VM)
> > +               lockdep_assert_held_write(&vm->lock);
> > +       else
> > +               xe_vm_assert_held(vm);
> >  }
> >  
> >  /**
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > index ecd761177567..35ffe7c55f25 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > @@ -53,8 +53,9 @@ struct xe_exec_queue {
> >         struct xe_hw_fence_irq *fence_irq;
> >  
> >         /**
> > -        * @last_fence: last fence on engine, protected by vm->lock
> > in write
> > -        * mode if bind engine
> > +        * @last_fence: last fence on exec queue, protected by vm-
> > >lock in write
> > +        * mode if bind exec queue, protected by dma resv lock if
> > non-bind exec
> > +        * queue
> >          */
> >         struct dma_fence *last_fence;
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_sync.c
> > b/drivers/gpu/drm/xe/xe_sync.c
> > index 73ef259aa387..2461e7d4814c 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.c
> > +++ b/drivers/gpu/drm/xe/xe_sync.c
> > @@ -100,7 +100,7 @@ static void user_fence_cb(struct dma_fence
> > *fence, struct dma_fence_cb *cb)
> >  int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> >                         struct xe_sync_entry *sync,
> >                         struct drm_xe_sync __user *sync_user,
> > -                       bool exec, bool no_dma_fences)
> > +                       bool exec, bool no_dma_fences, bool exec_nop)
> 
> Here we have a number of bools indicating the context of a potential
> caller. That makes the code hard to read: when reading the caller code
> you need to remember both what each argument position means and exactly
> what xe_sync_entry_parse() does for a given caller, for example with
> exec_nop.
> One could pass a set of flags instead, so that, for example,
> exec_nop becomes FLAG_DISALLOW_USER_FENCE.
> 

Agreed, let me clean this up.

Matt

> Otherwise LGTM.
>