[Intel-xe] [PATCH 6/6] drm/xe: Allow num_batch_buffer == 0 in exec IOCTL
Matthew Brost
matthew.brost at intel.com
Thu Sep 21 18:33:43 UTC 2023
On Thu, Sep 21, 2023 at 11:42:43AM +0200, Thomas Hellström wrote:
> Hi, Matthew,
>
> On Thu, 2023-09-14 at 13:40 -0700, Matthew Brost wrote:
> > The idea being out-syncs can signal indicating all previous
> > operations
> > on the exec queue are complete. An example use case of this would be
> > support for implementing vkQueueWaitForIdle easily.
> >
> > v2: Don't add last_fence for VM's that do not support dma fences
>
> Question here: Since we seem to need to add some special-casing for the
> last patches here, and it seems to me that UMD could easily keep track
> of the last out-fence, did we get a request for this from UMD?
>
I believe this came out of a discussion between Faith, Danilo, and Dave that I
picked up on. vkQueueWaitForIdle is currently implemented in user space via a
dummy BB or bind, I believe, but the ask was for the KMD to allow a zero
number of binds / execs so this can be implemented without the dummy
submission.
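For concreteness, a rough user-space sketch of that flow on top of this patch
(illustrative only: the struct / flag names below - struct drm_xe_exec,
struct drm_xe_sync, DRM_XE_SYNC_SYNCOBJ, DRM_XE_SYNC_SIGNAL, DRM_IOCTL_XE_EXEC -
are from memory of the in-flight uAPI, error handling omitted):

	/* Assumes <sys/ioctl.h>, <xf86drm.h>, <drm/xe_drm.h>. */

	/* Pre-created drm_syncobj used as the out-fence. */
	struct drm_xe_sync sync = {
		.handle = syncobj_handle,
		.flags  = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL,
	};

	/* No batch buffers: just fence everything already queued. */
	struct drm_xe_exec exec = {
		.exec_queue_id    = queue_id,
		.num_syncs        = 1,
		.syncs            = (uintptr_t)&sync,
		.num_batch_buffer = 0,
	};

	ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);

	/* Signals once all prior submissions on the exec queue complete. */
	drmSyncobjWait(fd, &syncobj_handle, 1, INT64_MAX, 0, NULL);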
> What happens if this zero-batch-buffer job has a number of unmet
> dependencies? Will the next job on the queue wait for those
> dependencies to be met?
>
Hmm, that is a gap. If a zero-batch-buffer job has unmet dependencies, its
out-syncs may signal immediately.
Maybe for now we return -EINVAL if num_binds / num_execs == 0 with in-syncs?
If we want to support in-syncs here, the easiest way would be to submit a NOP
job on the queue. A rough sketch of that -EINVAL check is below.
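Something like this in xe_sync_entry_parse(), reusing the new 'exec_nop'
argument and the existing 'signal' local (a sketch of the idea above, not part
of the posted patch):

	/* Reject in-syncs on a zero-batch-buffer exec for now, so unmet
	 * dependencies cannot be silently dropped when the out-syncs are
	 * signaled from the exec queue's last fence.
	 */
	if (XE_IOCTL_DBG(xe, exec_nop && !signal))
		return -EINVAL;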
> If patch 4 fixes a bug, could we make 5 and 6 a separate series?
>
Yes. Rodrigo is including patches 1-4 in his uAPI change series anyway. I will
repost 5-6 as a new series once his series is merged.
Matt
> Thanks,
> /Thomas
>
>
> >
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_exec.c | 22 +++++++++++++++++++---
> > drivers/gpu/drm/xe/xe_exec_queue.c | 5 ++++-
> > drivers/gpu/drm/xe/xe_exec_queue_types.h | 5 +++--
> > drivers/gpu/drm/xe/xe_sync.c | 5 ++++-
> > drivers/gpu/drm/xe/xe_sync.h | 2 +-
> > drivers/gpu/drm/xe/xe_vm.c | 2 +-
> > 6 files changed, 32 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> > index 28e84a0bbeb0..4666f5b145f7 100644
> > --- a/drivers/gpu/drm/xe/xe_exec.c
> > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > @@ -161,7 +161,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
> > return -EINVAL;
> >
> > - if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer))
> > + if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
> > + q->width != args->num_batch_buffer))
> > return -EINVAL;
> >
> > if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
> > @@ -182,12 +183,13 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > for (i = 0; i < args->num_syncs; i++) {
> > err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
> > &syncs_user[i], true,
> > - xe_vm_no_dma_fences(vm));
> > + xe_vm_no_dma_fences(vm),
> > + !args->num_batch_buffer);
> > if (err)
> > goto err_syncs;
> > }
> >
> > - if (xe_exec_queue_is_parallel(q)) {
> > + if (args->num_batch_buffer && xe_exec_queue_is_parallel(q)) {
> > err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
> > q->width);
> > if (err) {
> > @@ -234,6 +236,18 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > goto err_exec;
> > }
> >
> > + if (!args->num_batch_buffer) {
> > + if (!xe_vm_no_dma_fences(vm)) {
> > + struct dma_fence *fence =
> > + xe_exec_queue_last_fence_get(q, vm);
> > +
> > + for (i = 0; i < num_syncs; i++)
> > + xe_sync_entry_signal(&syncs[i], NULL, fence);
> > + }
> > +
> > + goto err_exec;
> > + }
> > +
> > if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
> > err = -EWOULDBLOCK;
> > goto err_exec;
> > @@ -327,6 +341,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >
> > if (xe_exec_queue_is_lr(q))
> > q->ring_ops->emit_job(job);
> > + if (!xe_vm_no_dma_fences(vm))
> > + xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
> > xe_sched_job_push(job);
> > xe_vm_reactivate_rebind(vm);
> >
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> > index 8722ab6ba00a..9fe91f66f776 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > @@ -964,7 +964,10 @@ int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
> > static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
> > struct xe_vm *vm)
> > {
> > - lockdep_assert_held_write(&vm->lock);
> > + if (q->flags & EXEC_QUEUE_FLAG_VM)
> > + lockdep_assert_held_write(&vm->lock);
> > + else
> > + xe_vm_assert_held(vm);
> > }
> >
> > /**
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > index 71ed8d22a8a1..9648b2bbabc9 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > @@ -53,8 +53,9 @@ struct xe_exec_queue {
> > struct xe_hw_fence_irq *fence_irq;
> >
> > /**
> > - * @last_fence: last fence on engine, protected by vm->lock in write
> > - * mode if bind engine
> > + * @last_fence: last fence on exec queue, protected by vm->lock in write
> > + * mode if bind exec queue, protected by dma resv lock if non-bind exec
> > + * queue
> > */
> > struct dma_fence *last_fence;
> >
> > diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> > index 73ef259aa387..2461e7d4814c 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.c
> > +++ b/drivers/gpu/drm/xe/xe_sync.c
> > @@ -100,7 +100,7 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
> > int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> > struct xe_sync_entry *sync,
> > struct drm_xe_sync __user *sync_user,
> > - bool exec, bool no_dma_fences)
> > + bool exec, bool no_dma_fences, bool exec_nop)
> > {
> > struct drm_xe_sync sync_in;
> > int err;
> > @@ -171,6 +171,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> > break;
> >
> > case DRM_XE_SYNC_USER_FENCE:
> > + if (XE_IOCTL_DBG(xe, exec_nop))
> > + return -EOPNOTSUPP;
> > +
> > if (XE_IOCTL_DBG(xe, !signal))
> > return -EOPNOTSUPP;
> >
> > diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
> > index 30958ddc4cdc..98f02bb34637 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.h
> > +++ b/drivers/gpu/drm/xe/xe_sync.h
> > @@ -15,7 +15,7 @@ struct xe_sched_job;
> > int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> > struct xe_sync_entry *sync,
> > struct drm_xe_sync __user *sync_user,
> > - bool exec, bool compute_mode);
> > + bool exec, bool compute_mode, bool exec_nop);
> > int xe_sync_entry_wait(struct xe_sync_entry *sync);
> > int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
> > struct xe_sched_job *job);
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 0e2f3ab453ea..c2526950cf60 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -2916,7 +2916,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
> > err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
> > &syncs_user[num_syncs], false,
> > - xe_vm_no_dma_fences(vm));
> > + xe_vm_no_dma_fences(vm), false);
> > if (err)
> > goto free_syncs;
> > }
>