[Intel-xe] [PATCH 4/5] drm/xe: Allow num_batch_buffer == 0 in exec IOCTL
Matthew Brost
matthew.brost at intel.com
Mon Dec 4 18:19:49 UTC 2023
On Mon, Dec 04, 2023 at 02:25:55PM +0100, Thomas Hellström wrote:
>
> On 11/16/23 20:40, Matthew Brost wrote:
> > The idea is that out-syncs can signal to indicate that all previous
> > operations on the exec queue are complete. An example use case would be
> > implementing vkQueueWaitIdle easily.
> >
> > v2: Don't add last_fence for VM's that do not support dma fences
> > v3: Use a flags field instead of several bools in sync parse (Thomas)
> >
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_exec.c | 23 ++++++++++++++++++++---
> > drivers/gpu/drm/xe/xe_exec_queue.c | 5 ++++-
> > drivers/gpu/drm/xe/xe_exec_queue_types.h | 5 +++--
> > drivers/gpu/drm/xe/xe_sync.c | 4 ++++
> > drivers/gpu/drm/xe/xe_sync.h | 6 +++++-
> > 5 files changed, 36 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> > index 7d4d22c81a42..4989bc6038af 100644
> > --- a/drivers/gpu/drm/xe/xe_exec.c
> > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > @@ -161,7 +161,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
> > return -EINVAL;
> > - if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer))
> > + if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
> > + q->width != args->num_batch_buffer))
> > return -EINVAL;
> > if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
> > @@ -183,12 +184,14 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
> > &syncs_user[i], SYNC_PARSE_FLAG_EXEC |
> > (xe_vm_no_dma_fences(vm) ?
> > - SYNC_PARSE_FLAG_LR_MODE : 0));
> > + SYNC_PARSE_FLAG_LR_MODE : 0) |
> > + (!args->num_batch_buffer ?
> > + SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
>
> Q: Why do we disallow user fences in this situation?
>
>
A user fence is inserted into the ring, so unless we issue a NULL batch
or something similar, this is not easy to support. I don't think we have a
use case for this either. If this is a blocker, I can change this. Also, I
think we can add support for this later if needed.
> > if (err)
> > goto err_syncs;
> > }
> > - if (xe_exec_queue_is_parallel(q)) {
> > + if (args->num_batch_buffer && xe_exec_queue_is_parallel(q)) {
> > err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
> > q->width);
> > if (err) {
> > @@ -235,6 +238,18 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > goto err_exec;
> > }
> > + if (!args->num_batch_buffer) {
> > + if (!xe_vm_no_dma_fences(vm)) {
> > + struct dma_fence *fence =
> > + xe_exec_queue_last_fence_get(q, vm);
> > +
> > + for (i = 0; i < num_syncs; i++)
> > + xe_sync_entry_signal(&syncs[i], NULL, fence);
> > + }
> > +
> > + goto err_exec;
> > + }
> > +
> > if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
> > err = -EWOULDBLOCK;
> > goto err_exec;
> > @@ -328,6 +343,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > if (xe_exec_queue_is_lr(q))
> > q->ring_ops->emit_job(job);
> > + if (!xe_vm_no_dma_fences(vm))
> > + xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
> > xe_sched_job_push(job);
> > xe_vm_reactivate_rebind(vm);
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> > index 4fd44a9203e4..35710b66e5de 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > @@ -924,7 +924,10 @@ int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
> > static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
> > struct xe_vm *vm)
> > {
> > - lockdep_assert_held_write(&vm->lock);
> > + if (q->flags & EXEC_QUEUE_FLAG_VM)
> > + lockdep_assert_held_write(&vm->lock);
> > + else
> > + xe_vm_assert_held(vm);
> > }
> > /**
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > index 5ba47a5cfdbd..52f0927d0d9b 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > @@ -56,8 +56,9 @@ struct xe_exec_queue {
> > struct xe_hw_fence_irq *fence_irq;
> > /**
> > - * @last_fence: last fence on engine, protected by vm->lock in write
> > - * mode if bind engine
> > + * @last_fence: last fence on exec queue, protected by vm->lock in write
> > + * mode if bind exec queue, protected by dma resv lock if non-bind exec
> > + * queue
> > */
> > struct dma_fence *last_fence;
> > diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> > index 016d0a2ea5bc..cfe8724ee954 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.c
> > +++ b/drivers/gpu/drm/xe/xe_sync.c
> > @@ -106,6 +106,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> > int err;
> > bool exec = flags & SYNC_PARSE_FLAG_EXEC;
> > bool lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE;
> > + bool disallow_user_fence = flags & SYNC_PARSE_FLAG_DISALLOW_USER_FENCE;
> > bool signal;
> > if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
> > @@ -173,6 +174,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> > break;
> > case DRM_XE_SYNC_USER_FENCE:
> > + if (XE_IOCTL_DBG(xe, disallow_user_fence))
> > + return -EOPNOTSUPP;
> > +
> > if (XE_IOCTL_DBG(xe, !signal))
> > return -EOPNOTSUPP;
> > diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
> > index 30958ddc4cdc..45f4371e94b9 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.h
> > +++ b/drivers/gpu/drm/xe/xe_sync.h
> > @@ -12,10 +12,14 @@ struct xe_device;
> > struct xe_file;
> > struct xe_sched_job;
> > +#define SYNC_PARSE_FLAG_EXEC BIT(0)
> > +#define SYNC_PARSE_FLAG_LR_MODE BIT(1)
> > +#define SYNC_PARSE_FLAG_DISALLOW_USER_FENCE BIT(2)
> > +
> > int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> > struct xe_sync_entry *sync,
> > struct drm_xe_sync __user *sync_user,
> > - bool exec, bool compute_mode);
> > + unsigned int flags);
>
> Should this have been in the previous patch?
>
Yes, good catch. Will fix.
Matt
> Thomas
>
>
> > int xe_sync_entry_wait(struct xe_sync_entry *sync);
> > int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
> > struct xe_sched_job *job);
More information about the Intel-xe
mailing list