[Intel-xe] [PATCH v3 5/5] drm/xe: Take in-syncs into account when num_execs or num_binds == 0
Matthew Brost
matthew.brost at intel.com
Wed Dec 6 08:35:43 UTC 2023
On Wed, Dec 06, 2023 at 03:00:33PM +0100, Thomas Hellström wrote:
> On Tue, 2023-12-05 at 12:37 -0800, Matthew Brost wrote:
> > Wait on in-syncs before signaling out-syncs if num_execs or num_binds == 0
> > in execbuf IOCTL or VM bind IOCTL respectively.
> >
> > v2: Wait on last fence in addition to in-fences (Thomas)
> >
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_exec.c | 10 ++++-
> > drivers/gpu/drm/xe/xe_sync.c | 74 ++++++++++++++++++++++++++++++++++++
> > drivers/gpu/drm/xe/xe_sync.h | 5 +++
> > drivers/gpu/drm/xe/xe_vm.c | 24 ++++++++++--
> > 4 files changed, 107 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> > index 96d7506a4c72..438e34585e1e 100644
> > --- a/drivers/gpu/drm/xe/xe_exec.c
> > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > @@ -238,11 +238,17 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >
> > if (!args->num_batch_buffer) {
> > if (!xe_vm_in_lr_mode(vm)) {
> > - struct dma_fence *fence =
> > - xe_exec_queue_last_fence_get(q, vm);
> > + struct dma_fence *fence;
> >
> > + fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
> > + if (IS_ERR(fence)) {
> > + err = PTR_ERR(fence);
> > + goto err_exec;
> > + }
> > for (i = 0; i < num_syncs; i++)
> > xe_sync_entry_signal(&syncs[i], NULL, fence);
> > + xe_exec_queue_last_fence_set(q, vm, fence);
> > + dma_fence_put(fence);
> > }
> >
> > goto err_exec;
> > diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> > index d0f118223fa2..e4c220cf9115 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.c
> > +++ b/drivers/gpu/drm/xe/xe_sync.c
> > @@ -5,6 +5,7 @@
> >
> > #include "xe_sync.h"
> >
> > +#include <linux/dma-fence-array.h>
> > #include <linux/kthread.h>
> > #include <linux/sched/mm.h>
> > #include <linux/uaccess.h>
> > @@ -14,6 +15,7 @@
> > #include <drm/xe_drm.h>
> >
> > #include "xe_device_types.h"
> > +#include "xe_exec_queue.h"
> > #include "xe_macros.h"
> > #include "xe_sched_job_types.h"
> >
> > @@ -268,3 +270,75 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
> > if (sync->ufence)
> > user_fence_put(sync->ufence);
> > }
> > +
> > +/**
> > + * xe_sync_in_fence_get() - Get a fence from syncs, exec queue, and VM
> > + * @sync: input syncs
> > + * @num_sync: number of syncs
> > + * @q: exec queue
> > + * @vm: VM
> > + *
> > + * Get a fence from syncs, exec queue, and VM. If the syncs contain in-fences,
> > + * create and return a composite fence of all in-fences + the last fence. If
> > + * there are no in-fences, return the last fence on the input exec queue. The
> > + * caller must drop the reference to the returned fence.
> > + *
> > + * Return: fence on success, ERR_PTR(-ENOMEM) on failure
> > + */
> > +struct dma_fence *
> > +xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
> > + struct xe_exec_queue *q, struct xe_vm *vm)
> > +{
> > + struct dma_fence **fences = NULL;
> > + struct dma_fence_array *cf = NULL;
> > + struct dma_fence *fence;
> > + int i, num_in_fence = 0, current_fence = 0;
> > +
> > + lockdep_assert_held(&vm->lock);
> > +
> > + /* Count in-fences */
> > + for (i = 0; i < num_sync; ++i) {
> > + if (sync[i].fence) {
> > + ++num_in_fence;
> > + fence = sync[i].fence;
> > + }
> > + }
> > +
> > + /* Easy case... */
> > + if (!num_in_fence) {
> > + fence = xe_exec_queue_last_fence_get(q, vm);
> > + dma_fence_get(fence);
> > + return fence;
> > + }
> > +
> > + /* Create composite fence */
> > + fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL);
> > + if (!fences)
> > + return ERR_PTR(-ENOMEM);
> > + for (i = 0; i < num_sync; ++i) {
> > + if (sync[i].fence) {
> > + dma_fence_get(sync[i].fence);
> > + fences[current_fence++] = sync[i].fence;
> > + }
> > + }
> > + fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
> > + dma_fence_get(fences[current_fence - 1]);
> > + cf = dma_fence_array_create(num_in_fence + 1, fences,
> > + vm->composite_fence_ctx,
> > + vm->composite_fence_seqno++,
> > + false);
> > + if (!cf) {
> > + --vm->composite_fence_seqno;
> > + goto err_out;
> > + }
> > +
> > + return &cf->base;
> > +
> > +err_out:
> > + while (current_fence)
> > + dma_fence_put(fences[--current_fence]);
> > + kfree(fences);
> > + kfree(cf);
> > +
> > + return ERR_PTR(-ENOMEM);
> > +}
> > diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
> > index 45f4371e94b9..d284afbe917c 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.h
> > +++ b/drivers/gpu/drm/xe/xe_sync.h
> > @@ -9,8 +9,10 @@
> > #include "xe_sync_types.h"
> >
> > struct xe_device;
> > +struct xe_exec_queue;
> > struct xe_file;
> > struct xe_sched_job;
> > +struct xe_vm;
> >
> > #define SYNC_PARSE_FLAG_EXEC BIT(0)
> > #define SYNC_PARSE_FLAG_LR_MODE BIT(1)
> > @@ -27,5 +29,8 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync,
> > struct xe_sched_job *job,
> > struct dma_fence *fence);
> > void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
> > +struct dma_fence *
> > +xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
> > + struct xe_exec_queue *q, struct xe_vm *vm);
> >
> > #endif
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 042e1e31ba54..254766a79364 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -3178,12 +3178,28 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > unwind_ops:
> > vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> > free_syncs:
> > - for (i = 0; err == -ENODATA && i < num_syncs; i++) {
> > - struct dma_fence *fence =
> > - xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
> > + if (err == -ENODATA) {
> > + struct dma_fence *fence;
> > +
> > + fence = xe_sync_in_fence_get(syncs, num_syncs,
> > + to_wait_exec_queue(vm, q), vm);
> > + if (IS_ERR(fence)) {
> > + err = PTR_ERR(fence);
> > + goto cleanup_syncs;
> > + }
> > + for (i = 0; i < num_syncs; i++)
> > + xe_sync_entry_signal(&syncs[i], NULL, fence);
> > + if (xe_vm_sync_mode(vm, q)) {
> > + long timeout = dma_fence_wait(fence, true);
> >
> > - xe_sync_entry_signal(&syncs[i], NULL, fence);
> > + if (timeout < 0)
> > + err = -EINTR;
> > + }
>
>
> Is there a chance we can make all this a small static function and also
> reuse it above?
>
Yes. Will fix.
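Roughly something like this untested sketch (helper name and placement
are made up here; the exec path would pass sync_mode = false, and the
bind path would pass to_wait_exec_queue(vm, q) and xe_vm_sync_mode(vm, q)):

static int signal_syncs_no_op(struct xe_sync_entry *syncs, int num_syncs,
			      struct xe_exec_queue *q, struct xe_vm *vm,
			      bool sync_mode)
{
	struct dma_fence *fence;
	int i, err = 0;

	/* Composite of all in-fences + the queue's last fence */
	fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Out-syncs only signal once every in-fence has signaled */
	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_signal(&syncs[i], NULL, fence);

	/* Sync-mode VM binds wait for the composite fence inline */
	if (sync_mode && dma_fence_wait(fence, true) < 0)
		err = -EINTR;

	xe_exec_queue_last_fence_set(q, vm, fence);
	dma_fence_put(fence);

	return err;
}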
>
> > + xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
> > + fence);
>
> Hm. What happens when there is a new bind with a real workload being
> queued *after* this operation? Will it take this composite fence as a
> dependency?
>
Good catch, I think I missed this part. We should always add the last
fence as a dependency for any new jobs on the queue. Will fix.
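i.e. something like this untested sketch in the job creation path
(helper name and exact hook point are made up; this assumes the
struct drm_sched_job is embedded as xe_sched_job::drm and relies on
drm_sched_job_add_dependency() consuming a fence reference):

static int job_add_queue_last_fence_dep(struct xe_sched_job *job,
					struct xe_exec_queue *q,
					struct xe_vm *vm)
{
	struct dma_fence *fence = xe_exec_queue_last_fence_get(q, vm);

	/* Hold an extra ref, drm_sched_job_add_dependency() consumes one */
	dma_fence_get(fence);
	return drm_sched_job_add_dependency(&job->drm, fence);
}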
Matt
> /Thomas
>
>
>
> > + dma_fence_put(fence);
> > }
> > +cleanup_syncs:
> > while (num_syncs--)
> > xe_sync_entry_cleanup(&syncs[num_syncs]);
> >
>