[Intel-xe] [PATCH v2 03/27] drm/xe: Take in-syncs into account when num_execs or num_binds == 0

Matthew Brost matthew.brost at intel.com
Mon Nov 13 08:28:26 UTC 2023


On Mon, Nov 13, 2023 at 01:50:41PM +0100, Thomas Hellström wrote:
> Hi,
> 
> On 11/7/23 06:25, Matthew Brost wrote:
> > Wait on in-syncs before signaling out-syncs if num_execs or num_binds ==
> > 0 in the execbuf IOCTL or VM bind IOCTL, respectively.
> > 
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >   drivers/gpu/drm/xe/xe_exec.c | 10 ++++-
> >   drivers/gpu/drm/xe/xe_sync.c | 75 ++++++++++++++++++++++++++++++++++++
> >   drivers/gpu/drm/xe/xe_sync.h |  5 +++
> >   drivers/gpu/drm/xe/xe_vm.c   | 24 ++++++++++--
> >   4 files changed, 108 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> > index 4666f5b145f7..80ee6d8fcf68 100644
> > --- a/drivers/gpu/drm/xe/xe_exec.c
> > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > @@ -238,11 +238,17 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >   	if (!args->num_batch_buffer) {
> >   		if (!xe_vm_no_dma_fences(vm)) {
> > -			struct dma_fence *fence =
> > -				xe_exec_queue_last_fence_get(q, vm);
> > +			struct dma_fence *fence;
> > +			fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
> > +			if (IS_ERR(fence)) {
> > +				err = PTR_ERR(fence);
> > +				goto err_exec;
> > +			}
> >   			for (i = 0; i < num_syncs; i++)
> >   				xe_sync_entry_signal(&syncs[i], NULL, fence);
> > +			xe_exec_queue_last_fence_set(q, vm, fence);
> > +			dma_fence_put(fence);
> >   		}
> >   		goto err_exec;
> > diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> > index 2461e7d4814c..6b38c74a1de1 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.c
> > +++ b/drivers/gpu/drm/xe/xe_sync.c
> > @@ -5,6 +5,7 @@
> >   #include "xe_sync.h"
> > +#include <linux/dma-fence-array.h>
> >   #include <linux/kthread.h>
> >   #include <linux/sched/mm.h>
> >   #include <linux/uaccess.h>
> > @@ -14,6 +15,7 @@
> >   #include <drm/xe_drm.h>
> >   #include "xe_device_types.h"
> > +#include "xe_exec_queue.h"
> >   #include "xe_macros.h"
> >   #include "xe_sched_job_types.h"
> > @@ -274,3 +276,76 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
> >   	if (sync->ufence)
> >   		user_fence_put(sync->ufence);
> >   }
> > +
> > +/**
> > + * xe_sync_in_fence_get() - Get a fence from syncs, exec queue, and VM
> > + * @sync: input syncs
> > + * @num_sync: number of syncs
> > + * @q: exec queue
> > + * @vm: VM
> > + *
> > + * Get a fence from syncs, exec queue, and VM. If syncs contain more than 1
> > + * in-fence create and return a composite fence of all in-fences, if syncs
> > + * contain 1 in-fence return in-fence, if no in-fences return last fence on
> > + * input exec queue. Caller must drop reference to returned fence.
> > + *
> > + * Return: fence on success, ERR_PTR(-ENOMEM) on failure
> > + */
> > +struct dma_fence *
> > +xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
> > +		     struct xe_exec_queue *q, struct xe_vm *vm)
> > +{
> > +	struct dma_fence **fences = NULL;
> > +	struct dma_fence_array *cf = NULL;
> > +	struct dma_fence *fence;
> > +	int i, num_in_fence = 0, current_fence = 0;
> > +
> > +	lockdep_assert_held(&vm->lock);
> > +
> > +	/* Count in-fences */
> > +	for (i = 0; i < num_sync; ++i) {
> > +		if (sync[i].fence) {
> > +			++num_in_fence;
> > +			fence = sync[i].fence;
> > +		}
> > +	}
> > +
> > +	/* Easy cases... */
> > +	if (!num_in_fence) {
> > +		fence = xe_exec_queue_last_fence_get(q, vm);
> > +		dma_fence_get(fence);
> > +		return fence;
> > +	} else if (num_in_fence == 1) {
> 
> 
> Don't we also need to wait on the exec_queue's last fence in this case, and
> in the multiple-in-fences case below?
> Otherwise we only wait for the in-fences, but not for currently executing
> jobs?
> 

That is right. I think if num_in_fence > 0 we need to wait on the last fence too.

> Did you investigate simply forwarding a non-existent batch buffer in the exec
> case and creating a NOP binding job in the bind case?
> 

Hmm, let me look at that as an option.

Matt

> /Thomas
> 
> 
> 
> > +		dma_fence_get(fence);
> > +		return fence;
> > +	}
> > +
> > +	/* Create composite fence */
> > +	fences = kmalloc_array(num_in_fence, sizeof(*fences), GFP_KERNEL);
> > +	if (!fences)
> > +		return ERR_PTR(-ENOMEM);
> > +	for (i = 0; i < num_sync; ++i) {
> > +		if (sync[i].fence) {
> > +			dma_fence_get(sync[i].fence);
> > +			fences[current_fence++] = sync[i].fence;
> > +		}
> > +	}
> > +	cf = dma_fence_array_create(num_in_fence, fences,
> > +				    vm->composite_fence_ctx,
> > +				    vm->composite_fence_seqno++,
> > +				    false);
> > +	if (!cf) {
> > +		--vm->composite_fence_seqno;
> > +		goto err_out;
> > +	}
> > +
> > +	return &cf->base;
> > +
> > +err_out:
> > +	while (current_fence)
> > +		dma_fence_put(fences[--current_fence]);
> > +	kfree(fences);
> > +	kfree(cf);
> > +
> > +	return ERR_PTR(-ENOMEM);
> > +}
> > diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
> > index 98f02bb34637..c0c8ddac805d 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.h
> > +++ b/drivers/gpu/drm/xe/xe_sync.h
> > @@ -9,8 +9,10 @@
> >   #include "xe_sync_types.h"
> >   struct xe_device;
> > +struct xe_exec_queue;
> >   struct xe_file;
> >   struct xe_sched_job;
> > +struct xe_vm;
> >   int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> >   			struct xe_sync_entry *sync,
> > @@ -23,5 +25,8 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync,
> >   			  struct xe_sched_job *job,
> >   			  struct dma_fence *fence);
> >   void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
> > +struct dma_fence *
> > +xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
> > +		     struct xe_exec_queue *q, struct xe_vm *vm);
> >   #endif
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 2f212939d2b5..2a7fa8e2058e 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -3155,12 +3155,28 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >   unwind_ops:
> >   	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> >   free_syncs:
> > -	for (i = 0; err == -ENODATA && i < num_syncs; i++) {
> > -		struct dma_fence *fence =
> > -			xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
> > +	if (err == -ENODATA) {
> > +		struct dma_fence *fence;
> > -		xe_sync_entry_signal(&syncs[i], NULL, fence);
> > +		fence = xe_sync_in_fence_get(syncs, num_syncs,
> > +					     to_wait_exec_queue(vm, q), vm);
> > +		if (IS_ERR(fence)) {
> > +			err = PTR_ERR(fence);
> > +			goto cleanup_syncs;
> > +		}
> > +		for (i = 0; i < num_syncs; i++)
> > +			xe_sync_entry_signal(&syncs[i], NULL, fence);
> > +		if (!async) {
> > +			long timeout = dma_fence_wait(fence, true);
> > +
> > +			if (timeout < 0)
> > +				err = -EINTR;
> > +		}
> > +		xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
> > +					     fence);
> > +		dma_fence_put(fence);
> >   	}
> > +cleanup_syncs:
> >   	while (num_syncs--)
> >   		xe_sync_entry_cleanup(&syncs[num_syncs]);


More information about the Intel-xe mailing list