[Intel-xe] [PATCH 02/26] drm/xe: Allow num_batch_buffer == 0 in exec IOCTL

Mon Nov 6 13:52:23 UTC 2023

On 10/26/23 06:01, Matthew Brost wrote:
> The idea is that out-syncs can signal to indicate that all previous
> operations on the exec queue are complete. An example use case would be
> an easy implementation of vkQueueWaitIdle.
>
> v2: Don't add last_fence for VMs that do not support dma fences
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>

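Also, as we discussed previously, this breaks with in-fences, right? In the
num_batch_buffer == 0 path the syncs are signalled from the exec queue's last
fence and the ioctl then jumps straight to err_exec, so any in-fences passed
in are never waited on.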

/Thomas

> ---
>   drivers/gpu/drm/xe/xe_exec.c             | 22 +++++++++++++++++++---
>   drivers/gpu/drm/xe/xe_exec_queue.c       |  5 ++++-
>   drivers/gpu/drm/xe/xe_exec_queue_types.h |  5 +++--
>   drivers/gpu/drm/xe/xe_sync.c             |  5 ++++-
>   drivers/gpu/drm/xe/xe_sync.h             |  2 +-
>   drivers/gpu/drm/xe/xe_vm.c               |  2 +-
>   6 files changed, 32 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index 28e84a0bbeb0..4666f5b145f7 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -161,7 +161,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
>   		return -EINVAL;
>   
> -	if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer))
> +	if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
> +			 q->width != args->num_batch_buffer))
>   		return -EINVAL;
>   
>   	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
> @@ -182,12 +183,13 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   	for (i = 0; i < args->num_syncs; i++) {
>   		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
>   					  &syncs_user[i], true,
> -					  xe_vm_no_dma_fences(vm));
> +					  xe_vm_no_dma_fences(vm),
> +					  !args->num_batch_buffer);
>   		if (err)
>   			goto err_syncs;
>   	}
>   
> -	if (xe_exec_queue_is_parallel(q)) {
> +	if (args->num_batch_buffer && xe_exec_queue_is_parallel(q)) {
>   		err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
>   				       q->width);
>   		if (err) {
> @@ -234,6 +236,18 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   		goto err_exec;
>   	}
>   
> +	if (!args->num_batch_buffer) {
> +		if (!xe_vm_no_dma_fences(vm)) {
> +			struct dma_fence *fence =
> +				xe_exec_queue_last_fence_get(q, vm);
> +
> +			for (i = 0; i < num_syncs; i++)
> +				xe_sync_entry_signal(&syncs[i], NULL, fence);
> +		}
> +
> +		goto err_exec;
> +	}
> +
>   	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
>   		err = -EWOULDBLOCK;
>   		goto err_exec;
> @@ -327,6 +341,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   
>   	if (xe_exec_queue_is_lr(q))
>   		q->ring_ops->emit_job(job);
> +	if (!xe_vm_no_dma_fences(vm))
> +		xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
>   	xe_sched_job_push(job);
>   	xe_vm_reactivate_rebind(vm);
>   
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index dd61c4267e24..b442d86d34fe 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -921,7 +921,10 @@ int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
>   static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
>   						    struct xe_vm *vm)
>   {
> -	lockdep_assert_held_write(&vm->lock);
> +	if (q->flags & EXEC_QUEUE_FLAG_VM)
> +		lockdep_assert_held_write(&vm->lock);
> +	else
> +		xe_vm_assert_held(vm);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index ecd761177567..35ffe7c55f25 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -53,8 +53,9 @@ struct xe_exec_queue {
>   	struct xe_hw_fence_irq *fence_irq;
>   
>   	/**
> -	 * @last_fence: last fence on engine, protected by vm->lock in write
> -	 * mode if bind engine
> +	 * @last_fence: last fence on exec queue, protected by vm->lock in write
> +	 * mode if bind exec queue, protected by dma resv lock if non-bind exec
> +	 * queue
>   	 */
>   	struct dma_fence *last_fence;
>   
> diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> index 73ef259aa387..2461e7d4814c 100644
> --- a/drivers/gpu/drm/xe/xe_sync.c
> +++ b/drivers/gpu/drm/xe/xe_sync.c
> @@ -100,7 +100,7 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
>   int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
>   			struct xe_sync_entry *sync,
>   			struct drm_xe_sync __user *sync_user,
> -			bool exec, bool no_dma_fences)
> +			bool exec, bool no_dma_fences, bool exec_nop)
>   {
>   	struct drm_xe_sync sync_in;
>   	int err;
> @@ -171,6 +171,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
>   		break;
>   
>   	case DRM_XE_SYNC_USER_FENCE:
> +		if (XE_IOCTL_DBG(xe, exec_nop))
> +			return -EOPNOTSUPP;
> +
>   		if (XE_IOCTL_DBG(xe, !signal))
>   			return -EOPNOTSUPP;
>   
> diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
> index 30958ddc4cdc..98f02bb34637 100644
> --- a/drivers/gpu/drm/xe/xe_sync.h
> +++ b/drivers/gpu/drm/xe/xe_sync.h
> @@ -15,7 +15,7 @@ struct xe_sched_job;
>   int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
>   			struct xe_sync_entry *sync,
>   			struct drm_xe_sync __user *sync_user,
> -			bool exec, bool compute_mode);
> +			bool exec, bool compute_mode, bool exec_nop);
>   int xe_sync_entry_wait(struct xe_sync_entry *sync);
>   int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
>   			   struct xe_sched_job *job);
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index d1483744936f..d39135a6a1d2 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3077,7 +3077,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
>   		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
>   					  &syncs_user[num_syncs], false,
> -					  xe_vm_no_dma_fences(vm));
> +					  xe_vm_no_dma_fences(vm), false);
>   		if (err)
>   			goto free_syncs;
>   	}