[Intel-xe] [PATCH 4/5] drm/xe: Allow num_batch_buffer == 0 in exec IOCTL

Thu Nov 16 20:57:19 UTC 2023

On Thu, 2023-11-16 at 11:40 -0800, Matthew Brost wrote:
> The idea being out-syncs can signal indicating all previous operations
> on the exec queue are complete. An example use case of this would be
> support for implementing vkQueueWaitForIdle easily.

Nit: the Vulkan entry point is named vkQueueWaitIdle, not vkQueueWaitForIdle.

That is useful for Mesa.

> 
> v2: Don't add last_fence for VM's that do not support dma fences
> v3: Use a flags field instead of several bools in sync parse (Thomas)
> 
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_exec.c             | 23 ++++++++++++++++++++---
>  drivers/gpu/drm/xe/xe_exec_queue.c       |  5 ++++-
>  drivers/gpu/drm/xe/xe_exec_queue_types.h |  5 +++--
>  drivers/gpu/drm/xe/xe_sync.c             |  4 ++++
>  drivers/gpu/drm/xe/xe_sync.h             |  6 +++++-
>  5 files changed, 36 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index 7d4d22c81a42..4989bc6038af 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -161,7 +161,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
>  		return -EINVAL;
>  
> -	if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer))
> +	if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
> +			 q->width != args->num_batch_buffer))
>  		return -EINVAL;
>  
>  	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
> @@ -183,12 +184,14 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
>  					  &syncs_user[i], SYNC_PARSE_FLAG_EXEC |
>  					  (xe_vm_no_dma_fences(vm) ?
> -					   SYNC_PARSE_FLAG_LR_MODE : 0));
> +					   SYNC_PARSE_FLAG_LR_MODE : 0) |
> +					  (!args->num_batch_buffer ?
> +					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
>  		if (err)
>  			goto err_syncs;
>  	}
>  
> -	if (xe_exec_queue_is_parallel(q)) {
> +	if (args->num_batch_buffer && xe_exec_queue_is_parallel(q)) {
>  		err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
>  				       q->width);
>  		if (err) {
> @@ -235,6 +238,18 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		goto err_exec;
>  	}
>  
> +	if (!args->num_batch_buffer) {
> +		if (!xe_vm_no_dma_fences(vm)) {
> +			struct dma_fence *fence =
> +				xe_exec_queue_last_fence_get(q, vm);
> +
> +			for (i = 0; i < num_syncs; i++)
> +				xe_sync_entry_signal(&syncs[i], NULL, fence);
> +		}
> +
> +		goto err_exec;
> +	}
> +
>  	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
>  		err = -EWOULDBLOCK;
>  		goto err_exec;
> @@ -328,6 +343,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  
>  	if (xe_exec_queue_is_lr(q))
>  		q->ring_ops->emit_job(job);
> +	if (!xe_vm_no_dma_fences(vm))
> +		xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
>  	xe_sched_job_push(job);
>  	xe_vm_reactivate_rebind(vm);
>  
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 4fd44a9203e4..35710b66e5de 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -924,7 +924,10 @@ int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
>  static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
>  						    struct xe_vm *vm)
>  {
> -	lockdep_assert_held_write(&vm->lock);
> +	if (q->flags & EXEC_QUEUE_FLAG_VM)
> +		lockdep_assert_held_write(&vm->lock);
> +	else
> +		xe_vm_assert_held(vm);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index 5ba47a5cfdbd..52f0927d0d9b 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -56,8 +56,9 @@ struct xe_exec_queue {
>  	struct xe_hw_fence_irq *fence_irq;
>  
>  	/**
> -	 * @last_fence: last fence on engine, protected by vm->lock in write
> -	 * mode if bind engine
> +	 * @last_fence: last fence on exec queue, protected by vm->lock in write
> +	 * mode if bind exec queue, protected by dma resv lock if non-bind exec
> +	 * queue
>  	 */
>  	struct dma_fence *last_fence;
>  
> diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> index 016d0a2ea5bc..cfe8724ee954 100644
> --- a/drivers/gpu/drm/xe/xe_sync.c
> +++ b/drivers/gpu/drm/xe/xe_sync.c
> @@ -106,6 +106,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
>  	int err;
>  	bool exec = flags & SYNC_PARSE_FLAG_EXEC;
>  	bool lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE;
> +	bool disallow_user_fence = flags & SYNC_PARSE_FLAG_DISALLOW_USER_FENCE;
>  	bool signal;
>  
>  	if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
> @@ -173,6 +174,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
>  		break;
>  
>  	case DRM_XE_SYNC_USER_FENCE:
> +		if (XE_IOCTL_DBG(xe, disallow_user_fence))
> +			return -EOPNOTSUPP;
> +
>  		if (XE_IOCTL_DBG(xe, !signal))
>  			return -EOPNOTSUPP;
>  
> diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
> index 30958ddc4cdc..45f4371e94b9 100644
> --- a/drivers/gpu/drm/xe/xe_sync.h
> +++ b/drivers/gpu/drm/xe/xe_sync.h
> @@ -12,10 +12,14 @@ struct xe_device;
>  struct xe_file;
>  struct xe_sched_job;
>  
> +#define SYNC_PARSE_FLAG_EXEC			BIT(0)
> +#define SYNC_PARSE_FLAG_LR_MODE			BIT(1)
> +#define SYNC_PARSE_FLAG_DISALLOW_USER_FENCE	BIT(2)
> +
>  int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
>  			struct xe_sync_entry *sync,
>  			struct drm_xe_sync __user *sync_user,
> -			bool exec, bool compute_mode);
> +			unsigned int flags);
>  int xe_sync_entry_wait(struct xe_sync_entry *sync);
>  int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
>  			   struct xe_sched_job *job);