[PATCH 4/4] drm/xe: Finish refactoring of exec_queue_create
Matthew Brost
matthew.brost at intel.com
Wed Jan 3 08:07:19 UTC 2024
On Tue, Jan 02, 2024 at 01:17:31PM -0800, Brian Welty wrote:
> Setting of exec_queue user extensions is moved from the end of the
> ioctl function to earlier, into __xe_exec_queue_alloc(). This fixes a
> bug in which the USM attributes for access counters were applied too
> late and effectively ignored.
>
> However, in order to apply user extensions this early, we can no
> longer call the q->ops functions. Instead, the user extension
> functions simply update the q->sched_props values directly, which is
> also more efficient; the backend then applies them during
> q->ops->init().
>
> Signed-off-by: Brian Welty <brian.welty at intel.com>
> ---
> drivers/gpu/drm/xe/xe_exec_queue.c | 74 ++++++++++++++++++++----------
> drivers/gpu/drm/xe/xe_exec_queue.h | 3 +-
> drivers/gpu/drm/xe/xe_gsc.c | 2 +-
> drivers/gpu/drm/xe/xe_gt.c | 4 +-
> drivers/gpu/drm/xe/xe_migrate.c | 2 +-
> 5 files changed, 57 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 9891cddba71c..8a2d51b994cf 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -30,14 +30,18 @@ enum xe_exec_queue_sched_prop {
> XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
> };
>
> +static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
> + u64 extensions, int ext_number, bool create);
> +
> static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
> struct xe_vm *vm,
> u32 logical_mask,
> u16 width, struct xe_hw_engine *hwe,
> - u32 flags)
> + u32 flags, u64 extensions)
> {
> struct xe_exec_queue *q;
> struct xe_gt *gt = hwe->gt;
> + int err;
>
> /* only kernel queues can be permanent */
> XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
> @@ -50,8 +54,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
> q->flags = flags;
> q->hwe = hwe;
> q->gt = gt;
> - if (vm)
> - q->vm = xe_vm_get(vm);
> q->class = hwe->class;
> q->width = width;
> q->logical_mask = logical_mask;
> @@ -69,6 +71,21 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
> hwe->eclass->sched_props.job_timeout_ms;
> q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
>
> + if (extensions) {
> + /*
> + * may set q->usm, must come before xe_lrc_init(),
> + * may overwrite q->sched_props, must come before q->ops->init()
> + */
> + err = exec_queue_user_extensions(xe, q, extensions, 0, true);
> + if (err) {
> + kfree(q);
> + return ERR_PTR(err);
> + }
> + }
> +
> + if (vm)
> + q->vm = xe_vm_get(vm);
> +
> if (xe_exec_queue_is_parallel(q)) {
> q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
> q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
> @@ -124,12 +141,14 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
>
> struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
> u32 logical_mask, u16 width,
> - struct xe_hw_engine *hwe, u32 flags)
> + struct xe_hw_engine *hwe, u32 flags,
> + u64 extensions)
> {
> struct xe_exec_queue *q;
> int err;
>
> - q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags);
> + q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
> + extensions);
> if (IS_ERR(q))
> return q;
>
> @@ -174,7 +193,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
> if (!logical_mask)
> return ERR_PTR(-ENODEV);
>
> - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags);
> + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0);
> }
>
> void xe_exec_queue_destroy(struct kref *ref)
> @@ -258,7 +277,11 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q
> if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
> return -EPERM;
>
> - return q->ops->set_priority(q, value);
> + if (!create)
> + return q->ops->set_priority(q, value);
> + q->sched_props.priority = value;
> +
> + return 0;
> }
>
> static bool xe_exec_queue_enforce_schedule_limit(void)
> @@ -325,7 +348,11 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
> !xe_hw_engine_timeout_in_range(value, min, max))
> return -EINVAL;
>
> - return q->ops->set_timeslice(q, value);
> + if (!create)
> + return q->ops->set_timeslice(q, value);
> + q->sched_props.timeslice_us = value;
> +
> + return 0;
> }
>
> static int exec_queue_set_preemption_timeout(struct xe_device *xe,
> @@ -341,7 +368,11 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe,
> !xe_hw_engine_timeout_in_range(value, min, max))
> return -EINVAL;
>
> - return q->ops->set_preempt_timeout(q, value);
> + if (!create)
> + return q->ops->set_preempt_timeout(q, value);
> + q->sched_props.preempt_timeout_us = value;
> +
> + return 0;
Nit: I found this tricky to read with the indentation as is; how about:
	if (!create)
		return q->ops->set_preempt_timeout(q, value);
	q->sched_props.preempt_timeout_us = value;
	return 0;
Just my preference. Same for all other cases.
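For example, the priority case further up would then read along these
lines (same change, just reflowed, nothing functional):

	if (!create)
		return q->ops->set_priority(q, value);
	q->sched_props.priority = value;
	return 0;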
> }
>
> static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q,
> @@ -376,7 +407,9 @@ static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue
> !xe_hw_engine_timeout_in_range(value, min, max))
> return -EINVAL;
>
> - return q->ops->set_job_timeout(q, value);
> + q->sched_props.job_timeout_ms = value;
set_job_timeout vfunc can now be deleted, right?
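Assuming no other callers remain, that would just mean dropping the
member from the ops struct (rough sketch, going from memory of
xe_exec_queue_types.h):

	struct xe_exec_queue_ops {
		...
-		int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms);
		...
	};

along with its backend implementation in the GuC submission code.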
Everything else LGTM.
Matt
> +
> + return 0;
> }
>
> static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
> @@ -651,6 +684,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
> for_each_gt(gt, xe, id) {
> struct xe_exec_queue *new;
> + u32 flags;
>
> if (xe_gt_is_media_type(gt))
> continue;
> @@ -669,14 +703,13 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> /* The migration vm doesn't hold rpm ref */
> xe_device_mem_access_get(xe);
>
> + flags = EXEC_QUEUE_FLAG_PERSISTENT | EXEC_QUEUE_FLAG_VM |
> + (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0);
> +
> migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
> new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
> - args->width, hwe,
> - EXEC_QUEUE_FLAG_PERSISTENT |
> - EXEC_QUEUE_FLAG_VM |
> - (id ?
> - EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
> - 0));
> + args->width, hwe, flags,
> + args->extensions);
>
> xe_device_mem_access_put(xe); /* now held by engine */
>
> @@ -724,7 +757,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> q = xe_exec_queue_create(xe, vm, logical_mask,
> args->width, hwe,
> xe_vm_in_lr_mode(vm) ? 0 :
> - EXEC_QUEUE_FLAG_PERSISTENT);
> + EXEC_QUEUE_FLAG_PERSISTENT,
> + args->extensions);
> up_read(&vm->lock);
> xe_vm_put(vm);
> if (IS_ERR(q))
> @@ -740,12 +774,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> }
> }
>
> - if (args->extensions) {
> - err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
> - if (XE_IOCTL_DBG(xe, err))
> - goto kill_exec_queue;
> - }
> -
> q->persistent.xef = xef;
>
> mutex_lock(&xef->exec_queue.lock);
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
> index d959cc4a1a82..02ce8d204622 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.h
> @@ -16,7 +16,8 @@ struct xe_file;
>
> struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
> u32 logical_mask, u16 width,
> - struct xe_hw_engine *hw_engine, u32 flags);
> + struct xe_hw_engine *hw_engine, u32 flags,
> + u64 extensions);
> struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
> struct xe_vm *vm,
> enum xe_engine_class class, u32 flags);
> diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
> index a8a895cf4b44..5b84fc9ab8ad 100644
> --- a/drivers/gpu/drm/xe/xe_gsc.c
> +++ b/drivers/gpu/drm/xe/xe_gsc.c
> @@ -356,7 +356,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
> q = xe_exec_queue_create(xe, NULL,
> BIT(hwe->logical_instance), 1, hwe,
> EXEC_QUEUE_FLAG_KERNEL |
> - EXEC_QUEUE_FLAG_PERMANENT);
> + EXEC_QUEUE_FLAG_PERMANENT, 0);
> if (IS_ERR(q)) {
> xe_gt_err(gt, "Failed to create queue for GSC submission\n");
> err = PTR_ERR(q);
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 3af2adec1295..0f2258dc4a00 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -235,7 +235,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
> return -ENOMEM;
>
> q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
> - hwe, EXEC_QUEUE_FLAG_KERNEL);
> + hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
> if (IS_ERR(q)) {
> err = PTR_ERR(q);
> xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
> @@ -252,7 +252,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
> }
>
> nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
> - 1, hwe, EXEC_QUEUE_FLAG_KERNEL);
> + 1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
> if (IS_ERR(nop_q)) {
> err = PTR_ERR(nop_q);
> xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 6567abcb3c6d..e1615717486a 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -344,7 +344,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
>
> m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
> EXEC_QUEUE_FLAG_KERNEL |
> - EXEC_QUEUE_FLAG_PERMANENT);
> + EXEC_QUEUE_FLAG_PERMANENT, 0);
> } else {
> m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
> XE_ENGINE_CLASS_COPY,
> --
> 2.43.0
>