[PATCH v7 03/13] drm/xe/hw_engine_group: Register hw engine group's exec queues

Matthew Brost matthew.brost at intel.com
Thu Aug 8 03:22:45 UTC 2024


On Wed, Aug 07, 2024 at 06:23:32PM +0200, Francois Dugast wrote:
> Add helpers to safely add and delete the exec queues attached to a hw
> engine group, and make use of them at the time of creation and destruction of
> the exec queues. Keeping track of them is required to control the
> execution mode of the hw engine group.
> 
> v2: Improve error handling and robustness, suspend exec queues created in
>     fault mode if group in dma-fence mode, init queue link (Matt Brost)
> v3: Delete queue from hw engine group when it is destroyed by the user,
>     also clean up at the time of closing the file in case the user did
>     not destroy the queue
> v4: Use correct list when checking if empty, do not add the queue if VM
>     is in xe_vm_in_preempt_fence_mode (Matt Brost)
> 
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_device.c           |  3 ++
>  drivers/gpu/drm/xe/xe_exec_queue.c       | 10 +++++
>  drivers/gpu/drm/xe/xe_exec_queue_types.h |  2 +
>  drivers/gpu/drm/xe/xe_hw_engine_group.c  | 55 ++++++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_hw_engine_group.h  |  4 ++
>  5 files changed, 74 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 1aba6f9eaa19..447e9acbb570 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -37,6 +37,7 @@
>  #include "xe_gt_printk.h"
>  #include "xe_gt_sriov_vf.h"
>  #include "xe_guc.h"
> +#include "xe_hw_engine_group.h"
>  #include "xe_hwmon.h"
>  #include "xe_irq.h"
>  #include "xe_memirq.h"
> @@ -165,6 +166,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
>  	 * vm->lock taken during xe_exec_queue_kill().
>  	 */
>  	xa_for_each(&xef->exec_queue.xa, idx, q) {
> +		if (q->vm && q->hwe->hw_engine_group)

Incongruent with the check in xe_exec_queue_destroy_ioctl() below, which
only tests q->vm.

> +			xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
>  		xe_exec_queue_kill(q);
>  		xe_exec_queue_put(q);
>  	}
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 956dc15b432a..77edb20aa21c 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -14,6 +14,7 @@
>  #include "xe_device.h"
>  #include "xe_gt.h"
>  #include "xe_hw_engine_class_sysfs.h"
> +#include "xe_hw_engine_group.h"
>  #include "xe_hw_fence.h"
>  #include "xe_lrc.h"
>  #include "xe_macros.h"
> @@ -73,6 +74,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
>  	q->ops = gt->exec_queue_ops;
>  	INIT_LIST_HEAD(&q->lr.link);
>  	INIT_LIST_HEAD(&q->multi_gt_link);
> +	INIT_LIST_HEAD(&q->hw_engine_group_link);
>  
>  	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
>  	q->sched_props.preempt_timeout_us =
> @@ -190,6 +192,7 @@ void xe_exec_queue_destroy(struct kref *ref)
>  	struct xe_exec_queue *eq, *next;
>  
>  	xe_exec_queue_last_fence_put_unlocked(q);
> +

Nit: unrelated newline.

>  	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
>  		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
>  					 multi_gt_link)
> @@ -615,6 +618,10 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>  			if (XE_IOCTL_DBG(xe, err))
>  				goto put_exec_queue;
>  		}
> +
> +		err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
> +		if (err)
> +			goto put_exec_queue;
>  	}
>  
>  	mutex_lock(&xef->exec_queue.lock);
> @@ -797,6 +804,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
>  	if (XE_IOCTL_DBG(xe, !q))
>  		return -ENOENT;
>  
> +	if (q->vm)

Here is where it is incongruent.

User exec queues must have a VM too, and currently must have a
q->hwe->hw_engine_group as the GSC is not exposed to the user.

I guess if you really want to be safe...

if (q->vm && q->hwe->hw_engine_group)

Or just drop it and let the asserts handle misuse.
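
Either way, the two call sites should match. A rough sketch (untested)
of the safe variant, applied both here and in xe_file_close():

	if (q->vm && q->hwe->hw_engine_group)
		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);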

> +		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
> +
>  	xe_exec_queue_kill(q);
>  
>  	trace_xe_exec_queue_close(q);
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index 1408b02eea53..315f874426bd 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -142,6 +142,8 @@ struct xe_exec_queue {
>  	 * Protected by @vm's resv. Unused if @vm == NULL.
>  	 */
>  	u64 tlb_flush_seqno;
> +	/** @hw_engine_group_link: link into exec queues in the same hw engine group */
> +	struct list_head hw_engine_group_link;
>  	/** @lrc: logical ring context for this exec queue */
>  	struct xe_lrc *lrc[];
>  };
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> index 1d109c08c7a6..52e1dc78518e 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> @@ -5,9 +5,12 @@
>  
>  #include <drm/drm_managed.h>
>  
> +#include "xe_assert.h"
>  #include "xe_device.h"
> +#include "xe_exec_queue.h"
>  #include "xe_gt.h"
>  #include "xe_hw_engine_group.h"
> +#include "xe_vm.h"
>  
>  static void
>  hw_engine_group_free(struct drm_device *drm, void *arg)
> @@ -100,3 +103,55 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
>  
>  	return err;
>  }
> +
> +/**
> + * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
> + * @group: The hw engine group
> + * @q: The exec_queue
> + *
> + * Return: 0 on success,
> + *	    -EINTR if the lock could not be acquired
> + */
> +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
> +{
> +	int err;
> +	struct xe_device *xe = gt_to_xe(q->gt);
> +
> +	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
> +	xe_assert(xe, q->vm);
> +
> +	if (xe_vm_in_preempt_fence_mode(q->vm))
> +		return 0;
> +
> +	err = down_write_killable(&group->mode_sem);
> +	if (err)
> +		return err;
> +
> +	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
> +		q->ops->suspend(q);
> +		q->ops->suspend_wait(q);

suspend_wait has a return value, best to check it and unwind on failure
even though it really shouldn't be possible to fail here. Best not to
assume it can't fail.
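
Something like this, as a rough sketch (untested), unwinding and
dropping the semaphore if the wait fails:

	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
		q->ops->suspend(q);
		err = q->ops->suspend_wait(q);
		if (err)
			goto err_unlock;

		queue_work(group->resume_wq, &group->resume_work);
	}

	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
	up_write(&group->mode_sem);

	return 0;

err_unlock:
	up_write(&group->mode_sem);
	return err;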

> +		queue_work(group->resume_wq, &group->resume_work);
> +	}
> +
> +	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
> +	up_write(&group->mode_sem);
> +
> +	return 0;
> +}
> +
> +/**
> + * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
> + * @group: The hw engine group
> + * @q: The exec_queue
> + */
> +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
> +{

Nit:
xe_assert(gt_to_xe(q->gt), group);

Matt

> +	xe_assert(gt_to_xe(q->gt), q->vm);
> +
> +	down_write(&group->mode_sem);
> +
> +	if (!list_empty(&q->hw_engine_group_link))
> +		list_del(&q->hw_engine_group_link);
> +
> +	up_write(&group->mode_sem);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> index c2648f87f7ef..857a83787504 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> @@ -9,8 +9,12 @@
>  #include "xe_hw_engine_group_types.h"
>  
>  struct drm_device;
> +struct xe_exec_queue;
>  struct xe_gt;
>  
>  int xe_hw_engine_setup_groups(struct xe_gt *gt);
>  
> +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q);
> +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q);
> +
>  #endif
> -- 
> 2.43.0
> 

