[PATCH v7 03/13] drm/xe/hw_engine_group: Register hw engine group's exec queues
Matthew Brost
matthew.brost at intel.com
Thu Aug 8 03:22:45 UTC 2024
On Wed, Aug 07, 2024 at 06:23:32PM +0200, Francois Dugast wrote:
> Add helpers to safely add and delete the exec queues attached to a hw
> engine group, and make use them at the time of creation and destruction of
> the exec queues. Keeping track of them is required to control the
> execution mode of the hw engine group.
>
> v2: Improve error handling and robustness, suspend exec queues created in
> fault mode if group in dma-fence mode, init queue link (Matt Brost)
> v3: Delete queue from hw engine group when it is destroyed by the user,
> also clean up at the time of closing the file in case the user did
> not destroy the queue
> v4: Use correct list when checking if empty, do not add the queue if VM
> is in xe_vm_in_preempt_fence_mode (Matt Brost)
>
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> ---
> drivers/gpu/drm/xe/xe_device.c | 3 ++
> drivers/gpu/drm/xe/xe_exec_queue.c | 10 +++++
> drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 +
> drivers/gpu/drm/xe/xe_hw_engine_group.c | 55 ++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_hw_engine_group.h | 4 ++
> 5 files changed, 74 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 1aba6f9eaa19..447e9acbb570 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -37,6 +37,7 @@
> #include "xe_gt_printk.h"
> #include "xe_gt_sriov_vf.h"
> #include "xe_guc.h"
> +#include "xe_hw_engine_group.h"
> #include "xe_hwmon.h"
> #include "xe_irq.h"
> #include "xe_memirq.h"
> @@ -165,6 +166,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
> * vm->lock taken during xe_exec_queue_kill().
> */
> xa_for_each(&xef->exec_queue.xa, idx, q) {
> + if (q->vm && q->hwe->hw_engine_group)
Incongruent with the check in xe_exec_queue_destroy_ioctl() below, which
only tests q->vm.
> + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
> xe_exec_queue_kill(q);
> xe_exec_queue_put(q);
> }
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 956dc15b432a..77edb20aa21c 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -14,6 +14,7 @@
> #include "xe_device.h"
> #include "xe_gt.h"
> #include "xe_hw_engine_class_sysfs.h"
> +#include "xe_hw_engine_group.h"
> #include "xe_hw_fence.h"
> #include "xe_lrc.h"
> #include "xe_macros.h"
> @@ -73,6 +74,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
> q->ops = gt->exec_queue_ops;
> INIT_LIST_HEAD(&q->lr.link);
> INIT_LIST_HEAD(&q->multi_gt_link);
> + INIT_LIST_HEAD(&q->hw_engine_group_link);
>
> q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
> q->sched_props.preempt_timeout_us =
> @@ -190,6 +192,7 @@ void xe_exec_queue_destroy(struct kref *ref)
> struct xe_exec_queue *eq, *next;
>
> xe_exec_queue_last_fence_put_unlocked(q);
> +
Nit: unrelated newline.
> if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
> list_for_each_entry_safe(eq, next, &q->multi_gt_list,
> multi_gt_link)
> @@ -615,6 +618,10 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> if (XE_IOCTL_DBG(xe, err))
> goto put_exec_queue;
> }
> +
> + err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
> + if (err)
> + goto put_exec_queue;
> }
>
> mutex_lock(&xef->exec_queue.lock);
> @@ -797,6 +804,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
> if (XE_IOCTL_DBG(xe, !q))
> return -ENOENT;
>
> + if (q->vm)
Here is where it is incongruent with the check in xe_file_close() above.
User exec queues must have a VM too, and currently must have a
q->hwe->hw_engine_group as the GSC is not exposed to the user.
I guess if you really want to be safe...
if (q->vm && q->hwe->hw_engine_group)
Or just drop it and let the asserts handle misuse.
> + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
> +
> xe_exec_queue_kill(q);
>
> trace_xe_exec_queue_close(q);
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index 1408b02eea53..315f874426bd 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -142,6 +142,8 @@ struct xe_exec_queue {
> * Protected by @vm's resv. Unused if @vm == NULL.
> */
> u64 tlb_flush_seqno;
> + /** @hw_engine_group_link: link into exec queues in the same hw engine group */
> + struct list_head hw_engine_group_link;
> /** @lrc: logical ring context for this exec queue */
> struct xe_lrc *lrc[];
> };
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> index 1d109c08c7a6..52e1dc78518e 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> @@ -5,9 +5,12 @@
>
> #include <drm/drm_managed.h>
>
> +#include "xe_assert.h"
> #include "xe_device.h"
> +#include "xe_exec_queue.h"
> #include "xe_gt.h"
> #include "xe_hw_engine_group.h"
> +#include "xe_vm.h"
>
> static void
> hw_engine_group_free(struct drm_device *drm, void *arg)
> @@ -100,3 +103,55 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
>
> return err;
> }
> +
> +/**
> + * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
> + * @group: The hw engine group
> + * @q: The exec_queue
> + *
> + * Return: 0 on success,
> + * -EINTR if the lock could not be acquired
> + */
> +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
> +{
> + int err;
> + struct xe_device *xe = gt_to_xe(q->gt);
> +
> + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
> + xe_assert(xe, q->vm);
> +
> + if (xe_vm_in_preempt_fence_mode(q->vm))
> + return 0;
> +
> + err = down_write_killable(&group->mode_sem);
> + if (err)
> + return err;
> +
> + if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
> + q->ops->suspend(q);
> + q->ops->suspend_wait(q);
suspend_wait has a return value; best to check it and unwind on failure,
even though it really shouldn't be possible for it to fail here. Best not
to assume it can't fail.
> + queue_work(group->resume_wq, &group->resume_work);
> + }
> +
> + list_add(&q->hw_engine_group_link, &group->exec_queue_list);
> + up_write(&group->mode_sem);
> +
> + return 0;
> +}
> +
> +/**
> + * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
> + * @group: The hw engine group
> + * @q: The exec_queue
> + */
> +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
> +{
Nit:
xe_assert(gt_to_xe(q->gt), group);
Matt
> + xe_assert(gt_to_xe(q->gt), q->vm);
> +
> + down_write(&group->mode_sem);
> +
> + if (!list_empty(&q->hw_engine_group_link))
> + list_del(&q->hw_engine_group_link);
> +
> + up_write(&group->mode_sem);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> index c2648f87f7ef..857a83787504 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> @@ -9,8 +9,12 @@
> #include "xe_hw_engine_group_types.h"
>
> struct drm_device;
> +struct xe_exec_queue;
> struct xe_gt;
>
> int xe_hw_engine_setup_groups(struct xe_gt *gt);
>
> +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q);
> +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q);
> +
> #endif
> --
> 2.43.0
>
More information about the Intel-xe
mailing list