[PATCH 5/5] drm/xe: Allow suspend / resume to be safely called multiple times
Cavitt, Jonathan
jonathan.cavitt at intel.com
Fri Aug 9 21:02:06 UTC 2024
-----Original Message-----
From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Matthew Brost
Sent: Friday, August 9, 2024 12:19 PM
To: intel-xe at lists.freedesktop.org
Cc: Dugast, Francois <francois.dugast at intel.com>
Subject: [PATCH 5/5] drm/xe: Allow suspend / resume to be safely called multiple times
>
> Switching modes between LR and dma-fence can result in multiple calls to
> suspend / resume. Make these calls safe while still enforcing call
> order.
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc_submit.c | 40 ++++++++++++++++++++++++------
> 1 file changed, 33 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index a9af33ff8aa7..c5a824981362 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -1388,6 +1388,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
> #define SET_SCHED_PROPS 2
> #define SUSPEND 3
> #define RESUME 4
> +#define OPCODE_MASK 0xf
> +#define MSG_LOCKED (1 << 8)
>
> static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
> {
> @@ -1432,7 +1434,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
> struct xe_device *xe = guc_to_xe(guc);
> struct xe_guc_exec_queue *ge;
> long timeout;
> - int err;
> + int err, i;
>
> xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
>
> @@ -1444,6 +1446,9 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
> ge->q = q;
> init_waitqueue_head(&ge->suspend_wait);
>
> + for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
> + INIT_LIST_HEAD(&ge->static_msgs[i].link);
> +
> timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
> msecs_to_jiffies(q->sched_props.job_timeout_ms);
> err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
> @@ -1506,11 +1511,26 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg
> xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
>
> INIT_LIST_HEAD(&msg->link);
> - msg->opcode = opcode;
> + msg->opcode = opcode & OPCODE_MASK;
> msg->private_data = q;
>
> trace_xe_sched_msg_add(msg);
> - xe_sched_add_msg(&q->guc->sched, msg);
> + if (opcode & MSG_LOCKED)
> + xe_sched_add_msg_locked(&q->guc->sched, msg);
> + else
> + xe_sched_add_msg(&q->guc->sched, msg);
> +}
It seems like we only ever call this guc_exec_queue_add_msg function with
the MSG_LOCKED flag set in the opcode in the suspend and resume cases. On paper,
that means we could probably just check whether the opcode is >= 2 and cover all
the current cases. However, using the new explicit MSG_LOCKED flag in the opcode
is definitely more scalable, so this is less a suggestion and more an
observation. No changes needed, as far as I can tell.
Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
-Jonathan Cavitt
> +
> +static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
> + struct xe_sched_msg *msg,
> + u32 opcode)
> +{
> + if (!list_empty(&msg->link))
> + return false;
> +
> + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
> +
> + return true;
> }
>
> #define STATIC_MSG_CLEANUP 0
> @@ -1584,13 +1604,16 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
>
> static int guc_exec_queue_suspend(struct xe_exec_queue *q)
> {
> + struct xe_gpu_scheduler *sched = &q->guc->sched;
> struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
>
> - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending)
> + if (exec_queue_killed_or_banned_or_wedged(q))
> return -EINVAL;
>
> - q->guc->suspend_pending = true;
> - guc_exec_queue_add_msg(q, msg, SUSPEND);
> + xe_sched_msg_lock(sched);
> + if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
> + q->guc->suspend_pending = true;
> + xe_sched_msg_unlock(sched);
>
> return 0;
> }
> @@ -1624,13 +1647,16 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
>
> static void guc_exec_queue_resume(struct xe_exec_queue *q)
> {
> + struct xe_gpu_scheduler *sched = &q->guc->sched;
> struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
> struct xe_guc *guc = exec_queue_to_guc(q);
> struct xe_device *xe = guc_to_xe(guc);
>
> xe_assert(xe, !q->guc->suspend_pending);
>
> - guc_exec_queue_add_msg(q, msg, RESUME);
> + xe_sched_msg_lock(sched);
> + guc_exec_queue_try_add_msg(q, msg, RESUME);
> + xe_sched_msg_unlock(sched);
> }
>
> static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
> --
> 2.34.1
>
>
More information about the Intel-xe
mailing list