[PATCH 5/5] drm/xe: Allow suspend / resume to be safely called multiple times
Matthew Brost
matthew.brost at intel.com
Fri Aug 9 19:19:29 UTC 2024
Switching modes between LR and dma-fence can result in multiple calls to
suspend / resume. Make these calls safe while still enforcing call
order.
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
drivers/gpu/drm/xe/xe_guc_submit.c | 40 ++++++++++++++++++++++++------
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index a9af33ff8aa7..c5a824981362 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1388,6 +1388,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
#define SET_SCHED_PROPS 2
#define SUSPEND 3
#define RESUME 4
+#define OPCODE_MASK 0xf
+#define MSG_LOCKED (1 << 8)
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
@@ -1432,7 +1434,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
struct xe_device *xe = guc_to_xe(guc);
struct xe_guc_exec_queue *ge;
long timeout;
- int err;
+ int err, i;
xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
@@ -1444,6 +1446,9 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
ge->q = q;
init_waitqueue_head(&ge->suspend_wait);
+ for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
+ INIT_LIST_HEAD(&ge->static_msgs[i].link);
+
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
msecs_to_jiffies(q->sched_props.job_timeout_ms);
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
@@ -1506,11 +1511,26 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg
xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
INIT_LIST_HEAD(&msg->link);
- msg->opcode = opcode;
+ msg->opcode = opcode & OPCODE_MASK;
msg->private_data = q;
trace_xe_sched_msg_add(msg);
- xe_sched_add_msg(&q->guc->sched, msg);
+ if (opcode & MSG_LOCKED)
+ xe_sched_add_msg_locked(&q->guc->sched, msg);
+ else
+ xe_sched_add_msg(&q->guc->sched, msg);
+}
+
+static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
+ struct xe_sched_msg *msg,
+ u32 opcode)
+{
+ if (!list_empty(&msg->link))
+ return false;
+
+ guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
+
+ return true;
}
#define STATIC_MSG_CLEANUP 0
@@ -1584,13 +1604,16 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
- if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending)
+ if (exec_queue_killed_or_banned_or_wedged(q))
return -EINVAL;
- q->guc->suspend_pending = true;
- guc_exec_queue_add_msg(q, msg, SUSPEND);
+ xe_sched_msg_lock(sched);
+ if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
+ q->guc->suspend_pending = true;
+ xe_sched_msg_unlock(sched);
return 0;
}
@@ -1624,13 +1647,16 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
xe_assert(xe, !q->guc->suspend_pending);
- guc_exec_queue_add_msg(q, msg, RESUME);
+ xe_sched_msg_lock(sched);
+ guc_exec_queue_try_add_msg(q, msg, RESUME);
+ xe_sched_msg_unlock(sched);
}
static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
--
2.34.1
More information about the Intel-xe
mailing list