[PATCH v2 2/3] drm/xe/guc: Track pending-enable source in submission state
Matthew Brost
matthew.brost at intel.com
Fri Aug 15 22:22:14 UTC 2025
Add explicit tracking in the GuC submission state to record the source
of a pending enable (TDR vs. resume path vs. submission). Disambiguating
the origin lets the GuC submission state machine apply the correct
recovery/replay behavior.
This helps VF resume: when the device comes back, the state machine knows
whether the pending enable stems from timeout recovery, from a resume
sequence, or submission and can gate sequencing and fixups accordingly.
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
drivers/gpu/drm/xe/xe_guc_submit.c | 36 ++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 32601bc8cf9a..75efbee5bc9d 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -68,6 +68,8 @@ exec_queue_to_guc(struct xe_exec_queue *q)
#define EXEC_QUEUE_STATE_BANNED (1 << 9)
#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10)
#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11)
+#define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 12)
+#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT (1 << 13)
static bool exec_queue_registered(struct xe_exec_queue *q)
{
@@ -219,6 +221,36 @@ static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
}
+static bool __maybe_unused exec_queue_pending_resume(struct xe_exec_queue *q)
+{
+ return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
+}
+
+static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
+{
+ atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
+{
+ atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
+}
+
+static bool __maybe_unused exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
+{
+ return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT;
+}
+
+static void set_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
+{
+ atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
+{
+ atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
+}
+
static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
return (atomic_read(&q->guc->state) &
@@ -1344,6 +1376,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
return DRM_GPU_SCHED_STAT_RESET;
sched_enable:
+ set_exec_queue_pending_tdr_exit(q);
enable_scheduling(q);
rearm:
/*
@@ -1494,6 +1527,7 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
clear_exec_queue_suspended(q);
if (!exec_queue_enabled(q)) {
q->guc->resume_time = RESUME_PENDING;
+ set_exec_queue_pending_resume(q);
enable_scheduling(q);
}
} else {
@@ -2083,6 +2117,8 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
q->guc->resume_time = ktime_get();
+ clear_exec_queue_pending_resume(q);
+ clear_exec_queue_pending_tdr_exit(q);
clear_exec_queue_pending_enable(q);
smp_wmb();
wake_up_all(&guc->ct.wq);
--
2.34.1
More information about the Intel-xe
mailing list