[PATCH] drm/xe: Kill exec queues synchronously

Matthew Brost matthew.brost at intel.com
Tue Jul 23 15:14:54 UTC 2024


When an exec queue is killed, ensure for overall safety that it is no
longer running on the GPU by waiting for the kill to complete. This may
include waiting for a GT reset to complete.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue_types.h |  6 +++++-
 drivers/gpu/drm/xe/xe_guc_submit.c       | 13 +++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 1408b02eea53..147e9407ce9b 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -152,7 +152,11 @@ struct xe_exec_queue {
 struct xe_exec_queue_ops {
 	/** @init: Initialize exec queue for submission backend */
 	int (*init)(struct xe_exec_queue *q);
-	/** @kill: Kill inflight submissions for backend */
+	/**
+	 * @kill: Kill inflight submissions for backend and wait synchronously
+	 * for the kill to complete. For safety, must never be called while
+	 * holding any locks, as a device reset may be the last resort for kill.
+	 */
 	void (*kill)(struct xe_exec_queue *q);
 	/** @fini: Fini exec queue for submission backend */
 	void (*fini)(struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index da2ead86b9ae..df03fdb83dbd 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1496,10 +1496,23 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 
 static void guc_exec_queue_kill(struct xe_exec_queue *q)
 {
+	struct xe_guc *guc = exec_queue_to_guc(q);
+	struct xe_device *xe = guc_to_xe(guc);
+	int ret;
+
 	trace_xe_exec_queue_kill(q);
 	set_exec_queue_killed(q);
 	__suspend_fence_signal(q);
 	xe_guc_exec_queue_trigger_cleanup(q);
+
+	ret = wait_event_timeout(guc->ct.wq,
+				 !exec_queue_pending_disable(q) ||
+				 guc_read_stopped(guc), HZ * 5);
+	if (!ret) {
+		drm_warn(&xe->drm, "Schedule disable failed to respond upon kill\n");
+		xe_gt_reset_async(q->gt);
+	}
+	xe_guc_submit_reset_wait(guc);
 }
 
 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
-- 
2.34.1



More information about the Intel-xe mailing list