[Intel-xe] [PATCH] drm/xe: Add child contexts to the GuC context lookup

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Thu Aug 31 00:29:31 UTC 2023


The CAT_ERROR message from the GuC provides the guc id of the context
that caused the problem, which can be a child context. We therefore
need to be able to match that id to the exec_queue that owns it, which
we do by adding child contexts to the context lookup.

While at it, fix the error path of the guc id allocation code, which
unconditionally called ida_simple_remove() and therefore did not
correctly free the ids allocated from the bitmap for parallel queues.

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/590
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
Cc: John Harrison <John.C.Harrison at Intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 44 ++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index c6a9e17d6889..f912b6fd51ad 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -245,10 +245,28 @@ int xe_guc_submit_init(struct xe_guc *guc)
 	return 0;
 }
 
+static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
+{
+	int i;
+
+	lockdep_assert_held(&guc->submission_state.lock);
+
+	for (i = 0; i < xa_count; ++i)
+		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
+
+	if (xe_exec_queue_is_parallel(q))
+		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
+				      q->guc->id - GUC_ID_START_MLRC,
+				      order_base_2(q->width));
+	else
+		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+}
+
 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	int ret;
 	void *ptr;
+	int i;
 
 	/*
 	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
@@ -275,30 +293,27 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 	if (xe_exec_queue_is_parallel(q))
 		q->guc->id += GUC_ID_START_MLRC;
 
-	ptr = xa_store(&guc->submission_state.exec_queue_lookup,
-		       q->guc->id, q, GFP_NOWAIT);
-	if (IS_ERR(ptr)) {
-		ret = PTR_ERR(ptr);
-		goto err_release;
+	for (i = 0; i < q->width; ++i) {
+		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
+			       q->guc->id + i, q, GFP_NOWAIT);
+		if (IS_ERR(ptr)) {
+			ret = PTR_ERR(ptr);
+			goto err_release;
+		}
 	}
 
 	return 0;
 
 err_release:
-	ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+	__release_guc_id(guc, q, i);
+
 	return ret;
 }
 
 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	mutex_lock(&guc->submission_state.lock);
-	xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id);
-	if (xe_exec_queue_is_parallel(q))
-		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
-				      q->guc->id - GUC_ID_START_MLRC,
-				      order_base_2(q->width));
-	else
-		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+	__release_guc_id(guc, q, q->width);
 	mutex_unlock(&guc->submission_state.lock);
 }
 
@@ -1473,7 +1488,8 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
 		return NULL;
 	}
 
-	XE_WARN_ON(q->guc->id != guc_id);
+	XE_WARN_ON(guc_id < q->guc->id);
+	XE_WARN_ON(guc_id > (q->guc->id + q->width - 1));
 
 	return q;
 }
-- 
2.41.0



More information about the Intel-xe mailing list