[Intel-gfx] [PATCH 2/2] drm/i915/error: capture errored context based on request context-id

Arun Siluvery arun.siluvery at linux.intel.com
Thu Aug 11 16:09:01 UTC 2016


From: Dave Gordon <david.s.gordon at intel.com>

Context capture hasn't worked for a while now, since the introduction of
execlists because the function that records active context is using CCID
register but this register contents are not valid in Execlist mode; this
patch makes it work again by using a different way of identifying the
context of interest in execlist mode.

For: VIZ-2021
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
Signed-off-by: Dave Gordon <david.s.gordon at intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c | 46 +++++++++++++++++++++++++----------
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7971c76..94b9314 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -546,6 +546,8 @@ struct drm_i915_error_state {
 		u32 rc_psmi; /* sleep state */
 		u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
 
+		u64 ctx_desc;
+
 		struct drm_i915_error_object {
 			int page_count;
 			u64 gtt_offset;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3209f6a..fa365ee 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -386,7 +386,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
 	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
 	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
-	err_printf(m, "CCID: 0x%08x\n", error->ccid);
+	if (!i915.enable_execlists)
+		err_printf(m, "CCID: 0x%08x\n", error->ccid);
 	err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);
 
 	for (i = 0; i < dev_priv->num_fence_regs; i++)
@@ -526,9 +527,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		}
 
 		if ((obj = ee->ctx)) {
-			err_printf(m, "%s --- HW Context = 0x%08x\n",
+			err_printf(m, "%s --- HW Context = 0x%08x, %d pages\n",
 				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
+				   lower_32_bits(obj->gtt_offset),
+				   obj->page_count);
 			print_error_obj(m, obj);
 		}
 	}
@@ -1069,16 +1071,29 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine,
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct drm_i915_gem_object *obj;
+	bool elsp_mode = i915.enable_execlists;
 
 	/* Currently render ring is the only HW context user */
-	if (engine->id != RCS || !error->ccid)
+	if (engine->id != RCS)
+		return;
+
+	/* contents of CCID are not valid in execlist mode of scheduling */
+	if (!elsp_mode && !error->ccid)
 		return;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		u64 base;
+
 		if (!i915_gem_obj_ggtt_bound(obj))
 			continue;
 
-		if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
+		base = i915_gem_obj_ggtt_offset(obj);
+
+		if (elsp_mode)
+			base += LRC_PPHWSP_PN * PAGE_SIZE;
+
+		if (((error->ccid & PAGE_MASK) == base) ||
+		    (((base ^ ee->ctx_desc) & 0x00000000FFFFF000ULL) == 0)) {
 			ee->ctx = i915_error_ggtt_object_create(dev_priv, obj);
 			break;
 		}
@@ -1090,6 +1105,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct drm_i915_gem_request *request;
+	struct intel_engine_cs *engine;
 	int i, count;
 
 	if (dev_priv->semaphore_obj) {
@@ -1098,17 +1114,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 						      dev_priv->semaphore_obj);
 	}
 
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		struct intel_engine_cs *engine = &dev_priv->engine[i];
+	for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines) {
 		struct drm_i915_error_engine *ee = &error->engine[i];
+		u64 ctx_desc;
 
 		ee->pid = -1;
-		ee->engine_id = -1;
-
-		if (!intel_engine_initialized(engine))
-			continue;
-
-		ee->engine_id = i;
+		ee->engine_id = engine->id;
 
 		error_record_engine_registers(error, engine, ee);
 		error_record_engine_waiters(engine, ee);
@@ -1150,7 +1161,16 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 			error->simulated |=
 				request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE;
 
+			ctx_desc = 0;
+			if (i915.enable_execlists) {
+				struct i915_gem_context *ctx;
+
+				ctx = request ? request->ctx : dev_priv->kernel_context;
+				ctx_desc = intel_lr_context_descriptor(ctx, engine);
+			}
+
 			ring = request->ring;
+			ee->ctx_desc = ctx_desc;
 			ee->cpu_ring_head = ring->head;
 			ee->cpu_ring_tail = ring->tail;
 			ee->ringbuffer =
-- 
1.9.1



More information about the Intel-gfx mailing list