[Intel-gfx] [PATCH v2] drm/i915: Use trace_printk to provide a death rattle for GEM

Chris Wilson chris at chris-wilson.co.uk
Thu Nov 9 11:15:49 UTC 2017


Trying to enable printk debugging for GEM is fraught with the issue of
spam; interactions with HW are very frequent and often boring. However,
one instance where they are not so boring is just before a BUG; here
ftrace provides a facility to dump its ringbuffer on an oops. So for CI
let's enable trace_printk() to capture the last exchanges with HW as a
death rattle.

For example,
[   72.120722] ------------[ cut here ]------------
[   72.120748] kernel BUG at drivers/gpu/drm/i915/intel_lrc.c:905!
[   72.120756] invalid opcode: 0000 [#1] SMP
[   72.120762] Dumping ftrace buffer:
[   72.120768] ---------------------------------
...
[   72.200424] gem_conc-1064    0..s1 71949306us : intel_lrc_irq_handler: bcs0 in:  ctx=4.1, seqno=161
[   72.200469] gem_conc-1064    0..s1 71949312us : intel_lrc_irq_handler: bcs0 in:  ctx=6.2, seqno=160
[   72.200512] gem_conc-1066    1..s1 71949325us : intel_lrc_irq_handler: bcs0 csb[4/4]: status=0x00008002
[   72.200555] gem_conc-1066    1..s1 71949326us : intel_lrc_irq_handler: bcs0 out: ctx=6, seqno=160
[   72.200601] gem_conc-1066    1..s. 71956923us : intel_lrc_irq_handler: bcs0 csb[5/5]: status=0x00000014
[   72.200646] gem_conc-1066    1..s. 71956928us : intel_lrc_irq_handler: bcs0 out: ctx=6, seqno=160
[   72.200690] gem_conc-1066    1..s. 71956939us : intel_lrc_irq_handler: bcs0 in:  ctx=8.1, seqno=164
[   72.200736] gem_conc-1066    1..s. 71956940us : intel_lrc_irq_handler: bcs0 in:  ctx=4.2, seqno=162
[   72.200780] gem_conc-1066    1..s. 71956951us : intel_lrc_irq_handler: bcs0 csb[0/0]: status=0x00008002
[   72.200824] gem_conc-1066    1..s. 71956951us : intel_lrc_irq_handler: bcs0 out: ctx=4, seqno=162
[   72.200867]   <idle>-0       1..s1 71967999us : intel_lrc_irq_handler: bcs0 csb[1/1]: status=0x00000014
[   72.200912]   <idle>-0       1..s1 71968001us : intel_lrc_irq_handler: bcs0 out: ctx=4, seqno=162
[   72.200956]   <idle>-0       1.Ns1 71979385us : intel_lrc_irq_handler: bcs0 csb[2/2]: status=0x00000018
[   72.201001]   <idle>-0       1.Ns1 71979388us : intel_lrc_irq_handler: bcs0 out: ctx=8, seqno=164
[   72.201044] gem_conc-1063    3..s1 72086825us : intel_lrc_irq_handler: bcs0 in:  ctx=7.1, seqno=165
[   72.201088] gem_conc-1066    1..s. 72086918us : intel_lrc_irq_handler: bcs0 csb[3/3]: status=0x00000001
[   72.201132] gem_conc-1066    1..s. 72086932us : intel_lrc_irq_handler: bcs0 in:  ctx=7.2, seqno=166
[   72.201176] gem_conc-1066    1..s. 72086941us : intel_lrc_irq_handler: bcs0 csb[4/4]: status=0x00008002
[   72.201219] gem_conc-1066    1..s. 72086941us : intel_lrc_irq_handler: bcs0 out: ctx=7, seqno=166
[   72.201263] gem_conc-1066    1..s. 72103855us : intel_lrc_irq_handler: bcs0 csb[5/5]: status=0x00000018
[   72.201307] gem_conc-1066    1..s. 72103858us : intel_lrc_irq_handler: bcs0 out: ctx=7, seqno=166
[   72.201351] gem_conc-1064    0..s1 72116924us : intel_lrc_irq_handler: bcs0 in:  ctx=4.1, seqno=167
[   72.201394] gem_conc-1064    0..s1 72116981us : intel_lrc_irq_handler: bcs0 in:  ctx=4.2, seqno=168
[   72.201438] gem_conc-1066    1..s1 72117545us : intel_lrc_irq_handler: bcs0 csb[0/2]: status=0x00000012
[   72.201482] gem_conc-1066    1..s1 72117547us : intel_lrc_irq_handler: bcs0 out: ctx=4, seqno=168
[   72.201526] gem_conc-1066    1..s1 72117548us : intel_lrc_irq_handler: bcs0 csb[1/2]: status=0x00008002
[   72.201570] gem_conc-1066    1..s1 72117548us : intel_lrc_irq_handler: bcs0 out: ctx=4, seqno=168
[   72.201578] ---------------------------------

v2: Tweak the formatting to be more consistent between in/out.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/Kconfig.debug | 14 ++++++++++++++
 drivers/gpu/drm/i915/i915_gem.h    |  6 ++++++
 drivers/gpu/drm/i915/intel_lrc.c   | 14 ++++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 19c77c6feb24..9e53edbc713b 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -28,6 +28,7 @@ config DRM_I915_DEBUG
 	select SW_SYNC # signaling validation framework (igt/syncobj*)
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
 	select DRM_I915_SELFTEST
+	select DRM_I915_TRACE_GEM
         default n
         help
           Choose this option to turn on extra driver debugging that may affect
@@ -49,6 +50,19 @@ config DRM_I915_DEBUG_GEM
 
           If in doubt, say "N".
 
+config DRM_I915_TRACE_GEM
+	bool "Insert extra ftrace output from the GEM internals"
+	select TRACING
+	default n
+	help
+	  Enable additional and verbose debugging output that will spam
+	  ordinary tests, but may be vital for post-mortem debugging when
+	  used with /proc/sys/kernel/ftrace_dump_on_oops
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
 config DRM_I915_SW_FENCE_DEBUG_OBJECTS
         bool "Enable additional driver debugging for fence objects"
         depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index ee54597465b6..d7905e4a1cf4 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -44,6 +44,12 @@
 #define GEM_DEBUG_BUG_ON(expr)
 #endif
 
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM)
+#define GEM_TRACE(...) trace_printk(__VA_ARGS__)
+#else
+#define GEM_TRACE(...)
+#endif
+
 #define I915_NUM_ENGINES 5
 
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4f56ff5cb908..fb2a46f5a9c0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -467,6 +467,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			port_set(&port[n], port_pack(rq, count));
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+
+			GEM_TRACE("%s in[%d]:  ctx=%d.%d, seqno=%x\n",
+				  engine->name, n,
+				  rq->ctx->hw_id,
+				  count,
+				  rq->global_seqno);
 		} else {
 			GEM_BUG_ON(!n);
 			desc = 0;
@@ -521,6 +527,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	ce->ring->tail &= (ce->ring->size - 1);
 	ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
 
+	GEM_TRACE("\n");
 	for (n = execlists_num_ports(&engine->execlists); --n; )
 		elsp_write(0, elsp);
 
@@ -860,6 +867,9 @@ static void intel_lrc_irq_handler(unsigned long data)
 			 */
 
 			status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+			GEM_TRACE("%s csb[%d/%d]: status=0x%08x:0x%08x\n",
+				  engine->name, head, tail,
+				  status, buf[2*head + 1]);
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
@@ -887,6 +897,10 @@ static void intel_lrc_irq_handler(unsigned long data)
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
 
 			rq = port_unpack(port, &count);
+			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
+				  engine->name,
+				  rq->ctx->hw_id, count,
+				  rq->global_seqno);
 			GEM_BUG_ON(count == 0);
 			if (--count == 0) {
 				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-- 
2.15.0



More information about the Intel-gfx mailing list