[Intel-gfx] [PATCH] drm/i915: Trace client-wakeup latency

Chris Wilson chris at chris-wilson.co.uk
Fri Aug 3 14:41:32 UTC 2018


If we record the time we send the interrupt, and then the time we wake
the client up, we can measure that wake up latency. This can be very
useful for detecting issues and in particular whether the CPU power
management is accidentally delaying clients from their work.

To use:
	perf record -e i915:i915_wait_irq_latency
	perf script
However, using perf/tracepoints seems to be preventing CPU power
management; completely obliterating the effect we are trying to
investigate.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig.debug      |  6 ++++
 drivers/gpu/drm/i915/i915_request.c     | 14 ++++++--
 drivers/gpu/drm/i915/i915_trace.h       | 45 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 26 ++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++++++
 5 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 459f8f88a34c..bc8faa6439ad 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -77,6 +77,12 @@ config DRM_I915_TRACE_GEM
 
 	  If in doubt, say "N".
 
+config DRM_I915_TRACE_IRQ_LATENCY
+	bool "Measure interrupt-to-client-wakeup"
+	depends on DRM_I915_DEBUG_GEM
+	default n
+	select FTRACE
+
 config DRM_I915_SW_FENCE_DEBUG_OBJECTS
         bool "Enable additional driver debugging for fence objects"
         depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f3ff8dbe363d..1dfda71ed9e3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1401,12 +1401,12 @@ long i915_request_wait(struct i915_request *rq,
 	for (;;) {
 		if (signal_pending_state(state, current)) {
 			timeout = -ERESTARTSYS;
-			break;
+			goto remove;
 		}
 
 		if (!timeout) {
 			timeout = -ETIME;
-			break;
+			goto remove;
 		}
 
 		if (!qos &&
@@ -1457,6 +1457,16 @@ long i915_request_wait(struct i915_request *rq,
 		}
 	}
 
+	if (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY)) {
+		struct drm_i915_private *dev_priv = rq->i915;
+		u32 now = I915_READ(RING_TIMESTAMP(rq->engine->mmio_base));
+
+		trace_i915_wait_irq_latency(rq,
+					    now - intel_engine_get_timestamp(rq->engine),
+					    rq->global_seqno == intel_engine_get_seqno(rq->engine));
+	}
+
+remove:
 	intel_engine_remove_wait(rq->engine, &wait);
 complete:
 	__set_current_state(TASK_RUNNING);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index b50c6b829715..acd089e701cf 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -849,6 +849,51 @@ TRACE_EVENT(i915_request_wait_begin,
 		      __entry->flags)
 );
 
+#if defined(CONFIG_DRM_I915_TRACE_IRQ_LATENCY)
+TRACE_EVENT(i915_wait_irq_latency,
+	    TP_PROTO(struct i915_request *rq, u32 delay, bool stable),
+	    TP_ARGS(rq, delay, stable),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, hw_id)
+			     __field(u64, ctx)
+			     __field(u16, class)
+			     __field(u16, instance)
+			     __field(u32, seqno)
+			     __field(u32, global)
+			     __field(u32, delay)
+			     __field(u32, time)
+			     __field(u32, stable)
+			    ),
+
+	    TP_fast_assign(
+			   __entry->dev = rq->i915->drm.primary->index;
+			   __entry->hw_id = rq->gem_context->hw_id;
+			   __entry->class = rq->engine->uabi_class;
+			   __entry->instance = rq->engine->instance;
+			   __entry->ctx = rq->fence.context;
+			   __entry->seqno = rq->fence.seqno;
+			   __entry->global = rq->global_seqno;
+			   __entry->delay = delay;
+			   __entry->stable = stable;
+			   ),
+
+	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, delay=%u%s",
+		      __entry->dev, __entry->class, __entry->instance,
+		      __entry->hw_id, __entry->ctx,
+		      __entry->seqno, __entry->global,
+		      __entry->delay, __entry->stable ? "" : "*")
+);
+#else
+#if !defined(TRACE_HEADER_MULTI_READ)
+static inline void
+trace_i915_wait_irq_latency(struct i915_request *rq, u32 delta, bool stable)
+{
+}
+#endif
+#endif
+
 DEFINE_EVENT(i915_request, i915_request_wait_end,
 	    TP_PROTO(struct i915_request *rq),
 	    TP_ARGS(rq)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b0be180c6294..0b31d2bd205b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2213,14 +2213,25 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
 
 	cs = gen8_emit_ggtt_write(cs, request->global_seqno,
 				  intel_hws_seqno_address(request->engine));
+
+	if (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY)) {
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+		*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(request->engine->mmio_base));
+		*cs++ = intel_hws_timestamp_address(request->engine);
+		*cs++ = MI_NOOP;
+	}
+
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
 	gen8_emit_wa_tail(request, cs);
 }
-static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_sz =
+	6 + (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY) ? 4 : 0) +
+	WA_TAIL_DWORDS;
 
 static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
@@ -2229,14 +2240,25 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 
 	cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
 				      intel_hws_seqno_address(request->engine));
+
+	if (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY)) {
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+		*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(request->engine->mmio_base));
+		*cs++ = intel_hws_timestamp_address(request->engine);
+		*cs++ = MI_NOOP;
+	}
+
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
 	gen8_emit_wa_tail(request, cs);
 }
-static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_rcs_sz =
+	8 + (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY) ? 4 : 0) +
+	WA_TAIL_DWORDS;
 
 static int gen8_init_rcs_context(struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 57f3787ed6ec..e5c8ddf63441 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -786,6 +786,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 #define I915_GEM_HWS_PREEMPT_INDEX	0x32
 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_TIMESTAMP_INDEX	0x34
+#define I915_GEM_HWS_TIMESTAMP_ADDR (I915_GEM_HWS_TIMESTAMP_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 #define I915_GEM_HWS_SCRATCH_INDEX	0x40
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
@@ -916,6 +918,11 @@ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
 	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 }
 
+static inline u32 intel_engine_get_timestamp(struct intel_engine_cs *engine)
+{
+	return intel_read_status_page(engine, I915_GEM_HWS_TIMESTAMP_INDEX);
+}
+
 static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
 {
 	/* We are only peeking at the tail of the submit queue (and not the
@@ -950,6 +957,11 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
 	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
 }
 
+static inline u32 intel_hws_timestamp_address(struct intel_engine_cs *engine)
+{
+	return engine->status_page.ggtt_offset + I915_GEM_HWS_TIMESTAMP_ADDR;
+}
+
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
-- 
2.18.0



More information about the Intel-gfx mailing list