[PATCH] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore

Chris Wilson chris at chris-wilson.co.uk
Mon Jun 24 17:27:43 UTC 2019


Not all mediated environments yet support HW semaphores, so we should
avoid using one for our preempt-to-busy barrier and instead request that
the CS be paused and not advance on to the next execlist.

A natural advantage is that the STOP_RING register write is serialised
with the writes to the ELSP register, which should allow the barrier to
be much more constrained. On the other hand, we can no longer track the
extents of the barrier and so must be a lot more lenient in accepting
early CS events.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Zhenyu Wang <zhenyuw at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  8 +++
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 69 ++++++++------------
 2 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7e056114344e..1353df264236 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -166,6 +166,8 @@ struct intel_engine_execlists {
 	 */
 	bool no_priolist;
 
+	u16 ring_mode;
+
 	/**
 	 * @submit_reg: gen-specific execlist submission register
 	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
@@ -179,6 +181,12 @@ struct intel_engine_execlists {
 	 */
 	u32 __iomem *ctrl_reg;
 
+	/**
+	 * @ring_reg: the engine control register, used to freeze the command
+	 * streamer around preempt-to-busy
+	 */
+	u32 __iomem *ring_reg;
+
 #define EXECLIST_MAX_PORTS 2
 	/**
 	 * @active: the currently known context executing on HW
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 28685ba91a2c..e96a877d98c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -224,24 +224,18 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
-static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
+static inline void ring_set_paused(struct intel_engine_cs *engine, u32 state)
 {
-	return (i915_ggtt_offset(engine->status_page.vma) +
-		I915_GEM_HWS_PREEMPT_ADDR);
-}
+	u16 x;
 
-static inline void
-ring_set_paused(const struct intel_engine_cs *engine, int state)
-{
-	/*
-	 * We inspect HWS_PREEMPT with a semaphore inside
-	 * engine->emit_fini_breadcrumb. If the dword is true,
-	 * the ring is paused as the semaphore will busywait
-	 * until the dword is false.
-	 */
-	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
-	if (state)
-		wmb();
+	x = engine->execlists.ring_mode;
+	x &= ~(state >> 16);
+	x |= state;
+	if (x == engine->execlists.ring_mode)
+		return;
+
+	engine->execlists.ring_mode = x;
+	writel(state, engine->execlists.ring_reg);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -981,7 +975,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * as we unwind (and until we resubmit) so that we do
 			 * not accidentally tell it to go backwards.
 			 */
-			ring_set_paused(engine, 1);
+			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
 
 			/*
 			 * Note that we have not stopped the GPU at this point,
@@ -1010,7 +1004,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				  last->sched.attr.priority,
 				  execlists->queue_priority_hint);
 
-			ring_set_paused(engine, 1);
+			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
 			defer_active(engine);
 
 			/*
@@ -1240,12 +1234,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		  engine->name, execlists->queue_priority_hint,
 		  yesno(submit));
 
+	if (!inject_preempt_hang(execlists))
+		ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
+
 	if (submit) {
 		*port = execlists_schedule_in(last, port - execlists->pending);
 		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists_submit_ports(engine);
-	} else {
-		ring_set_paused(engine, 0);
 	}
 }
 
@@ -1355,9 +1350,6 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			if (enable_timeslice(engine))
 				mod_timer(&execlists->timer, jiffies + 1);
-
-			if (!inject_preempt_hang(execlists))
-				ring_set_paused(engine, 0);
 		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
 			struct i915_request * const *port = execlists->active;
 
@@ -1378,8 +1370,15 @@ static void process_csb(struct intel_engine_cs *engine)
 			 * ordered, that the breadcrumb write is
 			 * coherent (visible from the CPU) before the
 			 * user interrupt and CSB is processed.
+			 *
+			 * The caveat here applies when we are injecting
+			 * a completion event via manipulation of the
+			 * RING_MI_MODE; this may occur before the
+			 * request is completed and appears as a
+			 * normal context-switch (0x14).
 			 */
-			GEM_BUG_ON(!i915_request_completed(rq));
+			GEM_BUG_ON(!i915_request_completed(rq) &&
+				   !execlists->pending[0]);
 			execlists_schedule_out(rq);
 
 			GEM_BUG_ON(execlists->active - execlists->inflight >
@@ -2133,7 +2132,7 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	const unsigned int reset_value = execlists->csb_size - 1;
 
-	ring_set_paused(engine, 0);
+	ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
 
 	/*
 	 * After a reset, the HW starts writing into CSB entry [0]. We
@@ -2581,19 +2580,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 	return cs;
 }
 
-static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
-{
-	*cs++ = MI_SEMAPHORE_WAIT |
-		MI_SEMAPHORE_GLOBAL_GTT |
-		MI_SEMAPHORE_POLL |
-		MI_SEMAPHORE_SAD_EQ_SDD;
-	*cs++ = 0;
-	*cs++ = intel_hws_preempt_address(request->engine);
-	*cs++ = 0;
-
-	return cs;
-}
-
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write(cs,
@@ -2601,9 +2587,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 				  request->timeline->hwsp_offset,
 				  0);
 	*cs++ = MI_USER_INTERRUPT;
-
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2625,9 +2609,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				    PIPE_CONTROL_CS_STALL,
 				    0);
 	*cs++ = MI_USER_INTERRUPT;
-
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2807,6 +2789,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 	execlists->csb_write =
 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
 
+	execlists->ring_reg =
+		uncore->regs + i915_mmio_reg_offset(RING_MI_MODE(base));
+
 	if (INTEL_GEN(i915) < 11)
 		execlists->csb_size = GEN8_CSB_ENTRIES;
 	else
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list