[PATCH 2/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore

Chris Wilson chris at chris-wilson.co.uk
Fri Jun 21 08:27:11 UTC 2019


Not all mediated environments yet support HW semaphores, so we should
avoid using one for our preempt-to-busy barrier and instead request that
the CS be paused and not advance on to the next execlist.

There's a natural advantage with the register writes being serialised
with the writes to the ELSP register that should allow the barrier to be
much more constrained. On the other hand, we can no longer track the
extents of the barrier and so must be a lot more lenient in accepting
early CS events.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  6 +++
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 56 +++++++-------------
 2 files changed, 24 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9940bad37812..09a60ccadfe7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -178,6 +178,12 @@ struct intel_engine_execlists {
 	 */
 	u32 __iomem *ctrl_reg;
 
+	/**
+	 * @ring_mode: the engine control register, used to freeze the command
+	 * streamer around preempt-to-busy
+	 */
+	u32 __iomem *ring_mode;
+
 #define EXECLIST_MAX_PORTS 2
 	/**
 	 * @active: the currently known context executing on HW
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index faa9a2e4e40b..710c735d8b8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -223,23 +223,10 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
-static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
-{
-	return (i915_ggtt_offset(engine->status_page.vma) +
-		I915_GEM_HWS_PREEMPT_ADDR);
-}
-
 static inline void
 ring_set_paused(const struct intel_engine_cs *engine, int state)
 {
-	/*
-	 * We inspect HWS_PREEMPT with a semaphore inside
-	 * engine->emit_fini_breadcrumb. If the dword is true,
-	 * the ring is paused as the semaphore will busywait
-	 * until the dword is false.
-	 */
-	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
-	wmb();
+	writel(state, engine->execlists.ring_mode);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -701,6 +688,9 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	/* we need to manually load the submit queue */
 	if (execlists->ctrl_reg)
 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+
+	if (!inject_preempt_hang(execlists))
+		ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
 static bool ctx_single_port_submission(const struct intel_context *ce)
@@ -979,7 +969,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * as we unwind (and until we resubmit) so that we do
 			 * not accidentally tell it to go backwards.
 			 */
-			ring_set_paused(engine, 1);
+			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
 
 			/*
 			 * Note that we have not stopped the GPU at this point,
@@ -1008,7 +998,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				  last->sched.attr.priority,
 				  execlists->queue_priority_hint);
 
-			ring_set_paused(engine, 1);
+			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
 			defer_active(engine);
 
 			/*
@@ -1351,9 +1341,6 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			if (enable_timeslice(engine))
 				mod_timer(&execlists->timer, jiffies + 1);
-
-			if (!inject_preempt_hang(execlists))
-				ring_set_paused(engine, 0);
 		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
 			struct i915_request * const *port = execlists->active;
 
@@ -1374,8 +1361,15 @@ static void process_csb(struct intel_engine_cs *engine)
 			 * ordered, that the breadcrumb write is
 			 * coherent (visible from the CPU) before the
 			 * user interrupt and CSB is processed.
+			 *
+			 * The caveat here applies when we are injecting
+			 * a completion event via manipulation of the
+			 * RING_MI_MODE; this may occur before the
+			 * request is completed and appears as a
+			 * normal context-switch (0x14).
 			 */
-			GEM_BUG_ON(!i915_request_completed(rq));
+			GEM_BUG_ON(!i915_request_completed(rq) &&
+				   !execlists->pending[0]);
 			execlists_schedule_out(rq);
 
 			GEM_BUG_ON(execlists->active - execlists->inflight >
@@ -2133,7 +2127,7 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	const unsigned int reset_value = execlists->csb_size - 1;
 
-	ring_set_paused(engine, 0);
+	ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
 
 	/*
 	 * After a reset, the HW starts writing into CSB entry [0]. We
@@ -2581,19 +2575,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 	return cs;
 }
 
-static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
-{
-	*cs++ = MI_SEMAPHORE_WAIT |
-		MI_SEMAPHORE_GLOBAL_GTT |
-		MI_SEMAPHORE_POLL |
-		MI_SEMAPHORE_SAD_EQ_SDD;
-	*cs++ = 0;
-	*cs++ = intel_hws_preempt_address(request->engine);
-	*cs++ = 0;
-
-	return cs;
-}
-
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write(cs,
@@ -2601,9 +2582,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 				  request->timeline->hwsp_offset,
 				  0);
 	*cs++ = MI_USER_INTERRUPT;
-
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2625,9 +2604,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				    PIPE_CONTROL_CS_STALL,
 				    0);
 	*cs++ = MI_USER_INTERRUPT;
-
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2807,6 +2784,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 	execlists->csb_write =
 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
 
+	execlists->ring_mode =
+		uncore->regs + i915_mmio_reg_offset(RING_MI_MODE(base));
+
 	if (INTEL_GEN(i915) < 11)
 		execlists->csb_size = GEN8_CSB_ENTRIES;
 	else
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list