[PATCH] ring-mode

Chris Wilson chris at chris-wilson.co.uk
Fri Jun 21 05:55:13 UTC 2019


---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  6 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 61 +++++++-------------
 2 files changed, 27 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9940bad37812..09a60ccadfe7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -178,6 +178,12 @@ struct intel_engine_execlists {
 	 */
 	u32 __iomem *ctrl_reg;
 
+	/**
+	 * @ring_mode: pointer to the RING_MI_MODE register, used to freeze the
+	 * command streamer around preempt-to-busy
+	 */
+	u32 __iomem *ring_mode;
+
 #define EXECLIST_MAX_PORTS 2
 	/**
 	 * @active: the currently known context executing on HW
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f8ad49006917..84cb0b5c6eb1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -223,23 +223,10 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
-static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
-{
-	return (i915_ggtt_offset(engine->status_page.vma) +
-		I915_GEM_HWS_PREEMPT_ADDR);
-}
-
 static inline void
 ring_set_paused(const struct intel_engine_cs *engine, int state)
 {
-	/*
-	 * We inspect HWS_PREEMPT with a semaphore inside
-	 * engine->emit_fini_breadcrumb. If the dword is true,
-	 * the ring is paused as the semaphore will busywait
-	 * until the dword is false.
-	 */
-	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
-	wmb();
+	writel(state, engine->execlists.ring_mode);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -626,7 +613,7 @@ trace_ports(const struct intel_engine_execlists *execlists,
 	const struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
 
-	GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
+	GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }, stopped? %s\n",
 		  engine->name, msg,
 		  ports[0]->fence.context,
 		  ports[0]->fence.seqno,
@@ -634,7 +621,8 @@ trace_ports(const struct intel_engine_execlists *execlists,
 		  i915_request_started(ports[0]) ? "*" :
 		  "",
 		  ports[1] ? ports[1]->fence.context : 0,
-		  ports[1] ? ports[1]->fence.seqno : 0);
+		  ports[1] ? ports[1]->fence.seqno : 0,
+		  yesno(readl(execlists->ring_mode) & STOP_RING));
 }
 
 static __maybe_unused bool
@@ -701,6 +689,9 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	/* we need to manually load the submit queue */
 	if (execlists->ctrl_reg)
 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+
+	if (!inject_preempt_hang(execlists))
+		ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
 static bool ctx_single_port_submission(const struct intel_context *ce)
@@ -979,7 +970,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * as we unwind (and until we resubmit) so that we do
 			 * not accidentally tell it to go backwards.
 			 */
-			ring_set_paused(engine, 1);
+			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
 
 			/*
 			 * Note that we have not stopped the GPU at this point,
@@ -1008,7 +999,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				  last->sched.attr.priority,
 				  execlists->queue_priority_hint);
 
-			ring_set_paused(engine, 1);
+			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
 			defer_active(engine);
 
 			/*
@@ -1351,9 +1342,6 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			if (enable_timeslice(engine))
 				mod_timer(&execlists->timer, jiffies + 1);
-
-			if (!inject_preempt_hang(execlists))
-				ring_set_paused(engine, 0);
 		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
 			struct i915_request * const *port = execlists->active;
 
@@ -1374,8 +1362,15 @@ static void process_csb(struct intel_engine_cs *engine)
 			 * ordered, that the breadcrumb write is
 			 * coherent (visible from the CPU) before the
 			 * user interrupt and CSB is processed.
+			 *
+			 * The exception is when we inject a completion
+			 * event by manipulating RING_MI_MODE (STOP_RING):
+			 * the event may arrive before the request has
+			 * completed, and it appears as a normal
+			 * context-switch (0x14).
 			 */
-			GEM_BUG_ON(!i915_request_completed(rq));
+			GEM_BUG_ON(!i915_request_completed(rq) &&
+				   !(readl(execlists->ring_mode) & STOP_RING));
 			execlists_schedule_out(rq);
 
 			GEM_BUG_ON(execlists->active - execlists->inflight >
@@ -2133,7 +2128,7 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	const unsigned int reset_value = execlists->csb_size - 1;
 
-	ring_set_paused(engine, 0);
+	ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
 
 	/*
 	 * After a reset, the HW starts writing into CSB entry [0]. We
@@ -2581,19 +2576,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 	return cs;
 }
 
-static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
-{
-	*cs++ = MI_SEMAPHORE_WAIT |
-		MI_SEMAPHORE_GLOBAL_GTT |
-		MI_SEMAPHORE_POLL |
-		MI_SEMAPHORE_SAD_EQ_SDD;
-	*cs++ = 0;
-	*cs++ = intel_hws_preempt_address(request->engine);
-	*cs++ = 0;
-
-	return cs;
-}
-
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write(cs,
@@ -2601,9 +2583,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 				  request->timeline->hwsp_offset,
 				  0);
 	*cs++ = MI_USER_INTERRUPT;
-
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2625,9 +2605,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				    PIPE_CONTROL_CS_STALL,
 				    0);
 	*cs++ = MI_USER_INTERRUPT;
-
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2807,6 +2785,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 	execlists->csb_write =
 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
 
+	execlists->ring_mode =
+		uncore->regs + i915_mmio_reg_offset(RING_MI_MODE(base));
+
 	if (INTEL_GEN(i915) < 11)
 		execlists->csb_size = GEN8_CSB_ENTRIES;
 	else
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list