[Intel-gfx] [PATCH v5] drm/i915/icl: Preempt-to-idle support in execlists.

Mon Oct 15 17:29:18 UTC 2018

The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
    preempt-to-idle is supported. (Chris)
    Updated setting HWACK flag so that it is cleared after
    preempt-to-dle. (Chris, Daniele)
    Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
    Merged preemption trigger functions to one. (Chris)
    Fixed conyext state tonot assume COMPLETED_MASK after preemption,
    since idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
    Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

v5: Renamed inject_preempt_context(). (Daniele)
    Removed duplicated GEM_BUG_ON() on HWACK (Daniele)

Bspec: 18922
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Cc: Michal Winiarski <michal.winiarski at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 109 +++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 84 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3017ef0..4817438 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2597,6 +2597,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8cbe580..98ca20e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_LOGICAL_RING_PREEMPTION(i915);
+	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+	       !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 0a05cc7..f708d97 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -597,7 +597,8 @@ static const struct intel_device_info intel_cannonlake_info = {
 	GEN10_FEATURES, \
 	GEN(11), \
 	.ddb_size = 2048, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index af70026..7dcf0fd 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -104,6 +104,7 @@ enum intel_ppgtt {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ff0e2b3..4c2bfed 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -488,29 +489,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
 	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
 }
 
-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce =
-		to_intel_context(engine->i915->preempt_context, engine);
-	unsigned int n;
+	GEM_TRACE("%s\n", engine->name);
 
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
+	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+		/*
+		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+		 */
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);
 
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
-	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
+		/*
+		 * If we have hardware preempt-to-idle, we do not need to
+		 * inject any job to the hardware. We only set a flag.
+		 */
+		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+	} else {
+		struct intel_context *ce =
+			to_intel_context(engine->i915->preempt_context, engine);
+		unsigned int n;
 
-	write_desc(execlists, ce->lrc_desc, n);
+		GEM_BUG_ON(execlists->preempt_complete_status !=
+			   upper_32_bits(ce->lrc_desc));
+		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+		/*
+		 * Switch to our empty preempt context so
+		 * the state of the GPU is known (idle).
+		 */
+		for (n = execlists_num_ports(execlists); --n; )
+			write_desc(execlists, 0, n);
+
+		write_desc(execlists, ce->lrc_desc, n);
+
+		/* we need to manually load the submit queue */
+		if (execlists->ctrl_reg)
+			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+	}
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -583,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			return;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
-			inject_preempt_context(engine);
+			execlist_send_preempt_to_idle(engine);
 			return;
 		}
 
@@ -910,22 +931,43 @@ static void process_csb(struct intel_engine_cs *engine)
 			  execlists->active);
 
 		status = buf[2 * head];
-		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-			      GEN8_CTX_STATUS_PREEMPTED))
-			execlists_set_active(execlists,
-					     EXECLISTS_ACTIVE_HWACK);
-		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-			execlists_clear_active(execlists,
-					       EXECLISTS_ACTIVE_HWACK);
-
-		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-			continue;
+		/*
+		 * Check if preempted from idle to idle directly.
+		 * The STATUS_IDLE_ACTIVE flag is used to mark
+		 * such transition.
+		 */
+		if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+		     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
 
-		/* We should never get a COMPLETED | IDLE_ACTIVE! */
-		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+			/*
+			 * We could not have COMPLETED anything
+			 * if we were idle before preemption.
+			 */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+		} else {
+			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+				      GEN8_CTX_STATUS_PREEMPTED))
+				execlists_set_active(execlists,
+						     EXECLISTS_ACTIVE_HWACK);
+
+			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+				execlists_clear_active(execlists,
+						       EXECLISTS_ACTIVE_HWACK);
+
+			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+				continue;
 
-		if (status & GEN8_CTX_STATUS_COMPLETE &&
-		    buf[2*head + 1] == execlists->preempt_complete_status) {
+			/* We should never get a COMPLETED | IDLE_ACTIVE! */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+		}
+
+		/*
+		 * Check if preempted to real idle, either directly or
+		 * the preemptive context already finished executing
+		 */
+		if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+		    (status & GEN8_CTX_STATUS_COMPLETE &&
+		    buf[2*head + 1] == execlists->preempt_complete_status)) {
 			GEM_TRACE("%s preempt-idle\n", engine->name);
 			complete_preempt_context(execlists);
 			continue;
@@ -2138,7 +2180,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context ||
+	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502..871901a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -43,6 +43,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4