[Intel-gfx] [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.

Tue Mar 27 15:17:59 UTC 2018

The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

Bspec: 18922
Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 ++
 drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/intel_lrc.h         |  1 +
 5 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_csr = 0, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 				const struct i915_request *last,
 				int prio)
 {
-	return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+	return (engine->i915->preempt_context ||
+		HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
+		 prio > max(rq_prio(last), 0);
 }
 
 /**
@@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	/*
+	 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+	 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+	 */
+	GEM_BUG_ON(execlists->ctrl_reg != NULL);
+
+	/* trigger preemption to idle */
+	writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -594,7 +616,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			goto unlock;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
-			inject_preempt_context(engine);
+			if (HAS_HW_PREEMPT_TO_IDLE(engine->i915))
+				gen11_preempt_to_idle(engine);
+			else
+				inject_preempt_context(engine);
 			goto unlock;
 		}
 
@@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
 				  status, buf[2*head + 1],
 				  execlists->active);
 
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
+			/* Check if switched to active or preempted to active */
+			if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+					GEN8_CTX_STATUS_PREEMPTED)) &&
+			    !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
 				execlists_set_active(execlists,
 						     EXECLISTS_ACTIVE_HWACK);
+
 			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
 				execlists_clear_active(execlists,
 						       EXECLISTS_ACTIVE_HWACK);
@@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
 			/* We should never get a COMPLETED | IDLE_ACTIVE! */
 			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
+			/*
+			 * Check if preempted to real idle, either directly or
+			 * the preemptive context already finished executing
+			 */
+			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+			    (status & GEN8_CTX_STATUS_COMPLETE &&
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 				GEM_TRACE("%s preempt-idle\n", engine->name);
 
 				execlists_cancel_port_requests(execlists);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 59d7b86..958d1b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer
-- 
2.7.4