[Intel-gfx] [PATCH 39/70] drm/i915: Reduce more locking in execlist command submission

Chris Wilson chris at chris-wilson.co.uk
Tue Apr 7 08:21:03 PDT 2015


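Slightly more extravagant than the previous patch is to use the raw
I915_READ_FW()/I915_WRITE_FW() accessors for all the bounded register
accesses in intel_lrc_irq_handler() - for even more spinlock reduction.
The uncore lock and forcewake are now taken once around the whole
handler (and once around the submission in execlists_context_queue())
rather than inside execlists_submit_pair().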

v2: Rebase

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 08e35003c4f2..27942f61d6fe 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -278,19 +278,12 @@ static void execlists_submit_pair(struct intel_engine_cs *ring)
 	desc[3] = ring->execlist_port[0]->seqno;
 
 	/* Note: You must always write both descriptors in the order below. */
-	spin_lock(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
 	I915_WRITE_FW(RING_ELSP(ring), desc[1]);
 	I915_WRITE_FW(RING_ELSP(ring), desc[0]);
 	I915_WRITE_FW(RING_ELSP(ring), desc[3]);
 
 	/* The context is automatically loaded after the following */
 	I915_WRITE_FW(RING_ELSP(ring), desc[2]);
-
-	/* ELSP is a wo register, use another nearby reg for posting instead */
-	POSTING_READ_FW(RING_EXECLIST_STATUS(ring));
-	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
-	spin_unlock(&dev_priv->uncore.lock);
 }
 
 static void execlists_context_unqueue(struct intel_engine_cs *ring)
@@ -379,31 +372,37 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 	u8 head, tail;
 	u32 seqno = 0;
 
+	spin_lock(&ring->execlist_lock);
+	spin_lock(&dev_priv->uncore.lock);
+	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+
 	head = ring->next_context_status_buffer;
-	tail = I915_READ(RING_CONTEXT_STATUS_PTR(ring)) & 0x7;
+	tail = I915_READ_FW(RING_CONTEXT_STATUS_PTR(ring)) & 0x7;
 	if (head > tail)
 		tail += 6;
 
 	while (head++ < tail) {
 		u32 reg = RING_CONTEXT_STATUS_BUF(ring) + (head % 6)*8;
-		u32 status = I915_READ(reg);
+		u32 status = I915_READ_FW(reg);
 		if (unlikely(status & GEN8_CTX_STATUS_PREEMPTED && 0)) {
 			DRM_ERROR("Pre-empted request %x %s Lite Restore\n",
-				  I915_READ(reg + 4),
+				  I915_READ_FW(reg + 4),
 				  status & GEN8_CTX_STATUS_LITE_RESTORE ? "with" : "without");
 		}
 		if (status & (GEN8_CTX_STATUS_ACTIVE_IDLE |
 			      GEN8_CTX_STATUS_ELEMENT_SWITCH))
-			seqno = I915_READ(reg + 4);
+			seqno = I915_READ_FW(reg + 4);
 	}
 
 	ring->next_context_status_buffer = tail % 6;
-	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
-		   (u32)ring->next_context_status_buffer << 8);
+	I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(ring),
+		      (u32)ring->next_context_status_buffer << 8);
 
-	spin_lock(&ring->execlist_lock);
 	if (execlists_complete_requests(ring, seqno))
 		execlists_context_unqueue(ring);
+
+	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+	spin_unlock(&dev_priv->uncore.lock);
 	spin_unlock(&ring->execlist_lock);
 }
 
@@ -427,8 +426,16 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 
 	list_add_tail(&request->execlist_link, &ring->execlist_queue);
 	if (ring->execlist_port[0] == NULL) {
+		struct drm_i915_private *dev_priv = to_i915(ring->dev);
+
+		spin_lock(&dev_priv->uncore.lock);
+		intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+
 		ring->execlist_port[0] = request;
 		execlists_submit_pair(ring);
+
+		intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+		spin_unlock(&dev_priv->uncore.lock);
 	}
 
 	spin_unlock_irq(&ring->execlist_lock);
-- 
2.1.4



More information about the Intel-gfx mailing list