[Intel-gfx] [PATCH 32/49] drm/i915: Reduce more locking in execlist command submission

Chris Wilson <chris at chris-wilson.co.uk>
Fri Mar 27 04:02:04 PDT 2015


Slightly more extravagant than the previous patch: use the raw
I915_READ_FW() register accessors for all the reads in
intel_lrc_irq_handler, taking forcewake and the execlist lock once
around the whole handler, for even more spinlock reduction.
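
The raw I915_READ_FW()/I915_WRITE_FW() accessors skip the per-access
uncore spinlock and automatic forcewake handling that the plain
I915_READ()/I915_WRITE() macros perform, so the caller must hold
forcewake itself for the duration. A minimal sketch of the resulting
pattern (a fragment of the handler, not a complete function;
intel_uncore_forcewake_irqlock()/_irqunlock() are the helpers
introduced earlier in this series):

	spin_lock(&ring->execlist_lock);
	intel_uncore_forcewake_irqlock(dev_priv, FORCEWAKE_ALL);

	/* raw accesses: forcewake already held, no per-access locking */
	tail = I915_READ_FW(RING_CONTEXT_STATUS_PTR(ring)) & 0x7;
	/* ... walk the context status buffer with I915_READ_FW() ... */
	I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(ring),
		      ((u32)ring->next_context_status_buffer & 0x07) << 8);

	intel_uncore_forcewake_irqunlock(dev_priv, FORCEWAKE_ALL);
	spin_unlock(&ring->execlist_lock);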

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 454bb7df27fe..1c768c05e52e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -278,17 +278,12 @@ static void execlists_submit_pair(struct intel_engine_cs *ring)
 	desc[3] = ring->execlist_port[0]->seqno;
 
 	/* Note: You must always write both descriptors in the order below. */
-	intel_uncore_forcewake_irqlock(dev_priv, FORCEWAKE_ALL);
 	I915_WRITE_FW(RING_ELSP(ring), desc[1]);
 	I915_WRITE_FW(RING_ELSP(ring), desc[0]);
 	I915_WRITE_FW(RING_ELSP(ring), desc[3]);
 
 	/* The context is automatically loaded after the following */
 	I915_WRITE_FW(RING_ELSP(ring), desc[2]);
-
-	/* ELSP is a wo register, use another nearby reg for posting instead */
-	POSTING_READ_FW(RING_EXECLIST_STATUS(ring));
-	intel_uncore_forcewake_irqunlock(dev_priv, FORCEWAKE_ALL);
 }
 
 static void execlists_context_unqueue(struct intel_engine_cs *ring)
@@ -377,32 +372,36 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 	u8 head, tail;
 	u32 seqno = 0;
 
+	spin_lock(&ring->execlist_lock);
+	intel_uncore_forcewake_irqlock(dev_priv, FORCEWAKE_ALL);
+
 	head = ring->next_context_status_buffer;
-	tail = I915_READ(RING_CONTEXT_STATUS_PTR(ring)) & 0x7;
+	tail = I915_READ_FW(RING_CONTEXT_STATUS_PTR(ring)) & 0x7;
 	if (head > tail)
 		tail += 6;
 
 	while (head++ < tail) {
 		u32 reg = RING_CONTEXT_STATUS_BUF(ring) + (head % 6)*8;
-		u32 status = I915_READ(reg);
+		u32 status = I915_READ_FW(reg);
 		if (unlikely(status & GEN8_CTX_STATUS_PREEMPTED && 0)) {
 			DRM_ERROR("Pre-empted request %x %s Lite Restore\n",
-				  I915_READ(reg + 4),
+				  I915_READ_FW(reg + 4),
 				  status & GEN8_CTX_STATUS_LITE_RESTORE ? "with" : "without");
 		}
 		if (status & (GEN8_CTX_STATUS_ACTIVE_IDLE |
 			      GEN8_CTX_STATUS_ELEMENT_SWITCH))
-			seqno = I915_READ(reg + 4);
+			seqno = I915_READ_FW(reg + 4);
 	}
 
-	spin_lock(&ring->execlist_lock);
 	if (execlists_complete_requests(ring, seqno))
 		execlists_context_unqueue(ring);
-	spin_unlock(&ring->execlist_lock);
 
 	ring->next_context_status_buffer = tail % 6;
-	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
-		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
+	I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(ring),
+		      ((u32)ring->next_context_status_buffer & 0x07) << 8);
+
+	intel_uncore_forcewake_irqunlock(dev_priv, FORCEWAKE_ALL);
+	spin_unlock(&ring->execlist_lock);
 }
 
 static int execlists_context_queue(struct intel_engine_cs *ring,
@@ -424,8 +423,13 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 	spin_lock_irq(&ring->execlist_lock);
 
 	list_add_tail(&request->execlist_link, &ring->execlist_queue);
-	if (ring->execlist_port[0] == NULL)
+	if (ring->execlist_port[0] == NULL) {
+		intel_uncore_forcewake_irqlock(to_i915(ring->dev),
+					       FORCEWAKE_ALL);
 		execlists_context_unqueue(ring);
+		intel_uncore_forcewake_irqunlock(to_i915(ring->dev),
+						 FORCEWAKE_ALL);
+	}
 
 	spin_unlock_irq(&ring->execlist_lock);
 
-- 
2.1.4


