[Intel-gfx] [PATCH] drm/i915: blitter ring workaround for gen6

Ben Widawsky ben at bwidawsk.net
Mon Oct 3 09:57:51 CEST 2011


I found this workaround in the docs while trying to debug a test case I
stumbled upon. The patch is still in flux as I try to make it useful.
Both my test case and the xscreensaver "slip" hack hit similar scenarios,
which I'm hoping some variation of this patch will fix.

Again, this doesn't fix anything yet.

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_reg.h         |    2 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   84 ++++++++++++++++++++++++++++--
 2 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 4fd736e..44f72bd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -543,6 +543,8 @@
 #define   GEN6_RENDER_USER_INTERRUPT			(1 << 0)
 
 #define GEN6_BLITTER_HWSTAM	0x22098
+#define GEN6_BCS_MI_MODE	0x2209c
+#define   GEN6_BCS_BYPASS_FENCE (1 << 1)
 #define GEN6_BLITTER_IMR	0x220a8
 #define   GEN6_BLITTER_MI_FLUSH_DW_NOTIFY_INTERRUPT	(1 << 26)
 #define   GEN6_BLITTER_COMMAND_PARSER_MASTER_ERROR	(1 << 25)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0e99589..3bf2dea 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,9 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+static int blt_ring_begin(struct intel_ring_buffer *ring, int num_dwords);
+static void blt_ring_begin2(struct intel_ring_buffer *ring);
+
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -344,7 +347,10 @@ gen6_add_request(struct intel_ring_buffer *ring,
 	u32 mbox2_reg;
 	int ret;
 
-	ret = intel_ring_begin(ring, 10);
+	if (ring->id == RING_BLT)
+		ret = blt_ring_begin(ring, 10);
+	else
+		ret = intel_ring_begin(ring, 10);
 	if (ret)
 		return ret;
 
@@ -359,6 +365,8 @@ gen6_add_request(struct intel_ring_buffer *ring,
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
 	intel_ring_emit(ring, *seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
+//	if (ring->id == RING_BLT)
+//		blt_ring_begin2(ring);
 	intel_ring_advance(ring);
 
 	return 0;
@@ -382,7 +390,10 @@ intel_ring_sync(struct intel_ring_buffer *waiter,
 		  MI_SEMAPHORE_COMPARE |
 		  MI_SEMAPHORE_REGISTER;
 
-	ret = intel_ring_begin(waiter, 4);
+	if (waiter->id == RING_BLT)
+		ret = blt_ring_begin(waiter, 4);
+	else
+		ret = intel_ring_begin(waiter, 4);
 	if (ret)
 		return ret;
 
@@ -390,6 +401,8 @@ intel_ring_sync(struct intel_ring_buffer *waiter,
 	intel_ring_emit(waiter, seqno);
 	intel_ring_emit(waiter, 0);
 	intel_ring_emit(waiter, MI_NOOP);
+//	if (waiter->id == RING_BLT)
+//		blt_ring_begin2(waiter);
 	intel_ring_advance(waiter);
 
 	return 0;
@@ -1233,13 +1246,19 @@ blt_ring_put_irq(struct intel_ring_buffer *ring)
 }
 
 
+/* SNB C0 and D0 need ever more workarounds */
+#define NEED_MORE_BLT_WORKAROUND(dev) \
+	(dev->pdev->revision == 9 || dev->pdev->revision == 10)
+
+
 /* Workaround for some stepping of SNB,
  * each time when BLT engine ring tail moved,
  * the first command in the ring to be parsed
  * should be MI_BATCH_BUFFER_START
  */
 #define NEED_BLT_WORKAROUND(dev) \
-	(IS_GEN6(dev) && (dev->pdev->revision < 8))
+	((IS_GEN6(dev) && (dev->pdev->revision < 8)) || \
+	(IS_GEN6(dev) && NEED_MORE_BLT_WORKAROUND(dev)))
 
 static inline struct drm_i915_gem_object *
 to_blt_workaround(struct intel_ring_buffer *ring)
@@ -1286,18 +1305,53 @@ static int blt_ring_begin(struct intel_ring_buffer *ring,
 			  int num_dwords)
 {
 	if (ring->private) {
-		int ret = intel_ring_begin(ring, num_dwords+2);
+		int ret;
+		if (NEED_MORE_BLT_WORKAROUND(ring->dev))
+			num_dwords += 48;
+		else
+			num_dwords += 2;
+		ret = intel_ring_begin(ring, num_dwords);
 		if (ret)
 			return ret;
 
+		if (NEED_MORE_BLT_WORKAROUND(ring->dev)) {
+			int i = 0;
+			for (i = 0; i < 32; i++)
+				intel_ring_emit(ring, MI_NOOP);
+		}
 		intel_ring_emit(ring, MI_BATCH_BUFFER_START);
 		intel_ring_emit(ring, to_blt_workaround(ring)->gtt_offset);
-
+		blt_ring_begin2(ring);
 		return 0;
 	} else
 		return intel_ring_begin(ring, 4);
 }
 
+static void blt_ring_begin2(struct intel_ring_buffer *ring)
+{
+	if (!ring->private)
+		return;
+
+	if (!NEED_MORE_BLT_WORKAROUND(ring->dev))
+		return;
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+	intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16 |
+			      GEN6_BCS_BYPASS_FENCE);
+	intel_ring_emit(ring, MI_FLUSH_DW);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+	intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+	intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16);
+	intel_ring_emit(ring, MI_NOOP);
+}
+
 static int blt_ring_flush(struct intel_ring_buffer *ring,
 			  u32 invalidate, u32 flush)
 {
@@ -1315,10 +1369,28 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
+	//blt_ring_begin2(ring);
 	intel_ring_advance(ring);
 	return 0;
 }
 
+static int
+blt_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+			      u32 offset, u32 len)
+{
+	int ret;
+
+	ret = blt_ring_begin(ring, 4);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+	/* bit0-7 is the length on GEN6+ */
+	intel_ring_emit(ring, offset);
+	//blt_ring_begin2(ring);
+	intel_ring_advance(ring);
+
+	return 0;
+}
 static void blt_ring_cleanup(struct intel_ring_buffer *ring)
 {
 	if (!ring->private)
@@ -1341,7 +1413,7 @@ static const struct intel_ring_buffer gen6_blt_ring = {
 	.get_seqno		= ring_get_seqno,
 	.irq_get		= blt_ring_get_irq,
 	.irq_put		= blt_ring_put_irq,
-	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
+	.dispatch_execbuffer	= blt_ring_dispatch_execbuffer,
 	.cleanup		= blt_ring_cleanup,
 	.sync_to		= gen6_blt_ring_sync_to,
 	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
-- 
1.7.6.4




More information about the Intel-gfx mailing list