[Intel-gfx] [PATCH] drm/i915: blitter ring workaround for gen6

Ben Widawsky ben at bwidawsk.net
Mon Oct 3 10:55:07 CEST 2011


I found this workaround in the docs while debugging a test case I
stumbled upon. The patch is still in flux as I try to make it useful.
Both my test case and the xscreensaver "slip" hack hit similar scenarios,
which I'm hoping some variation of this patch will fix.

Again, this doesn't fix anything yet.

v2: bugfixes + cleanups... still doesn't help, still WIP

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_reg.h         |    2 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |  153 ++++++++++++++++++++++++++-----
 2 files changed, 133 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 4fd736e..44f72bd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -543,6 +543,8 @@
 #define   GEN6_RENDER_USER_INTERRUPT			(1 << 0)
 
 #define GEN6_BLITTER_HWSTAM	0x22098
+#define GEN6_BCS_MI_MODE	0x2209c
+#define   GEN6_BCS_BYPASS_FENCE (1 << 1)
 #define GEN6_BLITTER_IMR	0x220a8
 #define   GEN6_BLITTER_MI_FLUSH_DW_NOTIFY_INTERRUPT	(1 << 26)
 #define   GEN6_BLITTER_COMMAND_PARSER_MASTER_ERROR	(1 << 25)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0e99589..4c712f0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,31 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+static int blt_ring_begin(struct intel_ring_buffer *ring, int num_dwords);
+static void blt_ring_begin2(struct intel_ring_buffer *ring);
+
+#define BLT_WA_AFTER_WORK 1
+
+/* SNB C0 and D0 need ever more workarounds */
+#define NEED_MORE_BLT_WORKAROUND(dev) \
+	(dev->pdev->revision == 9 || dev->pdev->revision == 10)
+
+
+/* Workaround for some steppings of SNB:
+ * each time the BLT engine ring tail is moved,
+ * the first command in the ring to be parsed
+ * should be MI_BATCH_BUFFER_START.
+ */
+#define NEED_BLT_WORKAROUND(dev) \
+	((IS_GEN6(dev) && (dev->pdev->revision < 8)) || \
+	(IS_GEN6(dev) && NEED_MORE_BLT_WORKAROUND(dev)))
+
+static inline struct drm_i915_gem_object *
+to_blt_workaround(struct intel_ring_buffer *ring)
+{
+	return ring->private;
+}
+
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -344,7 +369,10 @@ gen6_add_request(struct intel_ring_buffer *ring,
 	u32 mbox2_reg;
 	int ret;
 
-	ret = intel_ring_begin(ring, 10);
+	if (ring->id == RING_BLT)
+		ret = blt_ring_begin(ring, 10);
+	else
+		ret = intel_ring_begin(ring, 10);
 	if (ret)
 		return ret;
 
@@ -359,6 +387,10 @@ gen6_add_request(struct intel_ring_buffer *ring,
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
 	intel_ring_emit(ring, *seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
+#if (BLT_WA_AFTER_WORK == 1)
+	if (ring->id == RING_BLT)
+		blt_ring_begin2(ring);
+#endif
 	intel_ring_advance(ring);
 
 	return 0;
@@ -382,7 +414,10 @@ intel_ring_sync(struct intel_ring_buffer *waiter,
 		  MI_SEMAPHORE_COMPARE |
 		  MI_SEMAPHORE_REGISTER;
 
-	ret = intel_ring_begin(waiter, 4);
+	if (waiter->id == RING_BLT)
+		ret = blt_ring_begin(waiter, 4);
+	else
+		ret = intel_ring_begin(waiter, 4);
 	if (ret)
 		return ret;
 
@@ -390,6 +425,10 @@ intel_ring_sync(struct intel_ring_buffer *waiter,
 	intel_ring_emit(waiter, seqno);
 	intel_ring_emit(waiter, 0);
 	intel_ring_emit(waiter, MI_NOOP);
+#if (BLT_WA_AFTER_WORK == 1)
+	if (waiter->id == RING_BLT)
+		blt_ring_begin2(waiter);
+#endif
 	intel_ring_advance(waiter);
 
 	return 0;
@@ -967,6 +1006,10 @@ static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
 {
 	unsigned int *virt;
 	int rem = ring->size - ring->tail;
+	if (ring->id == RING_BLT)
+		rem-=(128+64); /* (32 noops + 14 begin2 + 2 a0 wa) dwords * 4 bytes */
+
+	BUG_ON(rem < 0);
 
 	if (ring->space < rem) {
 		int ret = intel_wait_ring_buffer(ring, rem);
@@ -975,6 +1018,27 @@ static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
 	}
 
 	virt = (unsigned int *)(ring->virtual_start + ring->tail);
+	if (ring->id == RING_BLT) {
+		int i = 0;
+		for (i = 0; i < 32; i++)
+			*virt++ = MI_NOOP;
+		*virt++ = MI_BATCH_BUFFER_START;
+		*virt++ = to_blt_workaround(ring)->gtt_offset;
+		*virt++ = MI_LOAD_REGISTER_IMM(1);
+		*virt++ = GEN6_BCS_MI_MODE;
+		*virt++ = GEN6_BCS_BYPASS_FENCE << 16 | GEN6_BCS_BYPASS_FENCE;
+		*virt++ = MI_FLUSH_DW;
+		*virt++ = 0;
+		*virt++ = 0;
+		*virt++ = MI_NOOP;
+		*virt++ = MI_LOAD_REGISTER_IMM(1);
+		*virt++ = GEN6_BCS_MI_MODE;
+		*virt++ = GEN6_BCS_BYPASS_FENCE << 16;
+		*virt++ = MI_LOAD_REGISTER_IMM(1);
+		*virt++ = GEN6_BCS_MI_MODE;
+		*virt++ = GEN6_BCS_BYPASS_FENCE << 16;
+		*virt++ = MI_NOOP;
+	}
 	rem /= 8;
 	while (rem--) {
 		*virt++ = MI_NOOP;
@@ -1232,21 +1296,6 @@ blt_ring_put_irq(struct intel_ring_buffer *ring)
 			  GEN6_BLITTER_USER_INTERRUPT);
 }
 
-
-/* Workaround for some stepping of SNB,
- * each time when BLT engine ring tail moved,
- * the first command in the ring to be parsed
- * should be MI_BATCH_BUFFER_START
- */
-#define NEED_BLT_WORKAROUND(dev) \
-	(IS_GEN6(dev) && (dev->pdev->revision < 8))
-
-static inline struct drm_i915_gem_object *
-to_blt_workaround(struct intel_ring_buffer *ring)
-{
-	return ring->private;
-}
-
 static int blt_ring_init(struct intel_ring_buffer *ring)
 {
 	if (NEED_BLT_WORKAROUND(ring->dev)) {
@@ -1275,7 +1324,6 @@ static int blt_ring_init(struct intel_ring_buffer *ring)
 			drm_gem_object_unreference(&obj->base);
 			return ret;
 		}
-
 		ring->private = obj;
 	}
 
@@ -1286,18 +1334,52 @@ static int blt_ring_begin(struct intel_ring_buffer *ring,
 			  int num_dwords)
 {
 	if (ring->private) {
-		int ret = intel_ring_begin(ring, num_dwords+2);
+		int ret;
+		if (NEED_MORE_BLT_WORKAROUND(ring->dev))
+			num_dwords += (32 + 14 + 2); /* 32 noops, begin2, a0 wa */
+		else
+			num_dwords += 2;
+		ret = intel_ring_begin(ring, num_dwords);
 		if (ret)
 			return ret;
 
+		if (NEED_MORE_BLT_WORKAROUND(ring->dev)) {
+			int i = 0;
+			for (i = 0; i < 32; i++)
+				intel_ring_emit(ring, MI_NOOP);
+		}
 		intel_ring_emit(ring, MI_BATCH_BUFFER_START);
 		intel_ring_emit(ring, to_blt_workaround(ring)->gtt_offset);
-
+#if (BLT_WA_AFTER_WORK == 0)
+		blt_ring_begin2(ring);
+#endif
 		return 0;
 	} else
 		return intel_ring_begin(ring, 4);
 }
 
+static void blt_ring_begin2(struct intel_ring_buffer *ring)
+{
+	if (!NEED_MORE_BLT_WORKAROUND(ring->dev))
+		return;
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+	intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16 |
+			      GEN6_BCS_BYPASS_FENCE);
+	intel_ring_emit(ring, MI_FLUSH_DW);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+	intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+	intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16);
+	intel_ring_emit(ring, MI_NOOP);
+}
+
 static int blt_ring_flush(struct intel_ring_buffer *ring,
 			  u32 invalidate, u32 flush)
 {
@@ -1315,10 +1397,32 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
+#if (BLT_WA_AFTER_WORK == 1)
+	blt_ring_begin2(ring);
+#endif
 	intel_ring_advance(ring);
 	return 0;
 }
 
+static int
+blt_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+			      u32 offset, u32 len)
+{
+	int ret;
+
+	ret = blt_ring_begin(ring, 4);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+	/* bit0-7 is the length on GEN6+ */
+	intel_ring_emit(ring, offset);
+#if (BLT_WA_AFTER_WORK == 1)
+	blt_ring_begin2(ring);
+#endif
+	intel_ring_advance(ring);
+
+	return 0;
+}
 static void blt_ring_cleanup(struct intel_ring_buffer *ring)
 {
 	if (!ring->private)
@@ -1341,7 +1445,7 @@ static const struct intel_ring_buffer gen6_blt_ring = {
 	.get_seqno		= ring_get_seqno,
 	.irq_get		= blt_ring_get_irq,
 	.irq_put		= blt_ring_put_irq,
-	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
+	.dispatch_execbuffer	= blt_ring_dispatch_execbuffer,
 	.cleanup		= blt_ring_cleanup,
 	.sync_to		= gen6_blt_ring_sync_to,
 	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
@@ -1435,8 +1539,13 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
+	int ret;
 
 	*ring = gen6_blt_ring;
 
-	return intel_init_ring_buffer(dev, ring);
+	ret = intel_init_ring_buffer(dev, ring);
+
+	ring->effective_size -= 192;
+
+	return ret;
 }
-- 
1.7.6.4




More information about the Intel-gfx mailing list