[Intel-gfx] [RFC] drm/i915: use PIPE_CONTROL for flushing on gen6+
Jesse Barnes
jbarnes at virtuousgeek.org
Fri Aug 12 20:18:45 CEST 2011
I'm trying to figure out why this doesn't work. Anyone have ideas?
On gen6+ (well probably since Cantiga actually) we're supposed to use
PIPE_CONTROL rather than MI_FLUSH for flushing the pipeline and
caches. This patch doesn't cause hangs or crashes in my testing, but
does prevent glxgears from displaying anything.
The other worrying thing about this is that gen6+ has a command length
field of 3, but we're using 2 in the DRI driver, even on gen6.
Changing it doesn't seem to have any effect, but it's still of concern.
--
Jesse Barnes, Intel Open Source Technology Center
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5baaef4..4a456a6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -236,6 +236,8 @@
#define DISPLAY_PLANE_A (0<<20)
#define DISPLAY_PLANE_B (1<<20)
#define GFX_OP_PIPE_CONTROL ((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
+#define GFX_OP_PIPE_CONTROL_GEN6 ((0x3<<29)|(0x3<<27)|(0x2<<24)|3)
+#define PIPE_CONTROL_CS_STALL (1<<20)
#define PIPE_CONTROL_QW_WRITE (1<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
#define PIPE_CONTROL_WC_FLUSH (1<<12)
@@ -244,8 +246,8 @@
#define PIPE_CONTROL_ISP_DIS (1<<9)
#define PIPE_CONTROL_NOTIFY (1<<8)
#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
-#define PIPE_CONTROL_STALL_EN (1<<1) /* in addr word, Ironlake+ only */
-
+#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) /* in addr word, Ironlake+ only */
+#define PIPE_CONTROL_DEPTH_FLUSH (1<<0)
/*
* Reset registers
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 47b9b27..cecc1e1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,16 @@
#include "i915_trace.h"
#include "intel_drv.h"
+/*
+ * 965+ support PIPE_CONTROL commands, which provide finer grained control
+ * over cache flushing.
+ */
+struct pipe_control {
+ struct drm_i915_gem_object *obj;
+ volatile u32 *cpu_page;
+ u32 gtt_offset;
+};
+
static inline int ring_space(struct intel_ring_buffer *ring)
{
int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -123,6 +133,112 @@ render_ring_flush(struct intel_ring_buffer *ring,
return 0;
}
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6. From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ * "1 of the following must also be set:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ * - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it. Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either. Notify enable is IRQs, which aren't
+ * really our business. That leaves only stall at scoreboard.
+ */
+static int
+intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
+{
+ struct pipe_control *pc = ring->private;
+ u32 scratch_addr = pc->gtt_offset + 128;
+ int ret;
+
+
+ ret = intel_ring_begin(ring, 6);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN6);
+ intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+ intel_ring_emit(ring, 0); /* low dword */
+ intel_ring_emit(ring, 0); /* high dword */
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+
+ ret = intel_ring_begin(ring, 6);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN6);
+ intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
+ intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+
+ return 0;
+}
+
+static int
+gen6_render_ring_flush(struct intel_ring_buffer *ring,
+ u32 invalidate_domains, u32 flush_domains)
+{
+ u32 flags = 0;
+ struct pipe_control *pc = ring->private;
+ u32 scratch_addr = pc->gtt_offset + 128;
+ int ret;
+
+ /* Force SNB workarounds for PIPE_CONTROL flushes */
+ intel_emit_post_sync_nonzero_flush(ring);
+
+ /* Just flush everything for now */
+ flags |= PIPE_CONTROL_WC_FLUSH;
+ flags |= PIPE_CONTROL_IS_FLUSH;
+ flags |= PIPE_CONTROL_TC_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_FLUSH;
+
+ ret = intel_ring_begin(ring, 6);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL_GEN6);
+ intel_ring_emit(ring, flags);
+ intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+ intel_ring_emit(ring, 0); /* lower dword */
+ intel_ring_emit(ring, 0); /* uppwer dword */
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+
+ return 0;
+}
+
static void ring_write_tail(struct intel_ring_buffer *ring,
u32 value)
{
@@ -206,16 +322,6 @@ static int init_ring_common(struct intel_ring_buffer *ring)
return 0;
}
-/*
- * 965+ support PIPE_CONTROL commands, which provide finer grained control
- * over cache flushing.
- */
-struct pipe_control {
- struct drm_i915_gem_object *obj;
- volatile u32 *cpu_page;
- u32 gtt_offset;
-};
-
static int
init_pipe_control(struct intel_ring_buffer *ring)
{
@@ -292,8 +398,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
I915_WRITE(MI_MODE, mode);
}
- if (INTEL_INFO(dev)->gen >= 6) {
- } else if (IS_GEN5(dev)) {
+ if (INTEL_INFO(dev)->gen >= 5) {
ret = init_pipe_control(ring);
if (ret)
return ret;
@@ -1291,6 +1396,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
*ring = render_ring;
if (INTEL_INFO(dev)->gen >= 6) {
ring->add_request = gen6_add_request;
+ ring->flush = gen6_render_ring_flush;
ring->irq_get = gen6_render_ring_get_irq;
ring->irq_put = gen6_render_ring_put_irq;
} else if (IS_GEN5(dev)) {
More information about the Intel-gfx
mailing list