[Intel-gfx] [PATCH 28/53] drm/i915/bdw: GEN-specific logical ring emit flush

oscar.mateo at intel.com oscar.mateo at intel.com
Fri Jun 13 17:37:46 CEST 2014


From: Oscar Mateo <oscar.mateo at intel.com>

Notice that the BSD invalidate bit is no longer present in GEN8, so
we can consolidate the BLT and BSD ring flushes into one function.

Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 80 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  7 ---
 drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +++++
 3 files changed, 91 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3debe8b..3d7fcd6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -343,6 +343,81 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring)
 	return ret;
 }
 
+static int gen8_emit_flush(struct intel_engine_cs *ring,
+			   struct intel_context *ctx,
+			   u32 invalidate_domains,
+			   u32 unused)
+{
+	struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx);
+	uint32_t cmd;
+	int ret;
+
+	ret = intel_logical_ring_begin(ring, ctx, 4);
+	if (ret)
+		return ret;
+
+	cmd = MI_FLUSH_DW + 1;
+
+	/*
+	 * Bspec vol 1c.3 - blitter engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
+	if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
+		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+			MI_FLUSH_DW_OP_STOREDW;
+	intel_logical_ring_emit(ringbuf, cmd);
+	intel_logical_ring_emit(ringbuf, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
+	intel_logical_ring_emit(ringbuf, 0); /* value */
+	intel_logical_ring_advance(ringbuf);
+
+	return 0;
+}
+
+static int gen8_emit_flush_render(struct intel_engine_cs *ring,
+				  struct intel_context *ctx,
+				  u32 invalidate_domains,
+				  u32 flush_domains)
+{
+	struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx);
+	u32 flags = 0;
+	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	int ret;
+
+	flags |= PIPE_CONTROL_CS_STALL;
+
+	if (flush_domains) {
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	}
+	if (invalidate_domains) {
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+	}
+
+	ret = intel_logical_ring_begin(ring, ctx, 6);
+	if (ret)
+		return ret;
+
+	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+	intel_logical_ring_emit(ringbuf, flags);
+	intel_logical_ring_emit(ringbuf, scratch_addr);
+	intel_logical_ring_emit(ringbuf, 0);
+	intel_logical_ring_emit(ringbuf, 0);
+	intel_logical_ring_emit(ringbuf, 0);
+	intel_logical_ring_advance(ringbuf);
+
+	return 0;
+}
+
 static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
@@ -491,6 +566,7 @@ static int logical_render_ring_init(struct drm_device *dev)
 	ring->set_seqno = gen8_set_seqno;
 	ring->submit_ctx = gen8_submit_ctx;
 	ring->emit_request = gen8_emit_request_render;
+	ring->emit_flush = gen8_emit_flush_render;
 
 	return logical_ring_init(dev, ring);
 }
@@ -511,6 +587,7 @@ static int logical_bsd_ring_init(struct drm_device *dev)
 	ring->set_seqno = gen8_set_seqno;
 	ring->submit_ctx = gen8_submit_ctx;
 	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush;
 
 	return logical_ring_init(dev, ring);
 }
@@ -531,6 +608,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
 	ring->set_seqno = gen8_set_seqno;
 	ring->submit_ctx = gen8_submit_ctx;
 	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush;
 
 	return logical_ring_init(dev, ring);
 }
@@ -551,6 +629,7 @@ static int logical_blt_ring_init(struct drm_device *dev)
 	ring->set_seqno = gen8_set_seqno;
 	ring->submit_ctx = gen8_submit_ctx;
 	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush;
 
 	return logical_ring_init(dev, ring);
 }
@@ -571,6 +650,7 @@ static int logical_vebox_ring_init(struct drm_device *dev)
 	ring->set_seqno = gen8_set_seqno;
 	ring->submit_ctx = gen8_submit_ctx;
 	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush;
 
 	return logical_ring_init(dev, ring);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 137ee9a..a128f6f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -33,13 +33,6 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
- * but keeps the logic simple. Indeed, the whole purpose of this macro is just
- * to give some inclination as to some of the magic values used in the various
- * workarounds!
- */
-#define CACHELINE_BYTES 64
-
 bool
 intel_ring_initialized(struct intel_engine_cs *ring)
 {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d8ded14..527db2a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -5,6 +5,13 @@
 
 #define I915_CMD_HASH_ORDER 9
 
+/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give some inclination as to some of the magic values used in the various
+ * workarounds!
+ */
+#define CACHELINE_BYTES 64
+
 /*
  * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
  * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
@@ -153,6 +160,10 @@ struct  intel_engine_cs {
 				      struct intel_context *ctx, u32 value);
 	int		(*emit_request)(struct intel_engine_cs *ring,
 					struct intel_context *ctx);
+	int __must_check (*emit_flush)(struct intel_engine_cs *ring,
+				       struct intel_context *ctx,
+				       u32 invalidate_domains,
+				       u32 flush_domains);
 
 	/**
 	 * List of objects currently involved in rendering from the
-- 
1.9.0




More information about the Intel-gfx mailing list