[Intel-gfx] [PATCH 1/5] drm/i915: Amalgamate the parameters to ring flush

Chris Wilson chris at chris-wilson.co.uk
Thu Jun 20 19:18:28 CEST 2013


Now that the invalidate and flush domain parameters are only used as
booleans, and we may want to extend the range of actions in the future,
consolidate the pair into a single action bitmask. As a side effect, the
individual flush implementations can now return early without emitting
any commands when no action is requested.
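
For example, where a caller previously passed a pair of domain masks,

	ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);

it now passes a single action mask:

	ring->flush(ring, RING_INVALIDATE);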

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c |   2 +-
 drivers/gpu/drm/i915/i915_trace.h       |  11 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 288 +++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |   8 +-
 4 files changed, 167 insertions(+), 142 deletions(-)
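
The action mask also leaves room to grow: further operations can be added
without another tree-wide change to the flush() prototype. A hypothetical
sketch (RING_SYNC is an invented bit, not part of this series):

	#define RING_FLUSH	0x1
	#define RING_INVALIDATE	0x2
	#define RING_SYNC	0x4	/* hypothetical future action */

	ret = ring->flush(ring, RING_FLUSH | RING_SYNC);
	if (ret)
		return ret;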

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ff47145..540a9c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -360,7 +360,7 @@ mi_set_context(struct intel_ring_buffer *ring,
 	 * itlb_before_ctx_switch.
 	 */
 	if (IS_GEN6(ring->dev) && ring->itlb_before_ctx_switch) {
-		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
+		ret = intel_ring_invalidate_all_caches(ring);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 3db4a68..ce392eb 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -252,26 +252,23 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 );
 
 TRACE_EVENT(i915_gem_ring_flush,
-	    TP_PROTO(struct intel_ring_buffer *ring, u32 invalidate, u32 flush),
-	    TP_ARGS(ring, invalidate, flush),
+	    TP_PROTO(struct intel_ring_buffer *ring, u32 flush),
+	    TP_ARGS(ring, flush),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
-			     __field(u32, invalidate)
 			     __field(u32, flush)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->invalidate = invalidate;
 			   __entry->flush = flush;
 			   ),
 
-	    TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x",
-		      __entry->dev, __entry->ring,
-		      __entry->invalidate, __entry->flush)
+	    TP_printk("dev=%u, ring=%x, flush=%04x",
+		      __entry->dev, __entry->ring, __entry->flush)
 );
 
 DECLARE_EVENT_CLASS(i915_gem_request,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e51ab55..601e1eb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -52,38 +52,32 @@ static inline int ring_space(struct intel_ring_buffer *ring)
 }
 
 static int
-gen2_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32	invalidate_domains,
-		       u32	flush_domains)
+gen2_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
-	u32 cmd;
 	int ret;
 
-	cmd = MI_FLUSH;
-	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
-		cmd |= MI_NO_WRITE_FLUSH;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		u32 cmd = MI_FLUSH;
 
-	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
-		cmd |= MI_READ_FLUSH;
+		if (action & RING_INVALIDATE)
+			cmd |= MI_READ_FLUSH;
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
 
 static int
-gen4_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32	invalidate_domains,
-		       u32	flush_domains)
+gen4_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	struct drm_device *dev = ring->dev;
-	u32 cmd;
 	int ret;
 
 	/*
@@ -114,23 +108,23 @@ gen4_render_ring_flush(struct intel_ring_buffer *ring,
 	 * are flushed at any MI_FLUSH.
 	 */
 
-	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
-	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
-		cmd &= ~MI_NO_WRITE_FLUSH;
-	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
-		cmd |= MI_EXE_FLUSH;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		u32 cmd = MI_FLUSH;
 
-	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
-	    (IS_G4X(dev) || IS_GEN5(dev)))
-		cmd |= MI_INVALIDATE_ISP;
+		if (action & RING_INVALIDATE) {
+			cmd |= MI_EXE_FLUSH;
+			if (IS_G4X(dev) || IS_GEN5(dev))
+				cmd |= MI_INVALIDATE_ISP;
+		}
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
@@ -179,7 +173,6 @@ intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
 
-
 	ret = intel_ring_begin(ring, 6);
 	if (ret)
 		return ret;
@@ -209,24 +202,18 @@ intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
 }
 
 static int
-gen6_render_ring_flush(struct intel_ring_buffer *ring,
-                         u32 invalidate_domains, u32 flush_domains)
+gen6_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	u32 flags = 0;
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
 
-	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(ring);
-	if (ret)
-		return ret;
-
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (action & RING_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		/*
@@ -235,7 +222,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		 */
 		flags |= PIPE_CONTROL_CS_STALL;
 	}
-	if (invalidate_domains) {
+	if (action & RING_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -248,15 +235,22 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags) {
+		/* Force SNB workarounds for PIPE_CONTROL flushes */
+		ret = intel_emit_post_sync_nonzero_flush(ring);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+		intel_ring_emit(ring, flags);
+		intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+		intel_ring_emit(ring, 0);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
@@ -302,33 +296,22 @@ static int gen7_ring_fbc_flush(struct intel_ring_buffer *ring, u32 value)
 }
 
 static int
-gen7_render_ring_flush(struct intel_ring_buffer *ring,
-		       u32 invalidate_domains, u32 flush_domains)
+gen7_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	u32 flags = 0;
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
 
-	/*
-	 * Ensure that any following seqno writes only happen when the render
-	 * cache is indeed flushed.
-	 *
-	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
-	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
-	 * don't try to be clever and just set it unconditionally.
-	 */
-	flags |= PIPE_CONTROL_CS_STALL;
-
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (action & RING_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 	}
-	if (invalidate_domains) {
+	if (action & RING_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -347,17 +330,30 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
 		gen7_render_ring_cs_stall_wa(ring);
 	}
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags) {
+		/*
+		 * Ensure that any following seqno writes only happen when the render
+		 * cache is indeed flushed.
+		 *
+		 * Workaround: 4th PIPE_CONTROL command (except the ones with only
+		 * read-cache invalidate bits set) must have the CS_STALL bit set,
+		 * so stall whenever this command emits any write-cache flush bits.
+		 */
+		if (action & RING_FLUSH)
+			flags |= PIPE_CONTROL_CS_STALL;
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
 
-	if (flush_domains)
+		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+		intel_ring_emit(ring, flags);
+		intel_ring_emit(ring, scratch_addr);
+		intel_ring_emit(ring, 0);
+		intel_ring_advance(ring);
+	}
+
+	if (action & RING_FLUSH)
 		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
 
 	return 0;
@@ -956,19 +952,19 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
 }
 
 static int
-bsd_ring_flush(struct intel_ring_buffer *ring,
-	       u32     invalidate_domains,
-	       u32     flush_domains)
+bsd_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	int ret;
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		ret = intel_ring_begin(ring, 2);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, MI_FLUSH);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, MI_FLUSH);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
 	return 0;
 }
 
@@ -1636,31 +1632,34 @@ static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
 }
 
-static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring,
-			       u32 invalidate, u32 flush)
+static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
-	uint32_t cmd;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		u32 cmd = MI_FLUSH_DW;
+
+		/*
+		 * Bspec vol 1c.5 - video engine command streamer:
+		 * "If ENABLED, all TLBs will be invalidated once the flush
+		 * operation is complete. This bit is only valid when the
+		 * Post-Sync Operation field is a value of 1h or 3h."
+		 */
+		if (action & RING_INVALIDATE)
+			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+				MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
 
-	cmd = MI_FLUSH_DW;
-	/*
-	 * Bspec vol 1c.5 - video engine command streamer:
-	 * "If ENABLED, all TLBs will be invalidated once the flush
-	 * operation is complete. This bit is only valid when the
-	 * Post-Sync Operation field is a value of 1h or 3h."
-	 */
-	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
 	return 0;
 }
 
@@ -1708,34 +1707,37 @@ gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
 
 /* Blitter support (SandyBridge+) */
 
-static int gen6_ring_flush(struct intel_ring_buffer *ring,
-			   u32 invalidate, u32 flush)
+static int gen6_ring_flush(struct intel_ring_buffer *ring, u32 action)
 {
 	struct drm_device *dev = ring->dev;
 	uint32_t cmd;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (action & (RING_FLUSH | RING_INVALIDATE)) {
+		cmd = MI_FLUSH_DW;
 
-	cmd = MI_FLUSH_DW;
-	/*
-	 * Bspec vol 1c.3 - blitter engine command streamer:
-	 * "If ENABLED, all TLBs will be invalidated once the flush
-	 * operation is complete. This bit is only valid when the
-	 * Post-Sync Operation field is a value of 1h or 3h."
-	 */
-	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-			MI_FLUSH_DW_OP_STOREDW;
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+		/*
+		 * Bspec vol 1c.3 - blitter engine command streamer:
+		 * "If ENABLED, all TLBs will be invalidated once the flush
+		 * operation is complete. This bit is only valid when the
+		 * Post-Sync Operation field is a value of 1h or 3h."
+		 */
+		if (action & RING_INVALIDATE)
+			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+				MI_FLUSH_DW_OP_STOREDW;
 
-	if (IS_GEN7(dev) && flush)
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, cmd);
+		intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
+
+	if (IS_GEN7(dev) && (action & RING_FLUSH))
 		return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
 
 	return 0;
@@ -2027,11 +2029,11 @@ intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
 	if (!ring->gpu_caches_dirty)
 		return 0;
 
-	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	ret = ring->flush(ring, RING_FLUSH);
 	if (ret)
 		return ret;
 
-	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	trace_i915_gem_ring_flush(ring, RING_FLUSH);
 
 	ring->gpu_caches_dirty = false;
 	return 0;
@@ -2040,18 +2042,40 @@ intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
 int
 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
 {
-	uint32_t flush_domains;
+	u32 action;
+	int ret;
+
+	action = RING_INVALIDATE;
+	if (ring->gpu_caches_dirty)
+		action |= RING_FLUSH;
+
+	ret = ring->flush(ring, action);
+	if (ret)
+		return ret;
+
+	trace_i915_gem_ring_flush(ring, action);
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
+
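+/*
+ * Note: like intel_ring_invalidate_all_caches(), this upgrades the
+ * requested action with RING_FLUSH whenever the GPU caches are dirty.
+ */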
+int
+intel_ring_flush_internal(struct intel_ring_buffer *ring, u32 action)
+{
 	int ret;
 
-	flush_domains = 0;
 	if (ring->gpu_caches_dirty)
-		flush_domains = I915_GEM_GPU_DOMAINS;
+		action |= RING_FLUSH;
 
-	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	ret = ring->flush(ring, action);
 	if (ret)
 		return ret;
 
-	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	trace_i915_gem_ring_flush(ring, action);
 
 	ring->gpu_caches_dirty = false;
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 799f04c..5066b3b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -93,8 +93,11 @@ struct  intel_ring_buffer {
 	void		(*write_tail)(struct intel_ring_buffer *ring,
 				      u32 value);
 	int __must_check (*flush)(struct intel_ring_buffer *ring,
-				  u32	invalidate_domains,
-				  u32	flush_domains);
+				  u32 action);
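+/* actions accepted by flush() and the intel_ring_flush_* helpers */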
+#define RING_FLUSH	0x1
+#define RING_INVALIDATE 0x2
+
 	int		(*add_request)(struct intel_ring_buffer *ring);
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
@@ -240,6 +242,7 @@ int __must_check intel_ring_idle(struct intel_ring_buffer *ring);
 void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno);
 int intel_ring_flush_all_caches(struct intel_ring_buffer *ring);
 int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring);
+int intel_ring_flush_internal(struct intel_ring_buffer *ring, u32 action);
 
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
-- 
1.8.3.1



