[Intel-gfx] [PATCH 08/73] drm/i915: Reduce engine->emit_flush() to a single mode parameter

Chris Wilson chris at chris-wilson.co.uk
Mon Aug 1 09:10:16 UTC 2016


Rather than passing a complete set of GPU cache domains for either
invalidation or for flushing, or even both, just pass a single parameter
to the engine->emit_flush to determine the required operations.

engine->emit_flush(GPU, 0) -> engine->emit_flush(EMIT_INVALIDATE)
engine->emit_flush(0, GPU) -> engine->emit_flush(EMIT_FLUSH)
engine->emit_flush(GPU, GPU) -> engine->emit_flush(EMIT_FLUSH | EMIT_INVALIDATE)

This allows us to extend the behaviour easily in future, for example if
we want just a command barrier without the overhead of flushing.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon at intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 10 ++----
 drivers/gpu/drm/i915/i915_gem_request.c    |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 23 +++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 57 +++++++++++-------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  6 ++--
 7 files changed, 38 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index beece8feb8fe..edde8411c478 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -568,7 +568,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	 * itlb_before_ctx_switch.
 	 */
 	if (IS_GEN6(dev_priv)) {
-		ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
+		ret = engine->emit_flush(req, EMIT_INVALIDATE);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 35c4c595e5ba..e49776e34eed 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -999,7 +999,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		wmb();
 
 	/* Unconditionally invalidate GPU caches and TLBs. */
-	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
+	return req->engine->emit_flush(req, EMIT_INVALIDATE);
 }
 
 static bool
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 39fa9eb10514..671b1cab5e54 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1666,8 +1666,7 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = engine->emit_flush(req,
-				 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
 	if (ret)
 		return ret;
 
@@ -1694,8 +1693,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = engine->emit_flush(req,
-				 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
 	if (ret)
 		return ret;
 
@@ -1713,9 +1711,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 
 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
 	if (engine->id != RCS) {
-		ret = engine->emit_flush(req,
-					 I915_GEM_GPU_DOMAINS,
-					 I915_GEM_GPU_DOMAINS);
+		ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 7e3206051ced..67f16feb0552 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -451,7 +451,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 * what.
 	 */
 	if (flush_caches) {
-		ret = engine->emit_flush(request, 0, I915_GEM_GPU_DOMAINS);
+		ret = engine->emit_flush(request, EMIT_FLUSH);
 
 		/* Not allowed to fail! */
 		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 33ff9f6d588f..3f1c5cdd38a0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -672,7 +672,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
-	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
+	return req->engine->emit_flush(req, EMIT_INVALIDATE);
 }
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
@@ -998,9 +998,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	if (w->count == 0)
 		return 0;
 
-	ret = req->engine->emit_flush(req,
-				      I915_GEM_GPU_DOMAINS,
-				      I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
 		return ret;
 
@@ -1017,9 +1015,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 
 	intel_ring_advance(ring);
 
-	ret = req->engine->emit_flush(req,
-				      I915_GEM_GPU_DOMAINS,
-				      I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
 		return ret;
 
@@ -1600,9 +1596,7 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
 	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
 }
 
-static int gen8_emit_flush(struct drm_i915_gem_request *request,
-			   u32 invalidate_domains,
-			   u32 unused)
+static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
 {
 	struct intel_ring *ring = request->ring;
 	u32 cmd;
@@ -1621,7 +1615,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request,
 	 */
 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 
-	if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
+	if (mode & EMIT_INVALIDATE) {
 		cmd |= MI_INVALIDATE_TLB;
 		if (request->engine->id == VCS)
 			cmd |= MI_INVALIDATE_BSD;
@@ -1639,8 +1633,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request,
 }
 
 static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
-				  u32 invalidate_domains,
-				  u32 flush_domains)
+				  u32 mode)
 {
 	struct intel_ring *ring = request->ring;
 	struct intel_engine_cs *engine = request->engine;
@@ -1652,14 +1645,14 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 
 	flags |= PIPE_CONTROL_CS_STALL;
 
-	if (flush_domains) {
+	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
 
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9e4b49644553..1f876e7ce582 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -67,19 +67,15 @@ static void __intel_engine_submit(struct intel_engine_cs *engine)
 }
 
 static int
-gen2_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32	invalidate_domains,
-		       u32	flush_domains)
+gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	u32 cmd;
 	int ret;
 
 	cmd = MI_FLUSH;
-	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
-		cmd |= MI_NO_WRITE_FLUSH;
 
-	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
+	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_READ_FLUSH;
 
 	ret = intel_ring_begin(req, 2);
@@ -94,9 +90,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req,
 }
 
 static int
-gen4_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32	invalidate_domains,
-		       u32	flush_domains)
+gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	u32 cmd;
@@ -131,7 +125,7 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req,
 	 */
 
 	cmd = MI_FLUSH;
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		cmd |= MI_EXE_FLUSH;
 		if (IS_G4X(req->i915) || IS_GEN5(req->i915))
 			cmd |= MI_INVALIDATE_ISP;
@@ -222,8 +216,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
 }
 
 static int
-gen6_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32 invalidate_domains, u32 flush_domains)
+gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
@@ -240,7 +233,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		/*
@@ -249,7 +242,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
 		 */
 		flags |= PIPE_CONTROL_CS_STALL;
 	}
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -297,8 +290,7 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
 }
 
 static int
-gen7_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32 invalidate_domains, u32 flush_domains)
+gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
@@ -320,13 +312,13 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -384,8 +376,7 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req,
 }
 
 static int
-gen8_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32 invalidate_domains, u32 flush_domains)
+gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	u32 flags = 0;
@@ -393,13 +384,13 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req,
 
 	flags |= PIPE_CONTROL_CS_STALL;
 
-	if (flush_domains) {
+	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -688,9 +679,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	if (w->count == 0)
 		return 0;
 
-	ret = req->engine->emit_flush(req,
-				      I915_GEM_GPU_DOMAINS,
-				      I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
 		return ret;
 
@@ -707,9 +696,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 
 	intel_ring_advance(ring);
 
-	ret = req->engine->emit_flush(req,
-				      I915_GEM_GPU_DOMAINS,
-				      I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
 		return ret;
 
@@ -1700,9 +1687,7 @@ i8xx_irq_disable(struct intel_engine_cs *engine)
 }
 
 static int
-bsd_ring_flush(struct drm_i915_gem_request *req,
-	       u32     invalidate_domains,
-	       u32     flush_domains)
+bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	int ret;
@@ -2533,8 +2518,7 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
-			       u32 invalidate, u32 flush)
+static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	uint32_t cmd;
@@ -2561,7 +2545,7 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
 	 * operation is complete. This bit is only valid when the
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
-	if (invalidate & I915_GEM_GPU_DOMAINS)
+	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
 
 	intel_ring_emit(ring, cmd);
@@ -2653,8 +2637,7 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
 
 /* Blitter support (SandyBridge+) */
 
-static int gen6_ring_flush(struct drm_i915_gem_request *req,
-			   u32 invalidate, u32 flush)
+static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	uint32_t cmd;
@@ -2681,7 +2664,7 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req,
 	 * operation is complete. This bit is only valid when the
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
-	if (invalidate & I915_GEM_DOMAIN_RENDER)
+	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_INVALIDATE_TLB;
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 00723401f98c..76d0495943c3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -292,8 +292,10 @@ struct intel_engine_cs {
 	u32 ctx_desc_template;
 	int		(*emit_request)(struct drm_i915_gem_request *request);
 	int		(*emit_flush)(struct drm_i915_gem_request *request,
-				      u32 invalidate_domains,
-				      u32 flush_domains);
+				      u32 mode);
+#define EMIT_INVALIDATE	BIT(0)
+#define EMIT_FLUSH	BIT(1)
+#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
 	int		(*emit_bb_start)(struct drm_i915_gem_request *req,
 					 u64 offset, unsigned dispatch_flags);
 
-- 
2.8.1



More information about the Intel-gfx mailing list