Mesa (master): freedreno: better manage our WFI's

Rob Clark robclark at kemper.freedesktop.org
Sat Feb 1 17:27:20 UTC 2014


Module: Mesa
Branch: master
Commit: dc00ec154bda15672861d1b508aa4aacdb306f68
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=dc00ec154bda15672861d1b508aa4aacdb306f68

Author: Rob Clark <robclark at freedesktop.org>
Date:   Sat Feb  1 10:53:00 2014 -0500

freedreno: better manage our WFI's

Updates to non-banked registers, CP_LOAD_STATE, etc., need a WFI
(CP_WAIT_FOR_IDLE) if there may be pending rendering.  Track this
better, and add fd_wfi() calls everywhere that might need one.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_draw.c     |    1 +
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c     |   19 ++++++++-----------
 src/gallium/drivers/freedreno/a3xx/fd3_gmem.c     |   14 +++++++++++---
 src/gallium/drivers/freedreno/freedreno_context.c |    2 +-
 src/gallium/drivers/freedreno/freedreno_context.h |   16 +++++++++-------
 src/gallium/drivers/freedreno/freedreno_draw.h    |    2 +-
 src/gallium/drivers/freedreno/freedreno_gmem.c    |    6 +++++-
 7 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index be710d1..83024c1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -249,6 +249,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 			{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
 		}, 1);
 
+	fd_wfi(ctx, ring);
 	fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
 
 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 3ca49ff..a364fbf 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -64,15 +64,6 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
 		src = SS_DIRECT;
 	}
 
-	/* we have this sometimes, not others.. perhaps we could be clever
-	 * and figure out actually when we need to invalidate cache:
-	 */
-	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
-	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
-	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
-			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
-			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
-
 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
 			CP_LOAD_STATE_0_STATE_SRC(src) |
@@ -458,8 +449,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
 	}
 
-	if (dirty & FD_DIRTY_PROG)
+	if (dirty & FD_DIRTY_PROG) {
+		fd_wfi(ctx, ring);
 		fd3_program_emit(ring, prog, binning);
+	}
 
 	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
 	OUT_RING(ring, HLSQ_FLUSH);
@@ -467,6 +460,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
 			/* evil hack to deal sanely with clear path: */
 			(prog == &ctx->prog)) {
+		fd_wfi(ctx, ring);
 		emit_constants(ring,  SB_VERT_SHADER,
 				&ctx->constbuf[PIPE_SHADER_VERTEX],
 				(prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL);
@@ -501,6 +495,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 				A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
 	}
 
+	if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
+		fd_wfi(ctx, ring);
+
 	if (dirty & FD_DIRTY_VERTTEX)
 		emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
 
@@ -638,5 +635,5 @@ fd3_emit_restore(struct fd_context *ctx)
 	OUT_RING(ring, 0x00000000);
 
 	emit_cache_flush(ring);
-	fd_rmw_wfi(ctx, ring);
+	fd_wfi(ctx, ring);
 }
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index a484544..2eb2024 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -146,6 +146,7 @@ emit_binning_workaround(struct fd_context *ctx)
 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
 
+	fd_wfi(ctx, ring);
 	fd3_program_emit(ring, &ctx->solid_prog, false);
 
 	fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
@@ -237,6 +238,7 @@ emit_binning_workaround(struct fd_context *ctx)
 	OUT_RING(ring, 2);            /* NumIndices */
 	OUT_RING(ring, 2);
 	OUT_RING(ring, 1);
+	fd_reset_wfi(ctx);
 
 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
 	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
@@ -244,8 +246,7 @@ emit_binning_workaround(struct fd_context *ctx)
 	OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
 	OUT_RING(ring, 0x00000000);
 
-	OUT_WFI(ring);
-
+	fd_wfi(ctx, ring);
 	OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
 	OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
 			A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
@@ -363,6 +364,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 
+	fd_wfi(ctx, ring);
 	fd3_program_emit(ring, &ctx->solid_prog, false);
 
 	fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
@@ -403,6 +405,7 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
 
 	emit_mrt(ring, 1, &psurf, &base, bin_w);
 
+	fd_wfi(ctx, ring);
 	fd3_emit_gmem_restore_tex(ring, psurf);
 
 	fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
@@ -508,6 +511,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 
+	fd_wfi(ctx, ring);
 	fd3_program_emit(ring, &ctx->blit_prog, false);
 
 	fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) {
@@ -685,6 +689,9 @@ emit_binning_pass(struct fd_context *ctx)
 
 	/* emit IB to binning drawcmds: */
 	OUT_IB(ring, ctx->binning_start, ctx->binning_end);
+	fd_reset_wfi(ctx);
+
+	fd_wfi(ctx, ring);
 
 	/* and then put stuff back the way it was: */
 
@@ -722,6 +729,7 @@ emit_binning_pass(struct fd_context *ctx)
 		OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
 				INDEX_SIZE_IGN, IGNORE_VISIBILITY));
 		OUT_RING(ring, 0);             /* NumIndices */
+		fd_reset_wfi(ctx);
 	}
 
 	OUT_PKT3(ring, CP_NOP, 4);
@@ -730,7 +738,7 @@ emit_binning_pass(struct fd_context *ctx)
 	OUT_RING(ring, 0x00000000);
 	OUT_RING(ring, 0x00000000);
 
-	OUT_WFI(ring);
+	fd_wfi(ctx, ring);
 
 	if (ctx->screen->gpu_id == 320) {
 		emit_binning_workaround(ctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 33abb31..f0485d8 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -206,7 +206,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
 	}
 
 	fd_context_next_rb(pctx);
-	fd_reset_rmw_state(ctx);
+	fd_reset_wfi(ctx);
 
 	util_dynarray_init(&ctx->draw_patches);
 
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 0364d0f..ef83048 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -168,7 +168,7 @@ struct fd_context {
 	/* Keep track if WAIT_FOR_IDLE is needed for registers we need
 	 * to update via RMW:
 	 */
-	bool rmw_needs_wfi;
+	bool needs_wfi;
 
 	/* Keep track of DRAW initiators that need to be patched up depending
 	 * on whether we using binning or not:
@@ -275,18 +275,20 @@ fd_supported_prim(struct fd_context *ctx, unsigned prim)
 }
 
 static INLINE void
-fd_reset_rmw_state(struct fd_context *ctx)
+fd_reset_wfi(struct fd_context *ctx)
 {
-	ctx->rmw_needs_wfi = true;
+	ctx->needs_wfi = true;
 }
 
-/* emit before a RMW a WAIT_FOR_IDLE only if needed: */
+/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already
+ * been one since last draw:
+ */
 static inline void
-fd_rmw_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
+fd_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
 {
-	if (ctx->rmw_needs_wfi) {
+	if (ctx->needs_wfi) {
 		OUT_WFI(ring);
-		ctx->rmw_needs_wfi = false;
+		ctx->needs_wfi = false;
 	}
 }
 
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.h b/src/gallium/drivers/freedreno/freedreno_draw.h
index 608d071..fe1c548 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.h
+++ b/src/gallium/drivers/freedreno/freedreno_draw.h
@@ -95,7 +95,7 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 	emit_marker(ring, 7);
 
-	ctx->rmw_needs_wfi = true;
+	fd_reset_wfi(ctx);
 }
 
 #endif /* FREEDRENO_DRAW_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 6a55aa4..80cf7c8 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -278,6 +278,7 @@ render_tiles(struct fd_context *ctx)
 
 		/* emit IB to drawcmds: */
 		OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
+		fd_reset_wfi(ctx);
 
 		/* emit gmem2mem to transfer tile back to system memory: */
 		ctx->emit_tile_gmem2mem(ctx, tile);
@@ -291,6 +292,7 @@ render_sysmem(struct fd_context *ctx)
 
 	/* emit IB to drawcmds: */
 	OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
+	fd_reset_wfi(ctx);
 }
 
 void
@@ -314,6 +316,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
 	fd_ringmarker_mark(ctx->draw_end);
 	fd_ringmarker_mark(ctx->binning_end);
 
+	fd_reset_wfi(ctx);
+
 	ctx->stats.batch_total++;
 
 	if (sysmem) {
@@ -339,7 +343,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
 	fd_ringmarker_mark(ctx->draw_start);
 	fd_ringmarker_mark(ctx->binning_start);
 
-	fd_reset_rmw_state(ctx);
+	fd_reset_wfi(ctx);
 
 	/* update timestamps on render targets: */
 	timestamp = fd_ringbuffer_timestamp(ctx->ring);




More information about the mesa-commit mailing list