Mesa (master): freedreno/a3xx: WFI fixes/cleanup

Rob Clark robclark at kemper.freedesktop.org
Sun Jun 22 11:36:00 UTC 2014


Module: Mesa
Branch: master
Commit: c63450e8298a34aa0f4077a846b5b0467cdeb567
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c63450e8298a34aa0f4077a846b5b0467cdeb567

Author: Rob Clark <robclark at freedesktop.org>
Date:   Fri Jun 13 17:39:59 2014 -0400

freedreno/a3xx: WFI fixes/cleanup

The blob driver seems to need a WFI in some cases after
CP_EVENT_WRITE, implying that the event write is asynchronous and
should reset needs_wfi.  Also, CP_INVALIDATE_STATE seems to need a
WFI, but CP_LOAD_STATE does not.

The blob driver also puts WFIs before writing GRAS_CL_VPORT registers.
This may be a work-around, as these registers should be banked/context
registers.  I haven't yet found a lockup that these WFIs avert, but I
expect the viewport to change infrequently, so out of paranoia I will
keep them for now.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_draw.c     |    8 ++--
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c     |   45 ++++++++-------------
 src/gallium/drivers/freedreno/a3xx/fd3_gmem.c     |   22 +++++-----
 src/gallium/drivers/freedreno/a3xx/fd3_query.c    |    3 +-
 src/gallium/drivers/freedreno/freedreno_context.h |   11 +++++
 5 files changed, 41 insertions(+), 48 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index f822aa7..4b2d941 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -154,8 +154,7 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
 	OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
 	OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */
 
-	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-	OUT_RING(ring, PERFCOUNTER_STOP);
+	fd_event_write(ctx, ring, PERFCOUNTER_STOP);
 
 	fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
 			DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -195,6 +194,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 				A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
 				A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));
 
+		fd_wfi(ctx, ring);
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2);
 		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
 		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth));
@@ -276,7 +276,6 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 				.format = PIPE_FORMAT_R32G32B32_FLOAT,
 			}}, 1);
 
-	fd_wfi(ctx, ring);
 	fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
 
 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
@@ -292,8 +291,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 	OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
 	OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */
 
-	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-	OUT_RING(ring, PERFCOUNTER_STOP);
+	fd_event_write(ctx, ring, PERFCOUNTER_STOP);
 
 	fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
 			DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 878d6ff..f77c722 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -234,26 +234,6 @@ emit_textures(struct fd_ringbuffer *ring,
 	}
 }
 
-static void
-emit_cache_flush(struct fd_ringbuffer *ring)
-{
-	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-	OUT_RING(ring, CACHE_FLUSH);
-
-	/* probably only really needed on a320: */
-	OUT_PKT3(ring, CP_DRAW_INDX, 3);
-	OUT_RING(ring, 0x00000000);
-	OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
-			INDEX_SIZE_IGN, IGNORE_VISIBILITY));
-	OUT_RING(ring, 0);					/* NumIndices */
-
-	OUT_PKT3(ring, CP_NOP, 4);
-	OUT_RING(ring, 0x00000000);
-	OUT_RING(ring, 0x00000000);
-	OUT_RING(ring, 0x00000000);
-	OUT_RING(ring, 0x00000000);
-}
-
 /* emit texture state for mem->gmem restore operation.. eventually it would
  * be good to get rid of this and use normal CSO/etc state for more of these
  * special cases, but for now the compiler is not sufficient..
@@ -492,6 +472,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	}
 
 	if (dirty & FD_DIRTY_VIEWPORT) {
+		fd_wfi(ctx, ring);
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
 		OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5));
 		OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
@@ -502,17 +483,12 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	}
 
 	if (dirty & FD_DIRTY_PROG) {
-		fd_wfi(ctx, ring);
 		fd3_program_emit(ring, prog, key);
 	}
 
-	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-	OUT_RING(ring, HLSQ_FLUSH);
-
 	if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
 			/* evil hack to deal sanely with clear path: */
 			(prog == &ctx->prog)) {
-		fd_wfi(ctx, ring);
 		emit_constants(ring,  SB_VERT_SHADER,
 				&ctx->constbuf[PIPE_SHADER_VERTEX],
 				(prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
@@ -549,8 +525,6 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 				A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
 	}
 
-	if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
-		fd_wfi(ctx, ring);
 
 	if (dirty & FD_DIRTY_VERTTEX) {
 		if (vp->has_samp)
@@ -586,6 +560,7 @@ fd3_emit_restore(struct fd_context *ctx)
 		OUT_RING(ring, 0x00000000);
 	}
 
+	fd_wfi(ctx, ring);
 	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
 	OUT_RING(ring, 0x00007fff);
 
@@ -696,7 +671,21 @@ fd3_emit_restore(struct fd_context *ctx)
 	OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
 	OUT_RING(ring, 0x00000000);
 
-	emit_cache_flush(ring);
+	fd_event_write(ctx, ring, CACHE_FLUSH);
+
+	/* probably only really needed on a320: */
+	OUT_PKT3(ring, CP_DRAW_INDX, 3);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
+			INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+	OUT_RING(ring, 0);					/* NumIndices */
+
+	OUT_PKT3(ring, CP_NOP, 4);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+
 	fd_wfi(ctx, ring);
 
 	ctx->needs_rb_fbd = true;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 0c9dd99..8519a90 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -177,7 +177,6 @@ emit_binning_workaround(struct fd_context *ctx)
 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
 
-	fd_wfi(ctx, ring);
 	fd3_program_emit(ring, &ctx->solid_prog, key);
 	fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
 			(struct fd3_vertex_buf[]) {{
@@ -245,6 +244,7 @@ emit_binning_workaround(struct fd_context *ctx)
 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
 
+	fd_wfi(ctx, ring);
 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
@@ -356,6 +356,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 	OUT_RING(ring, 0x00000000);   /* GRAS_CL_CLIP_CNTL */
 
+	fd_wfi(ctx, ring);
 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
@@ -397,7 +398,6 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 
-	fd_wfi(ctx, ring);
 	fd3_program_emit(ring, &ctx->solid_prog, key);
 	fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
 			(struct fd3_vertex_buf[]) {{
@@ -435,7 +435,6 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
 
 	emit_mrt(ring, 1, &psurf, &base, bin_w);
 
-	fd_wfi(ctx, ring);
 	fd3_emit_gmem_restore_tex(ring, psurf);
 
 	fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
@@ -487,12 +486,14 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) |
 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
 
+	fd_wfi(ctx, ring);
 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
 	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
 
 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 	OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);   /* GRAS_CL_CLIP_CNTL */
 
+	fd_wfi(ctx, ring);
 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5));
 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0));
@@ -541,7 +542,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 
-	fd_wfi(ctx, ring);
 	fd3_program_emit(ring, &ctx->blit_prog, key);
 	fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->blit_prog.vp, key),
 			(struct fd3_vertex_buf[]) {{
@@ -677,7 +677,7 @@ emit_binning_pass(struct fd_context *ctx)
 
 	if (ctx->screen->gpu_id == 320) {
 		emit_binning_workaround(ctx);
-
+		fd_wfi(ctx, ring);
 		OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
 		OUT_RING(ring, 0x00007fff);
 	}
@@ -760,8 +760,8 @@ emit_binning_pass(struct fd_context *ctx)
 			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
 
-	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-	OUT_RING(ring, CACHE_FLUSH);
+	fd_event_write(ctx, ring, CACHE_FLUSH);
+	fd_wfi(ctx, ring);
 
 	if (ctx->screen->gpu_id == 320) {
 		/* dummy-draw workaround: */
@@ -877,17 +877,13 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
 
 		assert(pipe->w * pipe->h);
 
-		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-		OUT_RING(ring, HLSQ_FLUSH);
-
-		OUT_WFI(ring);
+		fd_event_write(ctx, ring, HLSQ_FLUSH);
+		fd_wfi(ctx, ring);
 
 		OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
 		OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
 				A3XX_PC_VSTREAM_CONTROL_N(tile->n));
 
-		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-		OUT_RING(ring, CACHE_FLUSH);
 
 		OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
 		OUT_RELOC(ring, pipe->bo, 0, 0, 0);    /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_query.c b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
index 77ae8b6..34e0b5a 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_query.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
@@ -68,8 +68,7 @@ occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
 			INDEX_SIZE_IGN, USE_VISIBILITY));
 	OUT_RING(ring, 0);             /* NumIndices */
 
-	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-	OUT_RING(ring, ZPASS_DONE);
+	fd_event_write(ctx, ring, ZPASS_DONE);
 
 	OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
 	OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 4698482..0051c9d 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -363,6 +363,17 @@ fd_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
 	}
 }
 
+/* emit a CP_EVENT_WRITE:
+ */
+static inline void
+fd_event_write(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		enum vgt_event_type evt)
+{
+	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+	OUT_RING(ring, evt);
+	fd_reset_wfi(ctx);
+}
+
 struct pipe_context * fd_context_init(struct fd_context *ctx,
 		struct pipe_screen *pscreen, const uint8_t *primtypes,
 		void *priv);




More information about the mesa-commit mailing list