Mesa (master): freedreno/a3xx: use cs patch instead of WFI+RMW

Rob Clark robclark at kemper.freedesktop.org
Wed Jan 8 21:52:49 UTC 2014


Module: Mesa
Branch: master
Commit: 725d736f6a6a14d10223888d585ddab80ee803f0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=725d736f6a6a14d10223888d585ddab80ee803f0

Author: Rob Clark <robclark at freedesktop.org>
Date:   Wed Jan  8 10:06:52 2014 -0500

freedreno/a3xx: use cs patch instead of WFI+RMW

Since we now have the cmdstream patch mechanism needed for hw binning,
might as well also use it for RB_RENDER_CONTROL updates.  This avoids
the need to use RMW (and associated WFI) to update RB_RENDER_CONTROL.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_context.c  |    4 ++++
 src/gallium/drivers/freedreno/a3xx/fd3_context.h  |    5 +++++
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c     |    5 +++--
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c     |   15 +++++++++++--
 src/gallium/drivers/freedreno/a3xx/fd3_emit.h     |   24 --------------------
 src/gallium/drivers/freedreno/a3xx/fd3_gmem.c     |   25 +++++++++++++--------
 src/gallium/drivers/freedreno/freedreno_context.h |   18 ++++-----------
 src/gallium/drivers/freedreno/freedreno_draw.h    |    2 +-
 8 files changed, 46 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index 2346719..3732896 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -44,6 +44,8 @@ fd3_context_destroy(struct pipe_context *pctx)
 
 	fd3_prog_fini(pctx);
 
+	util_dynarray_fini(&fd3_ctx->rbrc_patches);
+
 	fd_bo_del(fd3_ctx->vs_pvt_mem);
 	fd_bo_del(fd3_ctx->fs_pvt_mem);
 	fd_bo_del(fd3_ctx->vsc_size_mem);
@@ -119,6 +121,8 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
 	if (!pctx)
 		return NULL;
 
+	util_dynarray_init(&fd3_ctx->rbrc_patches);
+
 	fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
 			DRM_FREEDRENO_GEM_TYPE_KMEM);
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
index 3599fe1..26c8cc7 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -36,6 +36,11 @@
 struct fd3_context {
 	struct fd_context base;
 
+	/* Keep track of writes to RB_RENDER_CONTROL which need to be patched
+	 * once we know whether or not to use GMEM, and GMEM tile pitch.
+	 */
+	struct util_dynarray rbrc_patches;
+
 	struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
 
 	/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes).  We
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 4c90d98..4f28b0e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -162,8 +162,9 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 	OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
 			A3XX_RB_BLEND_ALPHA_FLOAT(1.0));
 
-	fd3_emit_rbrc_draw_state(ctx, ring,
-			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+	OUT_RINGP(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER),
+			&fd3_ctx->rbrc_patches);
 
 	if (buffers & PIPE_CLEAR_DEPTH) {
 		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 9cfe4dd..c479666 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -353,8 +353,19 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
 		struct pipe_stencil_ref *sr = &ctx->stencil_ref;
 
-		if (!binning)
-			fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control);
+		if (!binning) {
+			struct fd3_context *fd3_ctx = fd3_context(ctx);
+
+			/* I suppose if we needed to (which I don't *think* we need
+			 * to), we could emit this for binning pass too.  But we
+			 * would need to keep a different patch-list for binning
+			 * vs render pass.
+			 */
+
+			OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+			OUT_RINGP(ring, zsa->rb_render_control,
+					&fd3_ctx->rbrc_patches);
+		}
 
 		OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
 		OUT_RING(ring, zsa->rb_alpha_ref);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index 50559d1..1b4774d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -62,28 +62,4 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		uint32_t dirty, bool binning);
 void fd3_emit_restore(struct fd_context *ctx);
 
-
-/* use RMW (read-modify-write) to update RB_RENDER_CONTROL since the
- * GMEM/binning code is deciding on the bin-width (and whether to
- * use binning) after the draw/clear state is emitted.
- */
-
-#define RBRC_DRAW_STATE  (A3XX_RB_RENDER_CONTROL_ALPHA_TEST | \
-		A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK)
-
-static inline void
-fd3_emit_rbrc_draw_state(struct fd_context *ctx,
-		struct fd_ringbuffer *ring, uint32_t val)
-{
-	assert(!(val & ~RBRC_DRAW_STATE));
-	if (val != ctx->rmw.rbrc_draw) {
-		fd_rmw_wfi(ctx, ring);
-		OUT_PKT3(ring, CP_REG_RMW, 3);
-		OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL);
-		OUT_RING(ring, ~RBRC_DRAW_STATE);
-		OUT_RING(ring, val);
-		ctx->rmw.rbrc_draw = val;
-	}
-}
-
 #endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 8720e08..a08b482 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -544,6 +544,18 @@ patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
 	util_dynarray_resize(&ctx->draw_patches, 0);
 }
 
+static void
+patch_rbrc(struct fd_context *ctx, uint32_t val)
+{
+	struct fd3_context *fd3_ctx = fd3_context(ctx);
+	unsigned i;
+	for (i = 0; i < fd_patch_num_elements(&fd3_ctx->rbrc_patches); i++) {
+		struct fd_cs_patch *patch = fd_patch_element(&fd3_ctx->rbrc_patches, i);
+		*patch->cs = patch->val | val;
+	}
+	util_dynarray_resize(&fd3_ctx->rbrc_patches, 0);
+}
+
 /* for rendering directly to system memory: */
 static void
 fd3_emit_sysmem_prep(struct fd_context *ctx)
@@ -563,10 +575,6 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
 
 	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
 
-	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
-	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
-			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
-
 	/* setup scissor/offset for current tile: */
 	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
 	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
@@ -584,6 +592,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
 
 	patch_draws(ctx, IGNORE_VISIBILITY);
+	patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
 }
 
 static void
@@ -757,6 +766,9 @@ fd3_emit_tile_init(struct fd_context *ctx)
 	} else {
 		patch_draws(ctx, IGNORE_VISIBILITY);
 	}
+
+	patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
 }
 
 /* before mem2gmem */
@@ -837,11 +849,6 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
 
 	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
 
-	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
-	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
-			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
-			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
-
 	/* setup scissor/offset for current tile: */
 	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
 	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) |
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index a0227e4..5373de6 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -159,16 +159,7 @@ struct fd_context {
 	/* Keep track if WAIT_FOR_IDLE is needed for registers we need
 	 * to update via RMW:
 	 */
-	struct {
-		bool need_wfi;
-		/* note: would be nicer to have in fd3_context, fd2_context,
-		 * etc, because the registered modified via RMR differ across
-		 * generation.  But as long as it is a small set of registers
-		 * that might be more hassle than it's worth.
-		 */
-		/* state for RB_RENDER_CONTROL: */
-		uint32_t rbrc_draw;
-	} rmw;
+	bool rmw_needs_wfi;
 
 	/* Keep track of DRAW initiators that need to be patched up depending
 	 * on whether we using binning or not:
@@ -277,17 +268,16 @@ fd_supported_prim(struct fd_context *ctx, unsigned prim)
 static INLINE void
 fd_reset_rmw_state(struct fd_context *ctx)
 {
-	ctx->rmw.need_wfi = true;
-	ctx->rmw.rbrc_draw = ~0;
+	ctx->rmw_needs_wfi = true;
 }
 
 /* emit before a RMW a WAIT_FOR_IDLE only if needed: */
 static inline void
 fd_rmw_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
 {
-	if (ctx->rmw.need_wfi) {
+	if (ctx->rmw_needs_wfi) {
 		OUT_WFI(ring);
-		ctx->rmw.need_wfi = false;
+		ctx->rmw_needs_wfi = false;
 	}
 }
 
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.h b/src/gallium/drivers/freedreno/freedreno_draw.h
index e8bb420..608d071 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.h
+++ b/src/gallium/drivers/freedreno/freedreno_draw.h
@@ -95,7 +95,7 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 	emit_marker(ring, 7);
 
-	ctx->rmw.need_wfi = true;
+	ctx->rmw_needs_wfi = true;
 }
 
 #endif /* FREEDRENO_DRAW_H_ */




More information about the mesa-commit mailing list