[Mesa-dev] [PATCH 18/42] radeonsi: avoid redundant CB and DB register updates

Marek Olšák maraeo at gmail.com
Sun Aug 30 12:11:48 PDT 2015


From: Marek Olšák <marek.olsak at amd.com>

The main idea is to avoid repeatedly setting CB_COLORi_INFO = 0 for i > 0
when those colorbuffers aren't used. This mainly benefits glamor.

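The same applies to DB: Z_INFO and STENCIL_INFO need to be cleared only once,
i.e. when the depth/stencil binding changes or a new command stream begins,
not on every framebuffer state emission.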
---
 src/gallium/drivers/r600/r600_blit.c          |  2 +-
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 +-
 src/gallium/drivers/radeon/r600_texture.c     |  4 +++-
 src/gallium/drivers/radeonsi/si_blit.c        |  7 +++++--
 src/gallium/drivers/radeonsi/si_hw_context.c  |  5 ++++-
 src/gallium/drivers/radeonsi/si_pipe.h        |  2 ++
 src/gallium/drivers/radeonsi/si_state.c       | 26 ++++++++++++++++++++------
 7 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 22a0950..08b2f64 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -395,7 +395,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
 
 	if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
 		evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
-					      &buffers, color);
+					      &buffers, NULL, color);
 		if (!buffers)
 			return; /* all buffers have been fast cleared */
 	}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 29db1cc..d22c230 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -562,7 +562,7 @@ unsigned r600_translate_colorswap(enum pipe_format format);
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 				   struct pipe_framebuffer_state *fb,
 				   struct r600_atom *fb_state,
-				   unsigned *buffers,
+				   unsigned *buffers, unsigned *dirty_cbufs,
 				   const union pipe_color_union *color);
 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
 void r600_init_context_texture_functions(struct r600_common_context *rctx);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 5469691..89f18fb 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1217,7 +1217,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 				   struct pipe_framebuffer_state *fb,
 				   struct r600_atom *fb_state,
-				   unsigned *buffers,
+				   unsigned *buffers, unsigned *dirty_cbufs,
 				   const union pipe_color_union *color)
 {
 	int i;
@@ -1279,6 +1279,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 				   tex->cmask.offset, tex->cmask.size, 0, true);
 
 		tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+		if (dirty_cbufs)
+			*dirty_cbufs |= 1 << i;
 		rctx->set_atom_dirty(rctx, fb_state, true);
 		*buffers &= ~clear_bit;
 	}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c28b2a8..d1486bd 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -336,8 +336,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
 
 	if (buffers & PIPE_CLEAR_COLOR) {
-		evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
-					      &buffers, color);
+		evergreen_do_fast_color_clear(&sctx->b, fb,
+					      &sctx->framebuffer.atom, &buffers,
+					      &sctx->framebuffer.dirty_cbufs,
+					      color);
 		if (!buffers)
 			return; /* all buffers have been fast cleared */
 	}
@@ -374,6 +376,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		}
 
 		zstex->depth_clear_value = depth;
+		sctx->framebuffer.dirty_zsbuf = true;
 		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
 		sctx->db_depth_clear = true;
 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 2381b6c..5613781 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -184,8 +184,11 @@ void si_begin_new_cs(struct si_context *ctx)
 	/* The CS initialization should be emitted before everything else. */
 	si_pm4_emit(ctx, ctx->init_config);
 
-	si_mark_atom_dirty(ctx, &ctx->clip_regs);
+	ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
+	ctx->framebuffer.dirty_zsbuf = true;
 	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+
+	si_mark_atom_dirty(ctx, &ctx->clip_regs);
 	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
 	si_mark_atom_dirty(ctx, &ctx->msaa_config);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 47ad619..9be4aa7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -127,6 +127,8 @@ struct si_framebuffer {
 	unsigned			cb0_is_integer;
 	unsigned			compressed_cb_mask;
 	unsigned			export_16bpc;
+	unsigned			dirty_cbufs;
+	bool				dirty_zsbuf;
 };
 
 struct si_scissors {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 88964e1..3c25048 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2109,6 +2109,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 			 SI_CONTEXT_INV_TC_L2 |
 			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
 
+	/* Take the maximum of the old and new count. If the new count is lower,
+	 * dirtying is needed to disable the unbound colorbuffers.
+	 */
+	sctx->framebuffer.dirty_cbufs |=
+		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
+	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
+
 	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
 
 	sctx->framebuffer.export_16bpc = 0;
@@ -2219,6 +2226,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 
 	/* Colorbuffers. */
 	for (i = 0; i < nr_cbufs; i++) {
+		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
+			continue;
+
 		cb = (struct r600_surface*)state->cbufs[i];
 		if (!cb) {
 			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
@@ -2259,17 +2269,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 			radeon_emit(cs, 0);	/* R_028C94_CB_COLOR0_DCC_BASE */
 	}
 	/* set CB_COLOR1_INFO for possible dual-src blending */
-	if (i == 1 && state->cbufs[0]) {
+	if (i == 1 && state->cbufs[0] &&
+	    sctx->framebuffer.dirty_cbufs & (1 << 0)) {
 		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
 				       cb->cb_color_info | tex->cb_color_info);
 		i++;
 	}
-	for (; i < 8 ; i++) {
-		r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
-	}
+	for (; i < 8 ; i++)
+		if (sctx->framebuffer.dirty_cbufs & (1 << i))
+			r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
 
 	/* ZS buffer. */
-	if (state->zsbuf) {
+	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
 		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
 
@@ -2304,7 +2315,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
 		r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
 				       zb->pa_su_poly_offset_db_fmt_cntl);
-	} else {
+	} else if (sctx->framebuffer.dirty_zsbuf) {
 		r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
 		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
 		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
@@ -2314,6 +2325,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
 	r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
 			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
+
+	sctx->framebuffer.dirty_cbufs = 0;
+	sctx->framebuffer.dirty_zsbuf = false;
 }
 
 static void si_emit_msaa_sample_locs(struct si_context *sctx,
-- 
2.1.4



More information about the mesa-dev mailing list