[Mesa-dev] [PATCH 18/42] radeonsi: avoid redundant CB and DB register updates
Marek Olšák
maraeo at gmail.com
Sun Aug 30 12:11:48 PDT 2015
From: Marek Olšák <marek.olsak at amd.com>
The main idea is to avoid repeatedly setting CB_COLORi_INFO = 0 for i > 0
when those colorbuffers aren't used. This mainly benefits glamor.
The same applies to DB: when no depth/stencil buffer is bound, Z_INFO and
STENCIL_INFO need to be cleared only once.
---
src/gallium/drivers/r600/r600_blit.c | 2 +-
src/gallium/drivers/radeon/r600_pipe_common.h | 2 +-
src/gallium/drivers/radeon/r600_texture.c | 4 +++-
src/gallium/drivers/radeonsi/si_blit.c | 7 +++++--
src/gallium/drivers/radeonsi/si_hw_context.c | 5 ++++-
src/gallium/drivers/radeonsi/si_pipe.h | 2 ++
src/gallium/drivers/radeonsi/si_state.c | 26 ++++++++++++++++++++------
7 files changed, 36 insertions(+), 12 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 22a0950..08b2f64 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -395,7 +395,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
- &buffers, color);
+ &buffers, NULL, color);
if (!buffers)
return; /* all buffers have been fast cleared */
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 29db1cc..d22c230 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -562,7 +562,7 @@ unsigned r600_translate_colorswap(enum pipe_format format);
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
- unsigned *buffers,
+ unsigned *buffers, unsigned *dirty_cbufs,
const union pipe_color_union *color);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 5469691..89f18fb 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1217,7 +1217,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
- unsigned *buffers,
+ unsigned *buffers, unsigned *dirty_cbufs,
const union pipe_color_union *color)
{
int i;
@@ -1279,6 +1279,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
tex->cmask.offset, tex->cmask.size, 0, true);
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+ if (dirty_cbufs)
+ *dirty_cbufs |= 1 << i;
rctx->set_atom_dirty(rctx, fb_state, true);
*buffers &= ~clear_bit;
}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c28b2a8..d1486bd 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -336,8 +336,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
if (buffers & PIPE_CLEAR_COLOR) {
- evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
- &buffers, color);
+ evergreen_do_fast_color_clear(&sctx->b, fb,
+ &sctx->framebuffer.atom, &buffers,
+ &sctx->framebuffer.dirty_cbufs,
+ color);
if (!buffers)
return; /* all buffers have been fast cleared */
}
@@ -374,6 +376,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
}
zstex->depth_clear_value = depth;
+ sctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
sctx->db_depth_clear = true;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 2381b6c..5613781 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -184,8 +184,11 @@ void si_begin_new_cs(struct si_context *ctx)
/* The CS initialization should be emitted before everything else. */
si_pm4_emit(ctx, ctx->init_config);
- si_mark_atom_dirty(ctx, &ctx->clip_regs);
+ ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
+ ctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+
+ si_mark_atom_dirty(ctx, &ctx->clip_regs);
si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
si_mark_atom_dirty(ctx, &ctx->db_render_state);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 47ad619..9be4aa7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -127,6 +127,8 @@ struct si_framebuffer {
unsigned cb0_is_integer;
unsigned compressed_cb_mask;
unsigned export_16bpc;
+ unsigned dirty_cbufs;
+ bool dirty_zsbuf;
};
struct si_scissors {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 88964e1..3c25048 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2109,6 +2109,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ /* Take the maximum of the old and new count. If the new count is lower,
+ * dirtying is needed to disable the unbound colorbuffers.
+ */
+ sctx->framebuffer.dirty_cbufs |=
+ (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
+ sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
+
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
sctx->framebuffer.export_16bpc = 0;
@@ -2219,6 +2226,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
+ if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
+ continue;
+
cb = (struct r600_surface*)state->cbufs[i];
if (!cb) {
r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
@@ -2259,17 +2269,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
- if (i == 1 && state->cbufs[0]) {
+ if (i == 1 && state->cbufs[0] &&
+ sctx->framebuffer.dirty_cbufs & (1 << 0)) {
r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
cb->cb_color_info | tex->cb_color_info);
i++;
}
- for (; i < 8 ; i++) {
- r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
- }
+ for (; i < 8 ; i++)
+ if (sctx->framebuffer.dirty_cbufs & (1 << i))
+ r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
/* ZS buffer. */
- if (state->zsbuf) {
+ if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
@@ -2304,7 +2315,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
zb->pa_su_poly_offset_db_fmt_cntl);
- } else {
+ } else if (sctx->framebuffer.dirty_zsbuf) {
r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
@@ -2314,6 +2325,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
+
+ sctx->framebuffer.dirty_cbufs = 0;
+ sctx->framebuffer.dirty_zsbuf = false;
}
static void si_emit_msaa_sample_locs(struct si_context *sctx,
--
2.1.4
More information about the mesa-dev
mailing list