[Mesa-dev] [PATCH 5/5] radeonsi/gfx9: set PA_SC_TILE_STEERING_OVERRIDE for faster blits to GART
Marek Olšák
maraeo at gmail.com
Sun Mar 11 18:11:13 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
The improvement is only +3.5%, which is not much.
---
src/gallium/drivers/radeonsi/si_pipe.h | 9 +++++----
src/gallium/drivers/radeonsi/si_state.c | 22 ++++++++++++++++++++++
2 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 2053dcb..6f5939b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -281,24 +281,25 @@ struct si_framebuffer {
unsigned spi_shader_col_format;
unsigned spi_shader_col_format_alpha;
unsigned spi_shader_col_format_blend;
unsigned spi_shader_col_format_blend_alpha;
ubyte nr_samples:5; /* at most 16xAA */
ubyte log_samples:3; /* at most 4 = 16xAA */
ubyte compressed_cb_mask;
ubyte color_is_int8;
ubyte color_is_int10;
ubyte dirty_cbufs;
- bool dirty_zsbuf;
- bool any_dst_linear;
- bool CB_has_shader_readable_metadata;
- bool DB_has_shader_readable_metadata;
+ bool dirty_zsbuf:1;
+ bool any_dst_linear:1;
+ bool blitting_to_gart:1;
+ bool CB_has_shader_readable_metadata:1;
+ bool DB_has_shader_readable_metadata:1;
};
struct si_signed_scissor {
int minx;
int miny;
int maxx;
int maxy;
};
struct si_scissors {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index aae7332..b0bd11d 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2824,20 +2824,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
sctx->framebuffer.spi_shader_col_format_alpha = 0;
sctx->framebuffer.spi_shader_col_format_blend = 0;
sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
sctx->framebuffer.color_is_int8 = 0;
sctx->framebuffer.color_is_int10 = 0;
sctx->framebuffer.compressed_cb_mask = 0;
sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
sctx->framebuffer.any_dst_linear = false;
+ /* This will be set to false later if any color buffer's domains include VRAM. */
+ sctx->framebuffer.blitting_to_gart = sctx->blitter->running &&
+ !sctx->blitter->leaving &&
+ state->nr_cbufs &&
+ state->cbufs[0];
sctx->framebuffer.CB_has_shader_readable_metadata = false;
sctx->framebuffer.DB_has_shader_readable_metadata = false;
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
continue;
surf = (struct r600_surface*)state->cbufs[i];
rtex = (struct r600_texture*)surf->base.texture;
@@ -2860,20 +2865,23 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (surf->color_is_int10)
sctx->framebuffer.color_is_int10 |= 1 << i;
if (rtex->fmask.size) {
sctx->framebuffer.compressed_cb_mask |= 1 << i;
}
if (rtex->surface.is_linear)
sctx->framebuffer.any_dst_linear = true;
+ if (rtex->resource.domains & RADEON_DOMAIN_VRAM)
+ sctx->framebuffer.blitting_to_gart = false;
+
if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
sctx->framebuffer.CB_has_shader_readable_metadata = true;
si_context_add_resource_size(ctx, surf->base.texture);
p_atomic_inc(&rtex->framebuffers_bound);
if (rtex->dcc_gather_statistics) {
/* Dirty tracking must be enabled for DCC usage analysis. */
sctx->framebuffer.compressed_cb_mask |= 1 << i;
@@ -3183,20 +3191,34 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
}
/* Framebuffer dimensions. */
/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
+ if (sctx->b.chip_class >= GFX9 &&
+ sctx->screen->info.has_dedicated_vram) {
+ /* For copies to GART, it is faster (although very unintuitive)
+ * to disable all but one RB. If all RBs were banging away on
+ * the PCIE bus, it would produce more traffic than the write-
+ * combiner can efficiently handle.
+ */
+ radeon_set_context_reg(cs, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
+ sctx->framebuffer.blitting_to_gart ?
+ S_02835C_ENABLE(1) |
+ S_02835C_NUM_SE(1) |
+ S_02835C_NUM_RB_PER_SE(1) : 0);
+ }
+
if (sctx->screen->dfsm_allowed) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
sctx->framebuffer.dirty_cbufs = 0;
sctx->framebuffer.dirty_zsbuf = false;
}
static void si_emit_msaa_sample_locs(struct si_context *sctx,
--
2.7.4
More information about the mesa-dev
mailing list