[Mesa-dev] [PATCH 5/5] radeonsi/gfx9: set PA_SC_TILE_STEERING_OVERRIDE for faster blits to GART

Marek Olšák maraeo at gmail.com
Sun Mar 11 18:11:13 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

The improvement for blits to GART is small: only +3.5%.
---
 src/gallium/drivers/radeonsi/si_pipe.h  |  9 +++++----
 src/gallium/drivers/radeonsi/si_state.c | 22 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 2053dcb..6f5939b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -281,24 +281,25 @@ struct si_framebuffer {
 	unsigned			spi_shader_col_format;
 	unsigned			spi_shader_col_format_alpha;
 	unsigned			spi_shader_col_format_blend;
 	unsigned			spi_shader_col_format_blend_alpha;
 	ubyte				nr_samples:5; /* at most 16xAA */
 	ubyte				log_samples:3; /* at most 4 = 16xAA */
 	ubyte				compressed_cb_mask;
 	ubyte				color_is_int8;
 	ubyte				color_is_int10;
 	ubyte				dirty_cbufs;
-	bool				dirty_zsbuf;
-	bool				any_dst_linear;
-	bool				CB_has_shader_readable_metadata;
-	bool				DB_has_shader_readable_metadata;
+	bool				dirty_zsbuf:1;
+	bool				any_dst_linear:1;
+	bool				blitting_to_gart:1;
+	bool				CB_has_shader_readable_metadata:1;
+	bool				DB_has_shader_readable_metadata:1;
 };
 
 struct si_signed_scissor {
 	int minx;
 	int miny;
 	int maxx;
 	int maxy;
 };
 
 struct si_scissors {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index aae7332..b0bd11d 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2824,20 +2824,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	sctx->framebuffer.spi_shader_col_format_alpha = 0;
 	sctx->framebuffer.spi_shader_col_format_blend = 0;
 	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
 	sctx->framebuffer.color_is_int8 = 0;
 	sctx->framebuffer.color_is_int10 = 0;
 
 	sctx->framebuffer.compressed_cb_mask = 0;
 	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
 	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
 	sctx->framebuffer.any_dst_linear = false;
+	/* Cleared in the loop below if any color buffer has RADEON_DOMAIN_VRAM set. */
+	sctx->framebuffer.blitting_to_gart = sctx->blitter->running &&
+					     !sctx->blitter->leaving &&
+					     state->nr_cbufs &&
+					     state->cbufs[0];
 	sctx->framebuffer.CB_has_shader_readable_metadata = false;
 	sctx->framebuffer.DB_has_shader_readable_metadata = false;
 
 	for (i = 0; i < state->nr_cbufs; i++) {
 		if (!state->cbufs[i])
 			continue;
 
 		surf = (struct r600_surface*)state->cbufs[i];
 		rtex = (struct r600_texture*)surf->base.texture;
 
@@ -2860,20 +2865,23 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		if (surf->color_is_int10)
 			sctx->framebuffer.color_is_int10 |= 1 << i;
 
 		if (rtex->fmask.size) {
 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
 		}
 
 		if (rtex->surface.is_linear)
 			sctx->framebuffer.any_dst_linear = true;
 
+		if (rtex->resource.domains & RADEON_DOMAIN_VRAM)
+			sctx->framebuffer.blitting_to_gart = false;
+
 		if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
 			sctx->framebuffer.CB_has_shader_readable_metadata = true;
 
 		si_context_add_resource_size(ctx, surf->base.texture);
 
 		p_atomic_inc(&rtex->framebuffers_bound);
 
 		if (rtex->dcc_gather_statistics) {
 			/* Dirty tracking must be enabled for DCC usage analysis. */
 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
@@ -3183,20 +3191,34 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 
 		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
 		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
 	}
 
 	/* Framebuffer dimensions. */
         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
 	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
 			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
 
+	if (sctx->b.chip_class >= GFX9 &&
+	    sctx->screen->info.has_dedicated_vram) {
+		/* For copies to GART, it is faster (although very unintuitive)
+		 * to disable all but one RB. If all RBs were banging away on
+		 * the PCIE bus, it would produce more traffic than the write-
+		 * combiner can efficiently handle.
+		 */
+		radeon_set_context_reg(cs, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
+				       sctx->framebuffer.blitting_to_gart ?
+					       S_02835C_ENABLE(1) |
+					       S_02835C_NUM_SE(1) |
+					       S_02835C_NUM_RB_PER_SE(1) : 0);
+	}
+
 	if (sctx->screen->dfsm_allowed) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
 	}
 
 	sctx->framebuffer.dirty_cbufs = 0;
 	sctx->framebuffer.dirty_zsbuf = false;
 }
 
 static void si_emit_msaa_sample_locs(struct si_context *sctx,
-- 
2.7.4



More information about the mesa-dev mailing list