[Mesa-dev] [PATCH] radeonsi: Optimize out exports to unbound color buffers

Jay Cornwall jay at jcornwall.me
Sun Oct 20 02:33:02 CEST 2013


This patch identifies shader exports to unbound color buffers (CBs) and
removes them during TGSI-to-LLVM-IR lowering. The method is identical to the
one used in the gallium/r600 driver.

The GLSL lower_output_reads pass generates temporary copies for writes to
shader outputs. In the case of gl_FragData, this results in writes to every
MRT whenever the shader writes one or more of its elements. Even when these
MRTs are unbound and masked out, the exports still cost as much on SI as
exports to bound, unmasked MRTs.

Signed-off-by: Jay Cornwall <jay at jcornwall.me>
---
 src/gallium/drivers/radeonsi/radeonsi_pipe.h   |  1 +
 src/gallium/drivers/radeonsi/radeonsi_shader.c |  6 ++++++
 src/gallium/drivers/radeonsi/si_state.c        | 15 +++++++++++++--
 src/gallium/drivers/radeonsi/si_state.h        |  1 +
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index 26f7e09..bede043 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -148,6 +148,7 @@ struct r600_context {
 	unsigned			fb_compressed_cb_mask;
 	unsigned			pa_sc_line_stipple;
 	unsigned			pa_su_sc_mode_cntl;
+	boolean				dual_src_blend;
 	/* for saving when using blitter */
 	struct pipe_stencil_ref		stencil_ref;
 	struct si_pipe_shader_selector	*ps_shader;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 80ee325..e4a9f56 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -995,6 +995,12 @@ handle_semantic:
 					semantic_name);
 			}
 
+			/* Shader is keyed on nr_cbufs, optimize out exports to unbound CBs */
+			if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT &&
+			    semantic_name == TGSI_SEMANTIC_COLOR &&
+			    color_count > si_shader_ctx->shader->key.ps.nr_cbufs)
+				continue;
+
 			si_llvm_init_export_args(bld_base, d, index, target, args);
 
 			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index da7c3d0..8109bde 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -27,6 +27,7 @@
 #include "util/u_memory.h"
 #include "util/u_framebuffer.h"
 #include "util/u_blitter.h"
+#include "util/u_dual_blend.h"
 #include "util/u_helpers.h"
 #include "util/u_math.h"
 #include "util/u_pack_color.h"
@@ -168,6 +169,8 @@ static void si_update_fb_blend_state(struct r600_context *rctx)
 	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
 
 	si_pm4_set_state(rctx, fb_blend, pm4);
+
+	rctx->dual_src_blend = blend->dual_src_blend;
 }
 
 /*
@@ -309,6 +312,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
 	}
 
+	/* only MRT0 has dual src blend */
+	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
+
 	return blend;
 }
 
@@ -2097,8 +2103,13 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx,
 		if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf)
 			key->vs.ucps_enabled |= 0x1;
 	} else if (sel->type == PIPE_SHADER_FRAGMENT) {
-		if (sel->fs_write_all)
-			key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
+		/* Key on nr_cbufs to optimize unused EXPORTs. */
+		key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
+
+		/* Dual-source blending only makes sense with nr_cbufs == 1. */
+		if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend)
+			key->ps.nr_cbufs = 2;
+
 		key->ps.export_16bpc = rctx->export_16bpc;
 
 		if (rctx->queued.named.rasterizer) {
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 6dbf880..ca51496 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -34,6 +34,7 @@ struct si_state_blend {
 	struct si_pm4_state	pm4;
 	uint32_t		cb_target_mask;
 	bool			alpha_to_one;
+	bool			dual_src_blend;
 };
 
 struct si_state_viewport {
-- 
1.8.4.1



More information about the mesa-dev mailing list