Mesa (master): radeonsi: eliminate VS outputs that aren't used by PS at runtime

Marek Olšák mareko at kemper.freedesktop.org
Mon Nov 21 21:41:10 UTC 2016


Module: Mesa
Branch: master
Commit: ef6c84b301ce15022d4907dfb0db5764e31e68f5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef6c84b301ce15022d4907dfb0db5764e31e68f5

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Mon Nov 14 09:09:51 2016 +0100

radeonsi: eliminate VS outputs that aren't used by PS at runtime

A past commit added the ability to compile "optimized" shader variants
asynchronously (not stalling the app).

This commit builds upon that and adds what is basically a runtime shader
linker. If a VS output isn't used by the currently-bound PS, a new VS
compilation is started without that output. The new shader variant
is used when it's ready.

All apps using separate shader objects I've seen had unused VS outputs.

Eliminating unused/useless VS outputs also eliminates the corresponding
vertex attribute loads.

Tested-by: Edmondo Tommasina <edmondo.tommasina at gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

---

 src/gallium/drivers/radeonsi/si_shader.c        | 26 ++++++++++++++++-
 src/gallium/drivers/radeonsi/si_shader.h        |  7 ++---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 37 ++++++++++++++++++++++---
 3 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2b43224..abe30e5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2281,6 +2281,26 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
 	for (i = 0; i < noutput; i++) {
 		semantic_name = outputs[i].name;
 		semantic_index = outputs[i].sid;
+		bool export_param = true;
+
+		switch (semantic_name) {
+		case TGSI_SEMANTIC_POSITION: /* ignore these */
+		case TGSI_SEMANTIC_PSIZE:
+		case TGSI_SEMANTIC_CLIPVERTEX:
+		case TGSI_SEMANTIC_EDGEFLAG:
+			break;
+		case TGSI_SEMANTIC_GENERIC:
+		case TGSI_SEMANTIC_CLIPDIST:
+			if (shader->key.opt.hw_vs.kill_outputs &
+			    (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
+				export_param = false;
+			break;
+		default:
+			if (shader->key.opt.hw_vs.kill_outputs2 &
+			    (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
+				export_param = false;
+			break;
+		}
 
 handle_semantic:
 		/* Select the correct target */
@@ -2304,6 +2324,8 @@ handle_semantic:
 			break;
 		case TGSI_SEMANTIC_COLOR:
 		case TGSI_SEMANTIC_BCOLOR:
+			if (!export_param)
+				continue;
 			target = V_008DFC_SQ_EXP_PARAM + param_count;
 			assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
 			shader->info.vs_output_param_offset[i] = param_count;
@@ -2325,6 +2347,8 @@ handle_semantic:
 		case TGSI_SEMANTIC_FOG:
 		case TGSI_SEMANTIC_TEXCOORD:
 		case TGSI_SEMANTIC_GENERIC:
+			if (!export_param)
+				continue;
 			target = V_008DFC_SQ_EXP_PARAM + param_count;
 			assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
 			shader->info.vs_output_param_offset[i] = param_count;
@@ -7083,7 +7107,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	si_init_shader_ctx(&ctx, sscreen, shader, tm);
 	ctx.separate_prolog = !is_monolithic;
 
-	memset(shader->info.vs_output_param_offset, 0xff,
+	memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED,
 	       sizeof(shader->info.vs_output_param_offset));
 
 	shader->info.uses_instanceid = sel->info.uses_instanceid;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index fc9c913..aa37676 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -322,10 +322,6 @@ struct si_vs_prolog_bits {
 /* Common VS bits between the shader key and the epilog key. */
 struct si_vs_epilog_bits {
 	unsigned	export_prim_id:1; /* when PS needs it and GS is disabled */
-	/* TODO:
-	 * - skip layer, viewport, clipdist, and culldist parameter exports
-	 *   if PS doesn't read them
-	 */
 };
 
 /* Common TCS bits between the shader key and the epilog key. */
@@ -440,6 +436,8 @@ struct si_shader_key {
 	/* Optimization flags for asynchronous compilation only. */
 	union {
 		struct {
+			uint64_t	kill_outputs; /* "get_unique_index" bits */
+			uint32_t	kill_outputs2; /* "get_unique_index2" bits */
 			unsigned	clip_disable:1;
 		} hw_vs; /* HW VS (it can be VS, TES, GS) */
 	} opt;
@@ -468,6 +466,7 @@ enum {
 	EXP_PARAM_DEFAULT_VAL_0001,
 	EXP_PARAM_DEFAULT_VAL_1110,
 	EXP_PARAM_DEFAULT_VAL_1111,
+	EXP_PARAM_UNDEFINED = 255,
 };
 
 /* GCN-specific shader info. */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index e4d8747..7834f87 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -858,11 +858,35 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
 					 struct si_shader_selector *vs,
 					 struct si_shader_key *key)
 {
+	struct si_shader_selector *ps = sctx->ps_shader.cso;
+
 	key->opt.hw_vs.clip_disable =
 		sctx->queued.named.rasterizer->clip_plane_enable == 0 &&
 		(vs->info.clipdist_writemask ||
 		 vs->info.writes_clipvertex) &&
 		!vs->info.culldist_writemask;
+
+	/* Find out if PS is disabled. */
+	bool ps_disabled = ps == NULL;
+
+	/* Find out which VS outputs aren't used by the PS. */
+	uint64_t outputs_written = vs->outputs_written;
+	uint32_t outputs_written2 = vs->outputs_written2;
+	uint64_t inputs_read = 0;
+	uint32_t inputs_read2 = 0;
+
+	outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
+
+	if (!ps_disabled) {
+		inputs_read = ps->inputs_read;
+		inputs_read2 = ps->inputs_read2;
+	}
+
+	uint64_t linked = outputs_written & inputs_read;
+	uint32_t linked2 = outputs_written2 & inputs_read2;
+
+	key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
+	key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
 }
 
 /* Compute the key for the hw shader variant */
@@ -1785,11 +1809,16 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 				/* The input is loaded from parameter memory. */
 				ps_input_cntl |= S_028644_OFFSET(offset);
 			} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
-				/* The input is a DEFAULT_VAL constant. */
-				assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
-				       offset <= EXP_PARAM_DEFAULT_VAL_1111);
+				if (offset == EXP_PARAM_UNDEFINED) {
+					/* This can happen with depth-only rendering. */
+					offset = 0;
+				} else {
+					/* The input is a DEFAULT_VAL constant. */
+					assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
+					       offset <= EXP_PARAM_DEFAULT_VAL_1111);
+					offset -= EXP_PARAM_DEFAULT_VAL_0000;
+				}
 
-				offset -= EXP_PARAM_DEFAULT_VAL_0000;
 				ps_input_cntl = S_028644_OFFSET(0x20) |
 						S_028644_DEFAULT_VAL(offset);
 			}




More information about the mesa-commit mailing list