[Mesa-dev] [PATCH 7/9] radeonsi/gfx9: don't read LS out vertex stride from an SGPR in monolithic HS

Marek Olšák maraeo at gmail.com
Wed Sep 6 17:03:58 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

This saves 44 bytes in a monolithic LS-HS binary.
---
 src/gallium/drivers/radeonsi/si_shader.c        | 5 +++++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 7c3bd8b..d622304 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -401,20 +401,25 @@ static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
 static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
 {
 	unsigned stride;
 
 	switch (ctx->type) {
 	case PIPE_SHADER_VERTEX:
 		stride = util_last_bit64(ctx->shader->selector->outputs_written);
 		return LLVMConstInt(ctx->i32, stride * 4, 0);
 
 	case PIPE_SHADER_TESS_CTRL:
+		if (ctx->screen->b.chip_class >= GFX9 &&
+		    ctx->shader->is_monolithic) {
+			stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written);
+			return LLVMConstInt(ctx->i32, stride * 4, 0);
+		}
 		return unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
 
 	default:
 		assert(0);
 		return NULL;
 	}
 }
 
 static LLVMValueRef get_instance_index_for_fetch(
 	struct si_shader_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 25fcead..fe25598 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1284,21 +1284,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 						  key, &key->part.tcs.ls_prolog);
 			key->part.tcs.ls = sctx->vs_shader.cso;
 
 			/* When the LS VGPR fix is needed, monolithic shaders
 			 * can:
 			 *  - avoid initializing EXEC in both the LS prolog
 			 *    and the LS main part when !vs_needs_prolog
 			 *  - remove the fixup for unused input VGPRs
 			 */
 			key->part.tcs.ls_prolog.ls_vgpr_fix = sctx->ls_vgpr_fix;
-			key->opt.prefer_mono = sctx->ls_vgpr_fix;
+
+			/* The LS output / HS input layout can be communicated
+			 * directly instead of via user SGPRs for merged LS-HS.
+			 * The LS VGPR fix prefers this too.
+			 */
+			key->opt.prefer_mono = 1;
 		}
 
 		key->part.tcs.epilog.prim_mode =
 			sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
 		key->part.tcs.epilog.invoc0_tess_factors_are_def =
 			sel->tcs_info.invoc0_tessfactors_are_def;
 		key->part.tcs.epilog.tes_reads_tess_factors =
 			sctx->tes_shader.cso->info.reads_tess_factors;
 
 		if (sel == sctx->fixed_func_tcs_shader.cso)
-- 
2.7.4



More information about the mesa-dev mailing list