[Mesa-dev] [PATCH 2/2] radv: do not emit unnecessary ES output stores

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed May 16 15:43:23 UTC 2018


GFX9:
Totals from affected shaders:
SGPRS: 472 -> 464 (-1.69 %)
VGPRS: 576 -> 584 (1.39 %)
Code Size: 45432 -> 44324 (-2.44 %) bytes
Max Waves: 40 -> 40 (0.00 %)

VI:
Totals from affected shaders:
SGPRS: 720 -> 720 (0.00 %)
VGPRS: 728 -> 728 (0.00 %)
Code Size: 45348 -> 43992 (-2.99 %) bytes
Max Waves: 120 -> 120 (0.00 %)

This affects Rise of the Tomb Raider and the three Vulkan demos
that use a geometry shader (geometryshader, deferredshadows
and viewportarray).

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/vulkan/radv_nir_to_llvm.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index bfae2a489e..2c3abc2109 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2580,6 +2580,8 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
 	}
 
 	for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+		unsigned output_usage_mask =
+			ctx->shader_info->info.vs.output_usage_mask[i];
 		LLVMValueRef dw_addr = NULL;
 		LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
 		int param_index;
@@ -2588,8 +2590,10 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
 		if (!(ctx->output_mask & (1ull << i)))
 			continue;
 
-		if (i == VARYING_SLOT_CLIP_DIST0)
+		if (i == VARYING_SLOT_CLIP_DIST0) {
 			length = ctx->num_output_clips + ctx->num_output_culls;
+			output_usage_mask = (1 << length) - 1;
+		}
 
 		param_index = shader_io_get_unique_index(i);
 
@@ -2598,14 +2602,22 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
 			                       LLVMConstInt(ctx->ac.i32, param_index * 4, false),
 			                       "");
 		}
+
 		for (j = 0; j < length; j++) {
+			if (!(output_usage_mask & (1 << j)))
+				continue;
+
 			LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
 			out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
 
 			if (ctx->ac.chip_class  >= GFX9) {
-				ac_lds_store(&ctx->ac, dw_addr,
+				LLVMValueRef dw_addr_offset =
+					LLVMBuildAdd(ctx->ac.builder, dw_addr,
+						     LLVMConstInt(ctx->ac.i32,
+								  j, false), "");
+
+				ac_lds_store(&ctx->ac, dw_addr_offset,
 					     LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""));
-				dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, "");
 			} else {
 				ac_build_buffer_store_dword(&ctx->ac,
 				                            ctx->esgs_ring,
-- 
2.17.0



More information about the mesa-dev mailing list