[Mesa-dev] [PATCH 2/2] ac/nir: don't write tcs outputs to LDS that aren't read back.

Dave Airlie airlied at gmail.com
Tue Nov 14 05:18:46 UTC 2017


From: Dave Airlie <airlied at redhat.com>

If the TCS doesn't read back an output, there is no need to store it
to LDS in the first place (except for tess factors, which are always stored).

This seems to gain about 40fps (3290->3330) with the tessellation demo.

I haven't tested whether it impacts DoW3 (Dawn of War III) at all.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/amd/common/ac_nir_to_llvm.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3d9f613..e7133ec 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -150,6 +150,9 @@ struct nir_to_llvm_context {
 	unsigned tes_primitive_mode;
 	uint64_t tess_outputs_written;
 	uint64_t tess_patch_outputs_written;
+
+	uint32_t tcs_patch_outputs_read;
+	uint64_t tcs_outputs_read;
 };
 
 static inline struct nir_to_llvm_context *
@@ -2790,7 +2793,15 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
 	const unsigned comp = instr->variables[0]->var->data.location_frac;
 	const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
 	const bool is_compact = instr->variables[0]->var->data.compact;
+	bool store_lds = true;
 
+	if (instr->variables[0]->var->data.patch) {
+		if (!(ctx->tcs_patch_outputs_read & (1U << instr->variables[0]->var->data.location)))
+			store_lds = false;
+	} else {
+		if (!(ctx->tcs_outputs_read & (1ULL << instr->variables[0]->var->data.location)))
+			store_lds = false;
+	}
 	get_deref_offset(ctx->nir, instr->variables[0],
 			 false, NULL, per_vertex ? &vertex_index : NULL,
 			 &const_index, &indir_index);
@@ -2827,7 +2838,8 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
 			continue;
 		LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - comp);
 
-		ac_lds_store(&ctx->ac, dw_addr, value);
+		if (store_lds || is_tess_factor)
+			ac_lds_store(&ctx->ac, dw_addr, value);
 
 		if (!is_tess_factor && writemask != 0xF)
 			ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
@@ -6550,6 +6562,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 			ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex");
 
 			ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;
+		} else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+			ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
+			ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read;
 		} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
 			ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
 		} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
-- 
2.9.5



More information about the mesa-dev mailing list