[Mesa-dev] [PATCH 19/19] radeonsi: don't declare LDS in PS when ds_bpermute is used

Sun Oct 2 21:09:34 UTC 2016

From: Marek Olšák <marek.olsak at amd.com>

I guess this change is not strictly needed, because dead code elimination
already removes the unused LDS declaration.
---
 src/gallium/drivers/radeonsi/si_pipe.c   | 3 +++
 src/gallium/drivers/radeonsi/si_pipe.h   | 1 +
 src/gallium/drivers/radeonsi/si_shader.c | 7 +++----
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 43d6377..2aa679c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -810,20 +810,23 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 		(sscreen->b.chip_class == VI &&
 		 sscreen->b.info.pfp_fw_version >= 121 &&
 		 sscreen->b.info.me_fw_version >= 87) ||
 		(sscreen->b.chip_class == CIK &&
 		 sscreen->b.info.pfp_fw_version >= 211 &&
 		 sscreen->b.info.me_fw_version >= 173) ||
 		(sscreen->b.chip_class == SI &&
 		 sscreen->b.info.pfp_fw_version >= 121 &&
 		 sscreen->b.info.me_fw_version >= 87);
 
+	sscreen->has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
+				   sscreen->b.chip_class >= VI;
+
 	sscreen->b.has_cp_dma = true;
 	sscreen->b.has_streamout = true;
 	pipe_mutex_init(sscreen->shader_parts_mutex);
 	sscreen->use_monolithic_shaders =
 		HAVE_LLVM < 0x0308 ||
 		(sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
 
 	sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
 					    SI_CONTEXT_INV_VMEM_L1 |
 					    SI_CONTEXT_INV_GLOBAL_L2;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 558e185..3cefee7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -76,20 +76,21 @@
 struct si_compute;
 struct hash_table;
 struct u_suballocator;
 
 struct si_screen {
 	struct r600_common_screen	b;
 	unsigned			gs_table_depth;
 	unsigned			tess_offchip_block_dw_size;
 	bool				has_distributed_tess;
 	bool				has_draw_indirect_multi;
+	bool				has_ds_bpermute;
 
 	/* Whether shaders are monolithic (1-part) or separate (3-part). */
 	bool				use_monolithic_shaders;
 	bool				record_llvm_ir;
 
 	pipe_mutex			shader_parts_mutex;
 	struct si_shader_part		*vs_prologs;
 	struct si_shader_part		*vs_epilogs;
 	struct si_shader_part		*tcs_epilogs;
 	struct si_shader_part		*ps_prologs;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 7844ebd..30bf093 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5002,43 +5002,41 @@ static void si_llvm_emit_ddxy(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	unsigned opcode = emit_data->info->opcode;
 	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2];
 	int idx;
 	unsigned mask;
-	bool has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
-			       ctx->screen->b.chip_class >= VI;
 
 	thread_id = get_thread_id(ctx);
 
 	if (opcode == TGSI_OPCODE_DDX_FINE)
 		mask = TID_MASK_LEFT;
 	else if (opcode == TGSI_OPCODE_DDY_FINE)
 		mask = TID_MASK_TOP;
 	else
 		mask = TID_MASK_TOP_LEFT;
 
 	tl_tid = LLVMBuildAnd(gallivm->builder, thread_id,
 				lp_build_const_int32(gallivm, mask), "");
 
 	/* for DDX we want to next X pixel, DDY next Y pixel. */
 	idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
 	trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid,
 				  lp_build_const_int32(gallivm, idx), "");
 
 	val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
 
-	if (has_ds_bpermute) {
+	if (ctx->screen->has_ds_bpermute) {
 		args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
 				       lp_build_const_int32(gallivm, 4), "");
 		args[1] = val;
 		tl = lp_build_intrinsic(gallivm->builder,
 					"llvm.amdgcn.ds.bpermute", ctx->i32,
 					args, 2, LLVMReadNoneAttribute);
 
 		args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
 				       lp_build_const_int32(gallivm, 4), "");
 		trbl = lp_build_intrinsic(gallivm->builder,
@@ -5731,21 +5729,22 @@ static void create_function(struct si_shader_context *ctx)
 	for (i = 0; i <= last_sgpr; ++i)
 		shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
 
 	/* Unused fragment shader inputs are eliminated by the compiler,
 	 * so we don't know yet how many there will be.
 	 */
 	if (ctx->type != PIPE_SHADER_FRAGMENT)
 		for (; i < num_params; ++i)
 			shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
 
-	if (bld_base->info &&
+	if (!ctx->screen->has_ds_bpermute &&
+	    bld_base->info &&
 	    (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
 	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
 		ctx->lds =
 			LLVMAddGlobalInAddressSpace(gallivm->module,
 						    LLVMArrayType(ctx->i32, 64),
 						    "ddxy_lds",
-- 
2.7.4