[Mesa-dev] [PATCH 4/6] radeonsi: change the bit-packing of LS out/TCS in data

Nicolai Hähnle nhaehnle at gmail.com
Wed Apr 12 09:20:01 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

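Move the patch stride from bits [0:12] to [8:20] and the vertex stride from
bits [13:20] to [24:31], matching the layout of SI_PARAM_VS_STATE_BITS.
This avoids conflicts when merging various VS state bits.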
---
 src/gallium/drivers/radeonsi/si_shader.c     | 10 +++++-----
 src/gallium/drivers/radeonsi/si_shader.h     |  9 +++++++--
 src/gallium/drivers/radeonsi/si_state_draw.c |  4 ++--
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c97e27d..0f080cf 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -220,23 +220,23 @@ static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
  * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
  * - ...
  *
  * All three shaders VS(LS), TCS, TES share the same LDS space.
  */
 
 static LLVMValueRef
 get_tcs_in_patch_stride(struct si_shader_context *ctx)
 {
 	if (ctx->type == PIPE_SHADER_VERTEX)
-		return unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 0, 13);
+		return unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 8, 13);
 	else if (ctx->type == PIPE_SHADER_TESS_CTRL)
-		return unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 0, 13);
+		return unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 8, 13);
 	else {
 		assert(0);
 		return NULL;
 	}
 }
 
 static LLVMValueRef
 get_tcs_out_patch_stride(struct si_shader_context *ctx)
 {
 	return unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
@@ -916,21 +916,21 @@ static void lds_store(struct lp_build_tgsi_context *bld_base,
 }
 
 static LLVMValueRef fetch_input_tcs(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *reg,
 	enum tgsi_opcode_type type, unsigned swizzle)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMValueRef dw_addr, stride;
 
-	stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+	stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 24, 8);
 	dw_addr = get_tcs_in_current_patch_offset(ctx);
 	dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
 
 	return lds_load(bld_base, type, swizzle, dw_addr);
 }
 
 static LLVMValueRef fetch_output_tcs(
 		struct lp_build_tgsi_context *bld_base,
 		const struct tgsi_full_src_register *reg,
 		enum tgsi_opcode_type type, unsigned swizzle)
@@ -2401,21 +2401,21 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 	uint64_t inputs;
 
 	invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
 
 	rw_buffers = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
 	buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers,
 	                LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 0));
 
 	buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
 
-	lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+	lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 24, 8);
 	lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
 	                                 lds_vertex_stride, "");
 	lds_base = get_tcs_in_current_patch_offset(ctx);
 	lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, "");
 
 	inputs = ctx->shader->key.mono.tcs.inputs_to_copy;
 	while (inputs) {
 		unsigned i = u_bit_scan64(&inputs);
 
 		LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base,
@@ -2656,21 +2656,21 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	unsigned i, chan;
 	LLVMValueRef vertex_id = LLVMGetParam(ctx->main_fn,
 					      ctx->param_rel_auto_id);
 	LLVMValueRef vertex_dw_stride =
-		unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 13, 8);
+		unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 24, 8);
 	LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
 						 vertex_dw_stride, "");
 
 	/* Write outputs to LDS. The next shader (TCS aka HS) will read
 	 * its inputs from it. */
 	for (i = 0; i < info->num_outputs; i++) {
 		LLVMValueRef *out_ptr = ctx->outputs[i];
 		unsigned name = info->output_semantic_name[i];
 		unsigned index = info->output_semantic_index[i];
 		int param = si_shader_io_get_unique_index(name, index);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 4a0f270..6ce2b26 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -168,22 +168,23 @@ enum {
 	SI_PARAM_TCS_OUT_OFFSETS,
 
 	/* Layout of TCS outputs / TES inputs:
 	 *   [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
 	 *   [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
 	 *   [26:31] = gl_PatchVerticesIn, max = 32
 	 */
 	SI_PARAM_TCS_OUT_LAYOUT,
 
 	/* Layout of LS outputs / TCS inputs
-	 *   [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
-	 *   [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
+	 *   [8:20] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
+	 *   [24:31] = stride between vertices in dwords = num_inputs * 4, max = 32*4
+	 * (same layout as SI_PARAM_VS_STATE_BITS)
 	 */
 	SI_PARAM_TCS_IN_LAYOUT,
 
 	SI_PARAM_TCS_OC_LDS,
 	SI_PARAM_TESS_FACTOR_OFFSET,
 	SI_PARAM_PATCH_ID,
 	SI_PARAM_REL_IDS,
 
 	/* GS only parameters */
 	SI_PARAM_GS2VS_OFFSET = SI_NUM_RESOURCE_PARAMS,
@@ -223,20 +224,24 @@ enum {
 	SI_PARAM_BLOCK_ID,
 	SI_PARAM_THREAD_ID,
 
 	SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 9, /* +8 for COLOR[0..1] */
 };
 
 /* Fields of driver-defined VS state SGPR. */
 /* Clamp vertex color output (only used in VS as VS). */
 #define S_VS_STATE_CLAMP_VERTEX_COLOR(x)	(((unsigned)(x) & 0x1) << 0)
 #define C_VS_STATE_CLAMP_VERTEX_COLOR		0xFFFFFFFE
+#define S_VS_STATE_LS_OUT_PATCH_SIZE(x)		(((unsigned)(x) & 0x1FFF) << 8)
+#define C_VS_STATE_LS_OUT_PATCH_SIZE		0xFFE000FF
+#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x)	(((unsigned)(x) & 0xFF) << 24)
+#define C_VS_STATE_LS_OUT_VERTEX_SIZE		0x00FFFFFF
 
 /* SI-specific system values. */
 enum {
 	TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
 	TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
 };
 
 /* For VS shader key fix_fetch. */
 enum {
 	SI_FIX_FETCH_NONE = 0,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 54e02d2..7bf4f4d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -205,22 +205,22 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	/* Compute userdata SGPRs. */
 	assert(((input_vertex_size / 4) & ~0xff) == 0);
 	assert(((output_vertex_size / 4) & ~0xff) == 0);
 	assert(((input_patch_size / 4) & ~0x1fff) == 0);
 	assert(((output_patch_size / 4) & ~0x1fff) == 0);
 	assert(((output_patch0_offset / 16) & ~0xffff) == 0);
 	assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
 	assert(num_tcs_input_cp <= 32);
 	assert(num_tcs_output_cp <= 32);
 
-	tcs_in_layout = (input_patch_size / 4) |
-			((input_vertex_size / 4) << 13);
+	tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
+			S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4);
 	tcs_out_layout = (output_patch_size / 4) |
 			 ((output_vertex_size / 4) << 13);
 	tcs_out_offsets = (output_patch0_offset / 16) |
 			  ((perpatch_output_offset / 16) << 16);
 	offchip_layout = (pervertex_output_patch_size * *num_patches << 16) |
 			 (num_tcs_output_cp << 9) | *num_patches;
 
 	/* Set them for LS. */
 	radeon_set_sh_reg(cs,
 		R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
-- 
2.9.3



More information about the mesa-dev mailing list