[Mesa-dev] [PATCH 60/61] radeonsi: fix tess offchip offset for per-patch attributes

Marek Olšák maraeo at gmail.com
Mon Apr 24 08:45:57 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

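The offset of per-patch attributes in the offchip layout needs 4 more bits (20 instead of 16), so the other two fields are shrunk to 6 bits each to make room. I don't know what, if anything, is fixed by this.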
---
 src/gallium/drivers/radeonsi/si_shader.c          |  8 ++++----
 src/gallium/drivers/radeonsi/si_shader_internal.h | 17 +++++++++++------
 src/gallium/drivers/radeonsi/si_state_draw.c      |  5 +++--
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 57bcd6b..3b00bea 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -707,22 +707,22 @@ static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
  */
 static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
 					       LLVMValueRef rel_patch_id,
                                                LLVMValueRef vertex_index,
                                                LLVMValueRef param_index)
 {
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
 	LLVMValueRef param_stride, constant16;
 
-	vertices_per_patch = unpack_param(ctx, ctx->param_tcs_offchip_layout, 9, 6);
-	num_patches = unpack_param(ctx, ctx->param_tcs_offchip_layout, 0, 9);
+	vertices_per_patch = unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
+	num_patches = unpack_param(ctx, ctx->param_tcs_offchip_layout, 0, 6);
 	total_vertices = LLVMBuildMul(gallivm->builder, vertices_per_patch,
 	                              num_patches, "");
 
 	constant16 = LLVMConstInt(ctx->i32, 16, 0);
 	if (vertex_index) {
 		base_addr = LLVMBuildMul(gallivm->builder, rel_patch_id,
 		                         vertices_per_patch, "");
 
 		base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
 		                         vertex_index, "");
@@ -734,21 +734,21 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
 	}
 
 	base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
 	                         LLVMBuildMul(gallivm->builder, param_index,
 	                                      param_stride, ""), "");
 
 	base_addr = LLVMBuildMul(gallivm->builder, base_addr, constant16, "");
 
 	if (!vertex_index) {
 		LLVMValueRef patch_data_offset =
-		           unpack_param(ctx, ctx->param_tcs_offchip_layout, 16, 16);
+		           unpack_param(ctx, ctx->param_tcs_offchip_layout, 12, 20);
 
 		base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
 		                         patch_data_offset, "");
 	}
 	return base_addr;
 }
 
 static LLVMValueRef get_tcs_tes_buffer_address_from_reg(
                                        struct si_shader_context *ctx,
                                        const struct tgsi_full_dst_register *dst,
@@ -1537,21 +1537,21 @@ static void declare_system_value(struct si_shader_context *ctx,
 						lp_build_add(bld, coord[0], coord[1]));
 
 		value = lp_build_gather_values(gallivm, coord, 4);
 		break;
 	}
 
 	case TGSI_SEMANTIC_VERTICESIN:
 		if (ctx->type == PIPE_SHADER_TESS_CTRL)
 			value = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 26, 6);
 		else if (ctx->type == PIPE_SHADER_TESS_EVAL)
-			value = unpack_param(ctx, ctx->param_tcs_offchip_layout, 9, 7);
+			value = unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
 		else
 			assert(!"invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
 		break;
 
 	case TGSI_SEMANTIC_TESSINNER:
 	case TGSI_SEMANTIC_TESSOUTER:
 	{
 		LLVMValueRef buffer, base, addr;
 		int param = si_shader_io_get_unique_index(decl->Semantic.Name, 0);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 8a9bf10..954b83d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -123,47 +123,52 @@ struct si_shader_context {
 	int param_draw_id;
 	int param_vertex_id;
 	int param_rel_auto_id;
 	int param_vs_prim_id;
 	int param_instance_id;
 	int param_vertex_index0;
 	/* VS states and layout of LS outputs / TCS inputs at the end
 	 *   [0] = clamp vertex color
 	 *   [1] = indexed
 	 *   [8:20] = stride between patches in DW = num_inputs * num_vertices * 4
-	 *            max = 32*32*4
+	 *            max = 32*32*4 + 32*4
 	 *   [24:31] = stride between vertices in DW = num_inputs * 4
 	 *             max = 32*4
 	 */
 	int param_vs_state_bits;
 	/* HW VS */
 	int param_streamout_config;
 	int param_streamout_write_index;
 	int param_streamout_offset[4];
 
 	/* API TCS & TES */
 	/* Layout of TCS outputs in the offchip buffer
-	 *   [0:8] = the number of patches per threadgroup.
-	 *   [9:15] = the number of output vertices per patch.
-	 *   [16:31] = the offset of per patch attributes in the buffer in bytes. */
+	 * # 6 bits
+	 *   [0:5] = the number of patches per threadgroup, max = NUM_PATCHES (40)
+	 * # 6 bits
+	 *   [6:11] = the number of output vertices per patch, max = 32
+	 * # 20 bits
+	 *   [12:31] = the offset of per patch attributes in the buffer in bytes.
+	 *             max = NUM_PATCHES*32*32*16
+	 */
 	int param_tcs_offchip_layout;
 
 	/* API TCS */
 	/* Offsets where TCS outputs and TCS patch outputs live in LDS:
 	 *   [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
 	 *   [16:31] = TCS output patch0 offset for per-patch / 16
-	 *             max = NUM_PATCHES*32*32* + 32*32
+	 *             max = (NUM_PATCHES + 1) * 32*32
 	 */
 	int param_tcs_out_lds_offsets;
 	/* Layout of TCS outputs / TES inputs:
 	 *   [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
-	 *            max = 32*32*4
+	 *            max = 32*32*4 + 32*4
 	 *   [13:20] = stride between output vertices in DW = num_inputs * 4
 	 *             max = 32*4
 	 *   [26:31] = gl_PatchVerticesIn, max = 32
 	 */
 	int param_tcs_out_lds_layout;
 	int param_tcs_offchip_addr_base64k;
 	int param_tcs_factor_addr_base64k;
 	int param_tcs_offchip_offset;
 	int param_tcs_factor_offset;
 	int param_tcs_patch_id;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 6a756c4..2d5a08e 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -204,22 +204,23 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
 	assert(num_tcs_input_cp <= 32);
 	assert(num_tcs_output_cp <= 32);
 
 	tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
 			S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4);
 	tcs_out_layout = (output_patch_size / 4) |
 			 ((output_vertex_size / 4) << 13);
 	tcs_out_offsets = (output_patch0_offset / 16) |
 			  ((perpatch_output_offset / 16) << 16);
-	offchip_layout = (pervertex_output_patch_size * *num_patches << 16) |
-			 (num_tcs_output_cp << 9) | *num_patches;
+	offchip_layout = *num_patches |
+			 (num_tcs_output_cp << 6) |
+			 (pervertex_output_patch_size * *num_patches << 12);
 
 	/* Compute the LDS size. */
 	lds_size = output_patch0_offset + output_patch_size * *num_patches;
 
 	if (sctx->b.chip_class >= CIK) {
 		assert(lds_size <= 65536);
 		lds_size = align(lds_size, 512) / 512;
 	} else {
 		assert(lds_size <= 32768);
 		lds_size = align(lds_size, 256) / 256;
-- 
2.7.4



More information about the mesa-dev mailing list