[Mesa-dev] [PATCH 30/53] r600: create LDS info constants buffer and write LDS registers.

Dave Airlie airlied at gmail.com
Sun Nov 29 22:20:39 PST 2015


From: Dave Airlie <airlied at redhat.com>

This creates a constant buffer describing the layout of the LDS
memory. The same buffer is bound to the vertex, tess control and
tess evaluation shaders.

This also programs the LDS size and the LS_HS_CONFIG registers,
on evergreen and newer chips only (chip_class >= EVERGREEN).

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/drivers/r600/evergreen_state.c   | 128 +++++++++++++++++++++++++++
 src/gallium/drivers/r600/r600_pipe.h         |  24 ++++-
 src/gallium/drivers/r600/r600_state_common.c |  13 +++
 3 files changed, 162 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index c01e8e3..edc6f28 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3763,3 +3763,131 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 
 	evergreen_init_compute_state_functions(rctx);
 }
+
+/**
+ * Computes the LDS layout for tessellation (VS, TCS, TES) and uploads it,
+ * together with other non-compile-time parameters, as a constant buffer
+ * bound at R600_LDS_INFO_CONST_BUFFER for the vertex, tess-control and
+ * tess-evaluation stages.  Returns early, leaving *num_patches and
+ * *lds_alloc unwritten, when no tess evaluation shader is bound.
+ *
+ * The constant buffer holds 16 dwords (sizes and offsets in bytes):
+ *   [0] input_patch_size       [1] input_vertex_size
+ *   [2] num_tcs_input_cp       [3] num_tcs_output_cp
+ *   [4] output_patch_size      [5] output_vertex_size
+ *   [6] output_patch0_offset   [7] perpatch_output_offset
+ *   [8..13] six floats copied from rctx->tess_state
+ *   [14..15] written as zero
+ *
+ * *lds_alloc receives lds_size OR'ed with bit 14 (see TODO in the body).
+ */
+void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches, uint32_t *lds_alloc)
+{
+	struct pipe_constant_buffer constbuf = {0};
+	struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader; /* falls back to the TES selector when no TCS is bound */
+	struct r600_pipe_shader_selector *ls = rctx->vs_shader; /* its LDS output mask sizes the TCS inputs */
+	unsigned num_tcs_input_cp = info->vertices_per_patch;
+	unsigned num_tcs_outputs;
+	unsigned num_tcs_output_cp;
+	unsigned num_tcs_patch_outputs;
+	unsigned num_tcs_inputs;
+	unsigned input_vertex_size, output_vertex_size;
+	unsigned input_patch_size, pervertex_output_patch_size, output_patch_size;
+	unsigned output_patch0_offset, perpatch_output_offset, lds_size;
+	uint32_t values[16]; /* staging copy of the constant buffer contents */
+	uint32_t tmp;
+
+	if (!rctx->tes_shader)
+		return; /* both output parameters are left unwritten on this path */
+
+	*num_patches = 1; /* fixed at a single patch here */
+
+	num_tcs_inputs = util_last_bit64(ls->lds_outputs_written_mask);
+
+	if (rctx->tcs_shader) {
+		num_tcs_outputs = util_last_bit64(tcs->lds_outputs_written_mask);
+		num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+		num_tcs_patch_outputs = util_last_bit64(tcs->lds_patch_outputs_written_mask);
+	} else {
+		num_tcs_outputs = num_tcs_inputs; /* no TCS: the output counts mirror the inputs */
+		num_tcs_output_cp = num_tcs_input_cp;
+		num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
+	}
+
+	/* per-vertex sizes in bytes: 16 bytes for each input/output counted above */
+	input_vertex_size = num_tcs_inputs * 16;
+	output_vertex_size = num_tcs_outputs * 16;
+
+	input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+	pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+	output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; /* per-vertex data followed by per-patch data */
+
+	output_patch0_offset = rctx->tcs_shader ? input_patch_size * *num_patches : 0; /* outputs start after the input patches only when a TCS is bound */
+	perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
+
+	lds_size = output_patch0_offset + output_patch_size * *num_patches;
+
+	values[0] = input_patch_size;
+	values[1] = input_vertex_size;
+	values[2] = num_tcs_input_cp;
+	values[3] = num_tcs_output_cp;
+
+	values[4] = output_patch_size;
+	values[5] = output_vertex_size;
+	values[6] = output_patch0_offset;
+	values[7] = perpatch_output_offset;
+
+	/* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES *
+	   LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */
+	tmp = (lds_size | (1 << 14)); /* TODO: bit 14 is hard-coded; presumably HS_NUM_WAVES, not computed from the formula above - confirm */
+
+	*lds_alloc = tmp;
+
+	memcpy(&values[8], rctx->tess_state, 6 * sizeof(float)); /* six floats land in dwords 8..13 */
+	values[14] = 0;
+	values[15] = 0;
+
+	constbuf.user_buffer = values;
+	constbuf.buffer_size = 16 * 4; /* 16 dwords */
+
+	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
+				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
+				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
+				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+	pipe_resource_reference(&constbuf.buffer, NULL); /* NOTE(review): presumably drops a buffer reference set_constant_buffer may have stored in constbuf - confirm */
+}
+
+uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
+				    const struct pipe_draw_info *info,
+				    unsigned num_patches)
+{
+	unsigned num_output_cp;
+
+	if (!rctx->tes_shader)
+		return 0; /* no tess evaluation shader bound: num_patches is never read */
+
+	num_output_cp = rctx->tcs_shader ? /* a bound TCS supplies the output count; otherwise reuse the input count */
+		rctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+		info->vertices_per_patch;
+
+	return S_028B58_NUM_PATCHES(num_patches) | /* pack the three fields into one register value */
+		S_028B58_HS_NUM_INPUT_CP(info->vertices_per_patch) |
+		S_028B58_HS_NUM_OUTPUT_CP(num_output_cp);
+}
+
+void evergreen_set_ls_hs_config(struct r600_context *rctx, /* rctx is not used in the body */
+				struct radeon_winsys_cs *cs,
+				uint32_t ls_hs_config)
+{
+	radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); /* writes the caller-supplied value as-is to VGT_LS_HS_CONFIG */
+}
+
+void evergreen_set_lds_alloc(struct r600_context *rctx, /* rctx is not used in the body */
+			     struct radeon_winsys_cs *cs,
+			     uint32_t lds_alloc)
+{
+	radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc); /* writes the caller-supplied value as-is to SQ_LDS_ALLOC */
+}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index ae04c8c..dfb5f46 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -59,11 +59,11 @@
 
 /* the number of CS dwords for flushing and drawing */
 #define R600_MAX_FLUSH_CS_DWORDS	16
-#define R600_MAX_DRAW_CS_DWORDS		47
+#define R600_MAX_DRAW_CS_DWORDS		53
 #define R600_TRACE_CS_DWORDS		7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 2
+#define R600_MAX_DRIVER_CONST_BUFFERS 3
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
@@ -71,7 +71,12 @@
 #define R600_UCP_SIZE (4*4*8)
 #define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
 
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+/*
+ * Note GS doesn't use a constant buffer binding, just a resource index,
+ * so it's fine to have it exist at index 16.
+ */
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
  * of 16 const buffers.
  * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -702,6 +707,19 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 			       uint64_t dst_offset,
 			       uint64_t src_offset,
 			       uint64_t size);
+void evergreen_setup_tess_constants(struct r600_context *rctx,
+				    const struct pipe_draw_info *info,
+				    unsigned *num_patches,
+				    uint32_t *lds_alloc);
+uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
+				    const struct pipe_draw_info *info,
+				    unsigned num_patches);
+void evergreen_set_ls_hs_config(struct r600_context *rctx,
+				struct radeon_winsys_cs *cs,
+				uint32_t ls_hs_config);
+void evergreen_set_lds_alloc(struct r600_context *rctx,
+			     struct radeon_winsys_cs *cs,
+			     uint32_t lds_alloc);
 
 /* r600_state_common.c */
 void r600_init_common_state_functions(struct r600_context *rctx);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 73e9494..d9152d7 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1612,6 +1612,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
 	uint64_t mask;
+	uint32_t lds_alloc = 0;
+	unsigned num_patches;
 
 	if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
 		return;
@@ -1717,6 +1719,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
 	}
 
+	if (rctx->b.chip_class >= EVERGREEN)
+		evergreen_setup_tess_constants(rctx, &info, &num_patches, &lds_alloc);
+
 	/* Emit states. */
 	r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
 	r600_flush_emit(rctx);
@@ -1750,6 +1755,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				       S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1));
 	}
 
+	if (rctx->b.chip_class >= EVERGREEN) {
+		uint32_t ls_hs_config = evergreen_get_ls_hs_config(rctx, &info,
+								   num_patches);
+
+		evergreen_set_ls_hs_config(rctx, cs, ls_hs_config);
+		evergreen_set_lds_alloc(rctx, cs, lds_alloc);
+	}
+
 	/* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles,
 	 * even though it should have no effect on those. */
 	if (rctx->b.chip_class == R600 && rctx->rasterizer) {
-- 
2.5.0



More information about the mesa-dev mailing list