[Mesa-dev] [PATCH 6/8] radeonsi: precompute some fields for PA_CL_VS_OUT_CNTL in si_shader_selector

Marek Olšák maraeo at gmail.com
Mon Jun 5 16:50:59 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_pipe.h          |  1 +
 src/gallium/drivers/radeonsi/si_shader.h        |  3 +++
 src/gallium/drivers/radeonsi/si_state.c         | 21 +++++----------------
 src/gallium/drivers/radeonsi/si_state_shaders.c | 16 ++++++++++++++++
 4 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index c5e6e7d..db747d6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -60,20 +60,21 @@
 #define SI_CONTEXT_FLUSH_AND_INV_DB	(R600_CONTEXT_PRIVATE_FLAG << 7)
 #define SI_CONTEXT_FLUSH_AND_INV_CB	(R600_CONTEXT_PRIVATE_FLAG << 8)
 /* Engine synchronization. */
 #define SI_CONTEXT_VS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 9)
 #define SI_CONTEXT_PS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 10)
 #define SI_CONTEXT_CS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 11)
 #define SI_CONTEXT_VGT_FLUSH		(R600_CONTEXT_PRIVATE_FLAG << 12)
 #define SI_CONTEXT_VGT_STREAMOUT_SYNC	(R600_CONTEXT_PRIVATE_FLAG << 13)
 
 #define SI_MAX_BORDER_COLORS	4096
+#define SIX_BITS		0x3F
 
 struct si_compute;
 struct hash_table;
 struct u_suballocator;
 
 struct si_screen {
 	struct r600_common_screen	b;
 	unsigned			gs_table_depth;
 	unsigned			tess_offchip_block_dw_size;
 	bool				has_distributed_tess;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index f739769..7c04b7e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -319,20 +319,23 @@ struct si_shader_selector {
 
 	struct si_shader	*gs_copy_shader;
 
 	struct tgsi_token       *tokens;
 	struct pipe_stream_output_info  so;
 	struct tgsi_shader_info		info;
 
 	/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
 	unsigned	type;
 	bool		vs_needs_prolog;
+	unsigned	pa_cl_vs_out_cntl;
+	ubyte		clipdist_mask;
+	ubyte		culldist_mask;
 
 	/* GS parameters. */
 	unsigned	esgs_itemsize;
 	unsigned	gs_input_verts_per_prim;
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
 	unsigned	gs_num_invocations;
 	unsigned	max_gs_stream; /* count - 1 */
 	unsigned	gsvs_vertex_size;
 	unsigned	max_gsvs_emit_size;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 50a0bd1..5e5f564 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -656,65 +656,54 @@ static void si_set_clip_state(struct pipe_context *ctx,
 }
 
 static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
 	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
 	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
 }
 
-#define SIX_BITS 0x3F
-
 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct si_shader *vs = si_get_vs_state(sctx);
-	struct tgsi_shader_info *info = si_get_vs_info(sctx);
+	struct si_shader_selector *vs_sel = vs->selector;
+	struct tgsi_shader_info *info = &vs_sel->info;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	unsigned window_space =
 	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
-	unsigned clipdist_mask =
-		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
+	unsigned clipdist_mask = vs_sel->clipdist_mask;
 	unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
-	unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance;
+	unsigned culldist_mask = vs_sel->culldist_mask;
 	unsigned total_mask;
-	bool misc_vec_ena;
 
 	if (vs->key.opt.hw_vs.clip_disable) {
 		assert(!info->culldist_writemask);
 		clipdist_mask = 0;
 		culldist_mask = 0;
 	}
 	total_mask = clipdist_mask | culldist_mask;
 
 	/* Clip distances on points have no effect, so need to be implemented
 	 * as cull distances. This applies for the clipvertex case as well.
 	 *
 	 * Setting this for primitives other than points should have no adverse
 	 * effects.
 	 */
 	clipdist_mask &= rs->clip_plane_enable;
 	culldist_mask |= clipdist_mask;
 
-	misc_vec_ena = info->writes_psize || info->writes_edgeflag ||
-		       info->writes_layer || info->writes_viewport_index;
-
 	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
-		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
-		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
-		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
-	        S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
+		vs_sel->pa_cl_vs_out_cntl |
 		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
 		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
-		S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
-		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
 		clipdist_mask | (culldist_mask << 8));
 	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
 		rs->pa_cl_clip_cntl |
 		ucp_mask |
 		S_028810_CLIP_DISABLE(window_space));
 
 	if (sctx->b.chip_class <= VI) {
 		/* reuse needs to be set off if we write oViewport */
 		radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
 				       S_028AB4_REUSE_OFF(info->writes_viewport_index));
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 078b5e6..2e33138 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2080,20 +2080,36 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
 		for (i = 0; i < sel->info.num_inputs; i++) {
 			if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
 				int index = sel->info.input_semantic_index[i];
 				sel->color_attr_index[index] = i;
 			}
 		}
 		break;
 	}
 
+	/* PA_CL_VS_OUT_CNTL */
+	bool misc_vec_ena =
+		sel->info.writes_psize || sel->info.writes_edgeflag ||
+		sel->info.writes_layer || sel->info.writes_viewport_index;
+	sel->pa_cl_vs_out_cntl =
+		S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) |
+		S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag) |
+		S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
+		S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
+		S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena);
+	sel->clipdist_mask = sel->info.writes_clipvertex ?
+				     SIX_BITS : sel->info.clipdist_writemask;
+	sel->culldist_mask = sel->info.culldist_writemask <<
+			     sel->info.num_written_clipdistance;
+
 	/* DB_SHADER_CONTROL */
 	sel->db_shader_control =
 		S_02880C_Z_EXPORT_ENABLE(sel->info.writes_z) |
 		S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(sel->info.writes_stencil) |
 		S_02880C_MASK_EXPORT_ENABLE(sel->info.writes_samplemask) |
 		S_02880C_KILL_ENABLE(sel->info.uses_kill);
 
 	switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) {
 	case TGSI_FS_DEPTH_LAYOUT_GREATER:
 		sel->db_shader_control |=
-- 
2.7.4



More information about the mesa-dev mailing list