[Mesa-dev] [PATCH 5/6] radeonsi: provide VS_STATE input to all VS variants

Nicolai Hähnle nhaehnle at gmail.com
Wed Apr 12 09:20:02 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/drivers/radeonsi/si_descriptors.c   |  6 +++++-
 src/gallium/drivers/radeonsi/si_shader.c        | 15 ++++++---------
 src/gallium/drivers/radeonsi/si_shader.h        | 13 +------------
 src/gallium/drivers/radeonsi/si_state_draw.c    |  9 +++++----
 src/gallium/drivers/radeonsi/si_state_shaders.c |  4 ++--
 5 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 8f5a16b..4cb6d86 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1823,22 +1823,26 @@ static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
  * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
  */
 static void si_set_user_data_base(struct si_context *sctx,
 				  unsigned shader, uint32_t new_base)
 {
 	uint32_t *base = &sctx->shader_userdata.sh_base[shader];
 
 	if (*base != new_base) {
 		*base = new_base;
 
-		if (new_base)
+		if (new_base) {
 			si_mark_shader_pointers_dirty(sctx, shader);
+
+			if (shader == PIPE_SHADER_VERTEX)
+				sctx->last_vs_state = ~0;
+		}
 	}
 }
 
 /* This must be called when these shaders are changed from non-NULL to NULL
  * and vice versa:
  * - geometry shader
  * - tessellation control shader
  * - tessellation evaluation shader
  */
 void si_shader_change_notify(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0f080cf..02447dd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -220,33 +220,33 @@ static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
  * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
  * - ...
  *
  * All three shaders VS(LS), TCS, TES share the same LDS space.
  */
 
 static LLVMValueRef
 get_tcs_in_patch_stride(struct si_shader_context *ctx)
 {
 	if (ctx->type == PIPE_SHADER_VERTEX)
-		return unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 8, 13);
+		return unpack_param(ctx, SI_PARAM_VS_STATE_BITS, 8, 13);
 	else if (ctx->type == PIPE_SHADER_TESS_CTRL)
 		return unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 8, 13);
 	else {
 		assert(0);
 		return NULL;
 	}
 }
 
 static LLVMValueRef
 get_tcs_out_patch_stride(struct si_shader_context *ctx)
 {
-	return unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
+	return unpack_param(ctx, SI_PARAM_VS_STATE_BITS, 0, 13);
 }
 
 static LLVMValueRef
 get_tcs_out_patch0_offset(struct si_shader_context *ctx)
 {
 	return lp_build_mul_imm(&ctx->bld_base.uint_bld,
 				unpack_param(ctx,
 					     SI_PARAM_TCS_OUT_OFFSETS,
 					     0, 16),
 				4);
@@ -2656,21 +2656,21 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	unsigned i, chan;
 	LLVMValueRef vertex_id = LLVMGetParam(ctx->main_fn,
 					      ctx->param_rel_auto_id);
 	LLVMValueRef vertex_dw_stride =
-		unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 24, 8);
+		unpack_param(ctx, SI_PARAM_VS_STATE_BITS, 24, 8);
 	LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
 						 vertex_dw_stride, "");
 
 	/* Write outputs to LDS. The next shader (TCS aka HS) will read
 	 * its inputs from it. */
 	for (i = 0; i < info->num_outputs; i++) {
 		LLVMValueRef *out_ptr = ctx->outputs[i];
 		unsigned name = info->output_semantic_name[i];
 		unsigned index = info->output_semantic_index[i];
 		int param = si_shader_io_get_unique_index(name, index);
@@ -5602,33 +5602,30 @@ static void create_function(struct si_shader_context *ctx)
 	params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
 	params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
 	params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
 
 	switch (ctx->type) {
 	case PIPE_SHADER_VERTEX:
 		params[SI_PARAM_VERTEX_BUFFERS] = const_array(ctx->v16i8, SI_MAX_ATTRIBS);
 		params[SI_PARAM_BASE_VERTEX] = ctx->i32;
 		params[SI_PARAM_START_INSTANCE] = ctx->i32;
 		params[SI_PARAM_DRAWID] = ctx->i32;
-		num_params = SI_PARAM_DRAWID+1;
+		params[SI_PARAM_VS_STATE_BITS] = ctx->i32;
+		num_params = SI_PARAM_VS_STATE_BITS+1;
 
 		if (shader->key.as_es) {
 			params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
 		} else if (shader->key.as_ls) {
-			params[SI_PARAM_LS_OUT_LAYOUT] = ctx->i32;
-			num_params = SI_PARAM_LS_OUT_LAYOUT+1;
+			/* no extra parameters */
 		} else {
 			if (shader->is_gs_copy_shader) {
 				num_params = SI_PARAM_RW_BUFFERS+1;
-			} else {
-				params[SI_PARAM_VS_STATE_BITS] = ctx->i32;
-				num_params = SI_PARAM_VS_STATE_BITS+1;
 			}
 
 			/* The locations of the other parameters are assigned dynamically. */
 			declare_streamout_params(ctx, &shader->selector->so,
 						 params, ctx->i32, &num_params);
 		}
 
 		last_sgpr = num_params-1;
 
 		/* VGPRs */
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 6ce2b26..fdb0dd4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -91,30 +91,23 @@ enum {
 	SI_SGPR_SHADER_BUFFERS,
 	SI_SGPR_SHADER_BUFFERS_HI,
 	SI_NUM_RESOURCE_SGPRS,
 
 	/* all VS variants */
 	SI_SGPR_VERTEX_BUFFERS	= SI_NUM_RESOURCE_SGPRS,
 	SI_SGPR_VERTEX_BUFFERS_HI,
 	SI_SGPR_BASE_VERTEX,
 	SI_SGPR_START_INSTANCE,
 	SI_SGPR_DRAWID,
-	SI_ES_NUM_USER_SGPR,
-
-	/* hw VS only */
-	SI_SGPR_VS_STATE_BITS	= SI_ES_NUM_USER_SGPR,
+	SI_SGPR_VS_STATE_BITS,
 	SI_VS_NUM_USER_SGPR,
 
-	/* hw LS only */
-	SI_SGPR_LS_OUT_LAYOUT	= SI_ES_NUM_USER_SGPR,
-	SI_LS_NUM_USER_SGPR,
-
 	/* both TCS and TES */
 	SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
 	SI_TES_NUM_USER_SGPR,
 
 	/* TCS only */
 	SI_SGPR_TCS_OUT_OFFSETS = SI_TES_NUM_USER_SGPR,
 	SI_SGPR_TCS_OUT_LAYOUT,
 	SI_SGPR_TCS_IN_LAYOUT,
 	SI_TCS_NUM_USER_SGPR,
 
@@ -139,25 +132,21 @@ enum {
 	SI_PARAM_SAMPLERS,
 	SI_PARAM_IMAGES,
 	SI_PARAM_SHADER_BUFFERS,
 	SI_NUM_RESOURCE_PARAMS,
 
 	/* VS only parameters */
 	SI_PARAM_VERTEX_BUFFERS	= SI_NUM_RESOURCE_PARAMS,
 	SI_PARAM_BASE_VERTEX,
 	SI_PARAM_START_INSTANCE,
 	SI_PARAM_DRAWID,
-	/* [0] = clamp vertex color, VS as VS only */
 	SI_PARAM_VS_STATE_BITS,
-	/* same value as TCS_IN_LAYOUT, VS as LS only */
-	SI_PARAM_LS_OUT_LAYOUT = SI_PARAM_DRAWID + 1,
-	/* the other VS parameters are assigned dynamically */
 
 	/* Layout of TCS outputs in the offchip buffer
 	 *   [0:8] = the number of patches per threadgroup.
 	 *   [9:15] = the number of output vertices per patch.
 	 *   [16:31] = the offset of per patch attributes in the buffer in bytes.
 	 */
 	SI_PARAM_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */
 
 	/* TCS only parameters. */
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 7bf4f4d..0d70ea9 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -215,23 +215,23 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
 			S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4);
 	tcs_out_layout = (output_patch_size / 4) |
 			 ((output_vertex_size / 4) << 13);
 	tcs_out_offsets = (output_patch0_offset / 16) |
 			  ((perpatch_output_offset / 16) << 16);
 	offchip_layout = (pervertex_output_patch_size * *num_patches << 16) |
 			 (num_tcs_output_cp << 9) | *num_patches;
 
 	/* Set them for LS. */
-	radeon_set_sh_reg(cs,
-		R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
-		tcs_in_layout);
+	sctx->current_vs_state &= C_VS_STATE_LS_OUT_PATCH_SIZE &
+				  C_VS_STATE_LS_OUT_VERTEX_SIZE;
+	sctx->current_vs_state |= tcs_in_layout;
 
 	/* Set them for TCS. */
 	radeon_set_sh_reg_seq(cs,
 		R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
 	radeon_emit(cs, offchip_layout);
 	radeon_emit(cs, tcs_out_offsets);
 	radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
 	radeon_emit(cs, tcs_in_layout);
 
 	/* Set them for TES. */
@@ -493,21 +493,22 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 	sctx->last_rast_prim = rast_prim;
 	sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
 }
 
 static void si_emit_vs_state(struct si_context *sctx)
 {
 	if (sctx->current_vs_state != sctx->last_vs_state) {
 		struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
 		radeon_set_sh_reg(cs,
-			R_00B130_SPI_SHADER_USER_DATA_VS_0 + SI_SGPR_VS_STATE_BITS * 4,
+			sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX] +
+			SI_SGPR_VS_STATE_BITS * 4,
 			sctx->current_vs_state);
 
 		sctx->last_vs_state = sctx->current_vs_state;
 	}
 }
 
 static void si_emit_draw_registers(struct si_context *sctx,
 				   const struct pipe_draw_info *info,
 				   unsigned num_patches)
 {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ff4ff01..c52ffd9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -458,21 +458,21 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
 	vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
 
 	si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
 	si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
 
 	shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
 			   S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
 			   S_00B528_DX10_CLAMP(1) |
 			   S_00B528_FLOAT_MODE(shader->config.float_mode);
-	shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) |
+	shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_VS_NUM_USER_SGPR) |
 			   S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
 
 static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
 	uint64_t va;
 
 	pm4 = si_get_shader_pm4_state(shader);
 	if (!pm4)
@@ -506,21 +506,21 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 
 	pm4 = si_get_shader_pm4_state(shader);
 	if (!pm4)
 		return;
 
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
 	if (shader->selector->type == PIPE_SHADER_VERTEX) {
 		vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
-		num_user_sgprs = SI_ES_NUM_USER_SGPR;
+		num_user_sgprs = SI_VS_NUM_USER_SGPR;
 	} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
 		vgpr_comp_cnt = 3; /* all components are needed for TES */
 		num_user_sgprs = SI_TES_NUM_USER_SGPR;
 	} else
 		unreachable("invalid shader selector type");
 
 	oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
 
 	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
 		       shader->selector->esgs_itemsize / 4);
-- 
2.9.3



More information about the mesa-dev mailing list