[Mesa-dev] [PATCH 19/61] radeonsi/gfx9: set up shader registers for merged LS-HS

Marek Olšák maraeo at gmail.com
Mon Apr 24 08:45:16 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_pipe.h          |  3 ++-
 src/gallium/drivers/radeonsi/si_state_draw.c    | 31 ++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_state_shaders.c | 36 ++++++++++++++++++++-----
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 0978831..918aa0f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -355,21 +355,22 @@ struct si_context {
 
 	/* Scratch buffer */
 	struct r600_atom	scratch_state;
 	struct r600_resource	*scratch_buffer;
 	unsigned		scratch_waves;
 	unsigned		spi_tmpring_size;
 
 	struct r600_resource	*compute_scratch_buffer;
 
 	/* Emitted derived tessellation state. */
-	struct si_shader	*last_ls; /* local shader (VS) */
+	/* Local shader (VS), or HS if LS-HS are merged. */
+	struct si_shader	*last_ls;
 	struct si_shader_selector *last_tcs;
 	int			last_num_tcs_input_cp;
 	int			last_tes_sh_base;
 	unsigned		last_num_patches;
 
 	/* Debug state. */
 	bool			is_debug;
 	struct radeon_saved_cs	last_gfx;
 	struct r600_resource	*last_trace_buf;
 	struct r600_resource	*trace_buf;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c24d607..4feadbe 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -89,51 +89,65 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
  * LS.LDS_SIZE is shared by all 3 shader stages.
  *
  * The information about LDS and other non-compile-time parameters is then
  * written to userdata SGPRs.
  */
 static void si_emit_derived_tess_state(struct si_context *sctx,
 				       const struct pipe_draw_info *info,
 				       unsigned *num_patches)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-	struct si_shader_ctx_state *ls = &sctx->vs_shader;
+	struct si_shader *ls_current;
+	struct si_shader_selector *ls;
 	/* The TES pointer will only be used for sctx->last_tcs.
 	 * It would be wrong to think that TCS = TES. */
 	struct si_shader_selector *tcs =
 		sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
 	unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
 	unsigned num_tcs_input_cp = info->vertices_per_patch;
 	unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
 	unsigned num_tcs_patch_outputs;
 	unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
 	unsigned input_patch_size, output_patch_size, output_patch0_offset;
 	unsigned perpatch_output_offset, lds_size;
 	unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
 	unsigned offchip_layout, hardware_lds_size, ls_hs_config;
 
-	if (sctx->last_ls == ls->current &&
+	/* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */
+	if (sctx->b.chip_class >= GFX9) {
+		if (sctx->tcs_shader.cso)
+			ls_current = sctx->tcs_shader.current;
+		else
+			ls_current = sctx->fixed_func_tcs_shader.current;
+
+		ls = ls_current->key.part.tcs.ls;
+	} else {
+		ls_current = sctx->vs_shader.current;
+		ls = sctx->vs_shader.cso;
+	}
+
+	if (sctx->last_ls == ls_current &&
 	    sctx->last_tcs == tcs &&
 	    sctx->last_tes_sh_base == tes_sh_base &&
 	    sctx->last_num_tcs_input_cp == num_tcs_input_cp) {
 		*num_patches = sctx->last_num_patches;
 		return;
 	}
 
-	sctx->last_ls = ls->current;
+	sctx->last_ls = ls_current;
 	sctx->last_tcs = tcs;
 	sctx->last_tes_sh_base = tes_sh_base;
 	sctx->last_num_tcs_input_cp = num_tcs_input_cp;
 
 	/* This calculates how shader inputs and outputs among VS, TCS, and TES
 	 * are laid out in LDS. */
-	num_tcs_inputs = util_last_bit64(ls->cso->outputs_written);
+	num_tcs_inputs = util_last_bit64(ls->outputs_written);
 
 	if (sctx->tcs_shader.cso) {
 		num_tcs_outputs = util_last_bit64(tcs->outputs_written);
 		num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
 		num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
 	} else {
 		/* No TCS. Route varyings from LS to TES. */
 		num_tcs_outputs = num_tcs_inputs;
 		num_tcs_output_cp = num_tcs_input_cp;
 		num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
@@ -210,33 +224,36 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 		assert(lds_size <= 32768);
 		lds_size = align(lds_size, 256) / 256;
 	}
 
 	/* Set SI_SGPR_VS_STATE_BITS. */
 	sctx->current_vs_state &= C_VS_STATE_LS_OUT_PATCH_SIZE &
 				  C_VS_STATE_LS_OUT_VERTEX_SIZE;
 	sctx->current_vs_state |= tcs_in_layout;
 
 	if (sctx->b.chip_class >= GFX9) {
-		// TODO
+		unsigned hs_rsrc2 = ls_current->config.rsrc2 |
+				    S_00B42C_LDS_SIZE(lds_size);
+
+		radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
 	} else {
-		unsigned ls_rsrc2 = ls->current->config.rsrc2;
+		unsigned ls_rsrc2 = ls_current->config.rsrc2;
 
 		si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
 		ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
 
 		/* Due to a hw bug, RSRC2_LS must be written twice with another
 		 * LS register written in between. */
 		if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
 			radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
 		radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
-		radeon_emit(cs, ls->current->config.rsrc1);
+		radeon_emit(cs, ls_current->config.rsrc1);
 		radeon_emit(cs, ls_rsrc2);
 
 		/* Set userdata SGPRs for TCS. */
 		radeon_set_sh_reg_seq(cs,
 			R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
 		radeon_emit(cs, offchip_layout);
 		radeon_emit(cs, tcs_out_offsets);
 		radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
 		radeon_emit(cs, tcs_in_layout);
 	}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4ac6182..943f7b9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -466,39 +466,61 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
 			   S_00B528_DX10_CLAMP(1) |
 			   S_00B528_FLOAT_MODE(shader->config.float_mode);
 	shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_VS_NUM_USER_SGPR) |
 			   S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
 
 static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
 	uint64_t va;
+	unsigned ls_vgpr_comp_cnt = 0;
 
 	pm4 = si_get_shader_pm4_state(shader);
 	if (!pm4)
 		return;
 
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
-	si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
-	si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
+	if (sscreen->b.chip_class >= GFX9) {
+		si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
+		si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40);
+
+		/* LS needs at least 2 components: 1 = VGPR0-1, 3 = VGPR0-3 (InstanceID).
+		 * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
+		ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
+
+		shader->config.rsrc2 =
+			S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+			S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
+	} else {
+		si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
+		si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
+
+		shader->config.rsrc2 =
+			S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+			S_00B42C_OC_LDS_EN(1) |
+			S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
+	}
+
 	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
 		       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		       S_00B428_DX10_CLAMP(1) |
-		       S_00B428_FLOAT_MODE(shader->config.float_mode));
-	si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
-		       S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
-		       S_00B42C_OC_LDS_EN(sscreen->b.chip_class <= VI) |
-		       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
+		       S_00B428_FLOAT_MODE(shader->config.float_mode) |
+		       S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt));
+
+	if (sscreen->b.chip_class <= VI) {
+		si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
+			       shader->config.rsrc2);
+	}
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
 	unsigned num_user_sgprs;
 	unsigned vgpr_comp_cnt;
 	uint64_t va;
 	unsigned oc_lds_en;
 
-- 
2.7.4



More information about the mesa-dev mailing list