[Mesa-dev] [PATCH 20/61] radeonsi/gfx9: define and set LS-HS user SGPRs

Marek Olšák maraeo at gmail.com
Mon Apr 24 08:45:17 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_descriptors.c   | 20 +++++++++++++----
 src/gallium/drivers/radeonsi/si_shader.c        | 10 ++++-----
 src/gallium/drivers/radeonsi/si_shader.h        | 30 +++++++++++++++++++------
 src/gallium/drivers/radeonsi/si_state_draw.c    | 12 ++++++++--
 src/gallium/drivers/radeonsi/si_state_shaders.c |  5 +++--
 5 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index bd73fcc..f04ed87 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1979,49 +1979,61 @@ void si_emit_compute_shader_userdata(struct si_context *sctx)
 	sctx->shader_pointers_dirty &= ~compute_mask;
 }
 
 /* INIT/DEINIT/UPLOAD */
 
 void si_init_all_descriptors(struct si_context *sctx)
 {
 	int i;
 	unsigned ce_offset = 0;
 
+	STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0);
+
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
+		bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
+				i == PIPE_SHADER_TESS_CTRL;
 		/* GFX9 has only 4KB of CE, while previous chips had 32KB.
 		 * Rarely used descriptors don't use CE RAM.
 		 */
 		bool big_ce = sctx->b.chip_class <= VI;
 		bool images_use_ce = big_ce;
 		bool shaderbufs_use_ce = big_ce ||
 					 i == PIPE_SHADER_COMPUTE;
 		bool samplers_use_ce = big_ce ||
 				       i == PIPE_SHADER_FRAGMENT;
 
 		si_init_buffer_resources(&sctx->const_buffers[i],
 					 si_const_buffer_descriptors(sctx, i),
-					 SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
+					 SI_NUM_CONST_BUFFERS,
+					 gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS :
+						    SI_SGPR_CONST_BUFFERS,
 					 RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
 					 &ce_offset);
 		si_init_buffer_resources(&sctx->shader_buffers[i],
 					 si_shader_buffer_descriptors(sctx, i),
-					 SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
+					 SI_NUM_SHADER_BUFFERS,
+					 gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS :
+						    SI_SGPR_SHADER_BUFFERS,
 					 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
 					 shaderbufs_use_ce ? &ce_offset : NULL);
 
 		si_init_descriptors(si_sampler_descriptors(sctx, i),
-				    SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
+				    gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
+					       SI_SGPR_SAMPLERS,
+				    16, SI_NUM_SAMPLERS,
 				    null_texture_descriptor,
 				    samplers_use_ce ? &ce_offset : NULL);
 
 		si_init_descriptors(si_image_descriptors(sctx, i),
-				    SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
+				    gfx9_tcs ? GFX9_SGPR_TCS_IMAGES :
+					       SI_SGPR_IMAGES,
+				    8, SI_NUM_IMAGES,
 				    null_image_descriptor,
 				    images_use_ce ? &ce_offset : NULL);
 	}
 
 	si_init_buffer_resources(&sctx->rw_buffers,
 				 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
 				 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
 				 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS,
 				 &ce_offset);
 	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 6c1565b..4ea1633 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2640,32 +2640,32 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 	ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
 	ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");
 
 	/* Tess offchip and factor buffer soffset are after user SGPRs. */
 	offchip_layout = LLVMGetParam(ctx->main_fn,
 				      SI_PARAM_TCS_OFFCHIP_LAYOUT);
 	offchip_soffset = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
 	tf_soffset = LLVMGetParam(ctx->main_fn,
 				  SI_PARAM_TESS_FACTOR_OFFSET);
 	ret = LLVMBuildInsertValue(builder, ret, offchip_layout,
-				   SI_SGPR_TCS_OFFCHIP_LAYOUT, "");
+				   GFX6_SGPR_TCS_OFFCHIP_LAYOUT, "");
 	ret = LLVMBuildInsertValue(builder, ret, offchip_soffset,
-				   SI_TCS_NUM_USER_SGPR, "");
+				   GFX6_TCS_NUM_USER_SGPR, "");
 	ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
-				   SI_TCS_NUM_USER_SGPR + 1, "");
+				   GFX6_TCS_NUM_USER_SGPR + 1, "");
 
 	/* VGPRs */
 	rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
 	invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
 	tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
 
-	vgpr = SI_TCS_NUM_USER_SGPR + 2;
+	vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
 	ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
 	ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
 	ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
 	ctx->return_value = ret;
 }
 
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct si_shader *shader = ctx->shader;
@@ -5707,21 +5707,21 @@ static void create_function(struct si_shader_context *ctx)
 		last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
 
 		/* VGPRs */
 		params[SI_PARAM_PATCH_ID] = ctx->i32;
 		params[SI_PARAM_REL_IDS] = ctx->i32;
 		num_params = SI_PARAM_REL_IDS+1;
 
 		/* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are
 		 * placed after the user SGPRs.
 		 */
-		for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++)
+		for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
 			returns[num_returns++] = ctx->i32; /* SGPRs */
 
 		for (i = 0; i < 3; i++)
 			returns[num_returns++] = ctx->f32; /* VGPRs */
 		break;
 
 	case PIPE_SHADER_TESS_EVAL:
 		params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32;
 		num_params = SI_PARAM_TCS_OFFCHIP_LAYOUT+1;
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 69874e4..fa6f9af 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -94,29 +94,45 @@ enum {
 
 	/* all VS variants */
 	SI_SGPR_VERTEX_BUFFERS	= SI_NUM_RESOURCE_SGPRS,
 	SI_SGPR_VERTEX_BUFFERS_HI,
 	SI_SGPR_BASE_VERTEX,
 	SI_SGPR_START_INSTANCE,
 	SI_SGPR_DRAWID,
 	SI_SGPR_VS_STATE_BITS,
 	SI_VS_NUM_USER_SGPR,
 
-	/* both TCS and TES */
-	SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
+	/* TES */
+	SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
 	SI_TES_NUM_USER_SGPR,
 
-	/* TCS only */
-	SI_SGPR_TCS_OUT_OFFSETS = SI_TES_NUM_USER_SGPR,
-	SI_SGPR_TCS_OUT_LAYOUT,
-	SI_SGPR_TCS_IN_LAYOUT,
-	SI_TCS_NUM_USER_SGPR,
+	/* GFX6-8: TCS only */
+	GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
+	GFX6_SGPR_TCS_OUT_OFFSETS,
+	GFX6_SGPR_TCS_OUT_LAYOUT,
+	GFX6_SGPR_TCS_IN_LAYOUT,
+	GFX6_TCS_NUM_USER_SGPR,
+
+	/* GFX9: Merged LS-HS (VS-TCS) only. */
+	GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
+	GFX9_SGPR_TCS_OUT_OFFSETS,
+	GFX9_SGPR_TCS_OUT_LAYOUT,
+	GFX9_SGPR_unused_to_align_the_next_pointer,
+	GFX9_SGPR_TCS_CONST_BUFFERS,
+	GFX9_SGPR_TCS_CONST_BUFFERS_HI,
+	GFX9_SGPR_TCS_SAMPLERS,  /* images & sampler states interleaved */
+	GFX9_SGPR_TCS_SAMPLERS_HI,
+	GFX9_SGPR_TCS_IMAGES,
+	GFX9_SGPR_TCS_IMAGES_HI,
+	GFX9_SGPR_TCS_SHADER_BUFFERS,
+	GFX9_SGPR_TCS_SHADER_BUFFERS_HI,
+	GFX9_TCS_NUM_USER_SGPR,
 
 	/* GS limits */
 	SI_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
 	SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS_HI + 1,
 
 	/* PS only */
 	SI_SGPR_ALPHA_REF	= SI_NUM_RESOURCE_SGPRS,
 	SI_PS_NUM_USER_SGPR,
 
 	/* CS only */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 4feadbe..de97c0e 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -228,45 +228,53 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	/* Set SI_SGPR_VS_STATE_BITS. */
 	sctx->current_vs_state &= C_VS_STATE_LS_OUT_PATCH_SIZE &
 				  C_VS_STATE_LS_OUT_VERTEX_SIZE;
 	sctx->current_vs_state |= tcs_in_layout;
 
 	if (sctx->b.chip_class >= GFX9) {
 		unsigned hs_rsrc2 = ls_current->config.rsrc2 |
 				    S_00B42C_LDS_SIZE(lds_size);
 
 		radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
+
+		/* Set userdata SGPRs for merged LS-HS. */
+		radeon_set_sh_reg_seq(cs,
+				      R_00B430_SPI_SHADER_USER_DATA_LS_0 +
+				      GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3);
+		radeon_emit(cs, offchip_layout);
+		radeon_emit(cs, tcs_out_offsets);
+		radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
 	} else {
 		unsigned ls_rsrc2 = ls_current->config.rsrc2;
 
 		si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
 		ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
 
 		/* Due to a hw bug, RSRC2_LS must be written twice with another
 		 * LS register written in between. */
 		if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
 			radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
 		radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
 		radeon_emit(cs, ls_current->config.rsrc1);
 		radeon_emit(cs, ls_rsrc2);
 
 		/* Set userdata SGPRs for TCS. */
 		radeon_set_sh_reg_seq(cs,
-			R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
+			R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
 		radeon_emit(cs, offchip_layout);
 		radeon_emit(cs, tcs_out_offsets);
 		radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
 		radeon_emit(cs, tcs_in_layout);
 	}
 
 	/* Set userdata SGPRs for TES. */
-	radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 1);
+	radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 1);
 	radeon_emit(cs, offchip_layout);
 
 	ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) |
 		       S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
 		       S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
 
 	if (sctx->b.chip_class >= CIK)
 		radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2,
 					   ls_hs_config);
 	else
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 943f7b9..a330bc0 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -484,28 +484,29 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 
 	if (sscreen->b.chip_class >= GFX9) {
 		si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
 		si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40);
 
 		/* We need at least 2 components for LS.
 		 * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
 		ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
 
 		shader->config.rsrc2 =
-			S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+			S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) |
+			S_00B42C_USER_SGPR_MSB(GFX9_TCS_NUM_USER_SGPR >> 5) |
 			S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 	} else {
 		si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
 		si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
 
 		shader->config.rsrc2 =
-			S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+			S_00B42C_USER_SGPR(GFX6_TCS_NUM_USER_SGPR) |
 			S_00B42C_OC_LDS_EN(1) |
 			S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 	}
 
 	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
 		       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		       S_00B428_DX10_CLAMP(1) |
 		       S_00B428_FLOAT_MODE(shader->config.float_mode) |
 		       S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt));
-- 
2.7.4



More information about the mesa-dev mailing list