[Mesa-dev] [PATCH 1/2] radeonsi: fix NUM_SGPRS calculation once more

Marek Olšák maraeo at gmail.com
Wed Apr 13 12:19:00 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

This fixes GS piglit failures after adding SI_PARAM_SHADER_BUFFERS,
which bumped NUM_USER_SGPRS and uncovered this bug on SI.

If this were fixed in LLVM, these workarounds wouldn't be needed.

LLVM would have to look at the calling convention to know how many SGPR
inputs are declared, and then count the SGPRs for VCC and the scratch wave
offset on top of them (the latter is enabled even if we spill SGPRs but not VGPRs, oh well).
---
 src/gallium/drivers/radeonsi/si_shader.c        | 11 ++++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 67 +++++--------------------
 2 files changed, 23 insertions(+), 55 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c58467d..31fae85 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6761,6 +6761,16 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 	return true;
 }
 
+static void si_fix_num_sgprs(struct si_shader *shader)
+{
+	unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* input SGPRs plus 2 SGPRs for VCC */
+
+	if (shader->config.scratch_bytes_per_wave)
+		min_sgprs += 2; /* nonzero scratch size: 2 more for the scratch wave offset */
+
+	shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
+}
+
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		     struct si_shader *shader,
 		     struct pipe_debug_callback *debug)
@@ -6850,6 +6860,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		}
 	}
 
+	si_fix_num_sgprs(shader);
 	si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
 		       stderr);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index b7ebb48..1ce7ecc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -306,7 +306,7 @@ static void si_set_tesseval_regs(struct si_shader *shader,
 static void si_shader_ls(struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
-	unsigned num_sgprs, num_user_sgprs;
+	unsigned num_user_sgprs;
 	unsigned vgpr_comp_cnt;
 	uint64_t va;
 
@@ -322,18 +322,12 @@ static void si_shader_ls(struct si_shader *shader)
 	vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
 
 	num_user_sgprs = SI_LS_NUM_USER_SGPR;
-	num_sgprs = shader->config.num_sgprs;
-	if (num_user_sgprs > num_sgprs) {
-		/* Last 2 reserved SGPRs are used for VCC */
-		num_sgprs = num_user_sgprs + 2;
-	}
-	assert(num_sgprs <= 104);
 
 	si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
 	si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
 
 	shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
-			   S_00B528_SGPRS((num_sgprs - 1) / 8) |
+			   S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
 			   S_00B528_DX10_CLAMP(1) |
 			   S_00B528_FLOAT_MODE(shader->config.float_mode);
@@ -344,7 +338,7 @@ static void si_shader_ls(struct si_shader *shader)
 static void si_shader_hs(struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
-	unsigned num_sgprs, num_user_sgprs;
+	unsigned num_user_sgprs;
 	uint64_t va;
 
 	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -355,20 +349,12 @@ static void si_shader_hs(struct si_shader *shader)
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
 
 	num_user_sgprs = SI_TCS_NUM_USER_SGPR;
-	num_sgprs = shader->config.num_sgprs;
-	/* One SGPR after user SGPRs is pre-loaded with tessellation factor
-	 * buffer offset. */
-	if ((num_user_sgprs + 1) > num_sgprs) {
-		/* Last 2 reserved SGPRs are used for VCC */
-		num_sgprs = num_user_sgprs + 1 + 2;
-	}
-	assert(num_sgprs <= 104);
 
 	si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
 	si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
 	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
 		       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
-		       S_00B428_SGPRS((num_sgprs - 1) / 8) |
+		       S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		       S_00B428_DX10_CLAMP(1) |
 		       S_00B428_FLOAT_MODE(shader->config.float_mode));
 	si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
@@ -379,7 +365,7 @@ static void si_shader_hs(struct si_shader *shader)
 static void si_shader_es(struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
-	unsigned num_sgprs, num_user_sgprs;
+	unsigned num_user_sgprs;
 	unsigned vgpr_comp_cnt;
 	uint64_t va;
 
@@ -400,21 +386,13 @@ static void si_shader_es(struct si_shader *shader)
 	} else
 		unreachable("invalid shader selector type");
 
-	num_sgprs = shader->config.num_sgprs;
-	/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
-	if ((num_user_sgprs + 1) > num_sgprs) {
-		/* Last 2 reserved SGPRs are used for VCC */
-		num_sgprs = num_user_sgprs + 1 + 2;
-	}
-	assert(num_sgprs <= 104);
-
 	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
 		       shader->selector->esgs_itemsize / 4);
 	si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
 	si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
 	si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
 		       S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
-		       S_00B328_SGPRS((num_sgprs - 1) / 8) |
+		       S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
 		       S_00B328_DX10_CLAMP(1) |
 		       S_00B328_FLOAT_MODE(shader->config.float_mode));
@@ -458,7 +436,7 @@ static void si_shader_gs(struct si_shader *shader)
 	unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
 	unsigned gs_num_invocations = shader->selector->gs_num_invocations;
 	struct si_pm4_state *pm4;
-	unsigned num_sgprs, num_user_sgprs;
+	unsigned num_user_sgprs;
 	uint64_t va;
 	unsigned max_stream = shader->selector->max_gs_stream;
 
@@ -495,17 +473,10 @@ static void si_shader_gs(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
 
 	num_user_sgprs = SI_GS_NUM_USER_SGPR;
-	num_sgprs = shader->config.num_sgprs;
-	/* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
-	if ((num_user_sgprs + 2) > num_sgprs) {
-		/* Last 2 reserved SGPRs are used for VCC */
-		num_sgprs = num_user_sgprs + 2 + 2;
-	}
-	assert(num_sgprs <= 104);
 
 	si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
 		       S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
-		       S_00B228_SGPRS((num_sgprs - 1) / 8) |
+		       S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		       S_00B228_DX10_CLAMP(1) |
 		       S_00B228_FLOAT_MODE(shader->config.float_mode));
 	si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
@@ -523,7 +494,7 @@ static void si_shader_gs(struct si_shader *shader)
 static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
 {
 	struct si_pm4_state *pm4;
-	unsigned num_sgprs, num_user_sgprs;
+	unsigned num_user_sgprs;
 	unsigned nparams, vgpr_comp_cnt;
 	uint64_t va;
 	unsigned window_space =
@@ -566,13 +537,6 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
 	} else
 		unreachable("invalid shader selector type");
 
-	num_sgprs = shader->config.num_sgprs;
-	if (num_user_sgprs > num_sgprs) {
-		/* Last 2 reserved SGPRs are used for VCC */
-		num_sgprs = num_user_sgprs + 2;
-	}
-	assert(num_sgprs <= 104);
-
 	/* VS is required to export at least one param. */
 	nparams = MAX2(shader->info.nr_param_exports, 1);
 	si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
@@ -594,7 +558,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
 	si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
 	si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
 		       S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
-		       S_00B128_SGPRS((num_sgprs - 1) / 8) |
+		       S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
 		       S_00B128_DX10_CLAMP(1) |
 		       S_00B128_FLOAT_MODE(shader->config.float_mode));
@@ -684,7 +648,7 @@ static void si_shader_ps(struct si_shader *shader)
 	struct tgsi_shader_info *info = &shader->selector->info;
 	struct si_pm4_state *pm4;
 	unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
-	unsigned num_sgprs, num_user_sgprs;
+	unsigned num_user_sgprs;
 	unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
 	uint64_t va;
 	bool has_centroid;
@@ -772,17 +736,10 @@ static void si_shader_ps(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
 
 	num_user_sgprs = SI_PS_NUM_USER_SGPR;
-	num_sgprs = shader->config.num_sgprs;
-	/* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
-	if ((num_user_sgprs + 1) > num_sgprs) {
-		/* Last 2 reserved SGPRs are used for VCC */
-		num_sgprs = num_user_sgprs + 1 + 2;
-	}
-	assert(num_sgprs <= 104);
 
 	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
 		       S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
-		       S_00B028_SGPRS((num_sgprs - 1) / 8) |
+		       S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		       S_00B028_DX10_CLAMP(1) |
 		       S_00B028_FLOAT_MODE(shader->config.float_mode));
 	si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
-- 
2.5.0



More information about the mesa-dev mailing list