[Mesa-dev] [PATCH 04/16] radeonsi/gfx9: move RW_BUFFERS from s[0:1] to s[8:9] for HS and GS

Marek Olšák maraeo at gmail.com
Fri Oct 13 12:04:00 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

Let's use the same user data SGPRs in all stages.
(for SPI_SHADER_USER_DATA_COMMON_0)
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 24 ++++-----------------
 src/gallium/drivers/radeonsi/si_shader.c      | 31 +++++++++++----------------
 2 files changed, 16 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index cda7d94..9ba8df6 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1973,47 +1973,31 @@ static void si_emit_shader_pointer(struct si_context *sctx,
 	radeon_emit(cs, va >> 32);
 }
 
 static void si_emit_global_shader_pointers(struct si_context *sctx,
 					   struct si_descriptors *descs)
 {
 	si_emit_shader_pointer(sctx, descs,
 			       R_00B030_SPI_SHADER_USER_DATA_PS_0);
 	si_emit_shader_pointer(sctx, descs,
 			       R_00B130_SPI_SHADER_USER_DATA_VS_0);
+	si_emit_shader_pointer(sctx, descs,
+			       R_00B330_SPI_SHADER_USER_DATA_ES_0);
 
 	if (sctx->b.chip_class >= GFX9) {
 		/* GFX9 merged LS-HS and ES-GS. */
-		if (descs == &sctx->descriptors[SI_DESCS_RW_BUFFERS]) {
-			/* Set RW_BUFFERS in the special registers, so that
-			 * it's preloaded into s[0:1] instead of s[8:9].
-			 */
-			si_emit_shader_pointer(sctx, descs,
-					       R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS);
-			si_emit_shader_pointer(sctx, descs,
-					       R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS);
-		} else {
-			/* Set BINDLESS_SAMPLERS_AND_IMAGES into s[10:11],
-			 * s[8:9] remains unused for now.
-			 */
-			assert(descs == &sctx->bindless_descriptors);
-			si_emit_shader_pointer(sctx, descs,
-					       R_00B330_SPI_SHADER_USER_DATA_ES_0);
-			si_emit_shader_pointer(sctx, descs,
-					       R_00B430_SPI_SHADER_USER_DATA_LS_0);
-		}
+		si_emit_shader_pointer(sctx, descs,
+				       R_00B430_SPI_SHADER_USER_DATA_LS_0);
 	} else {
 		si_emit_shader_pointer(sctx, descs,
 				       R_00B230_SPI_SHADER_USER_DATA_GS_0);
 		si_emit_shader_pointer(sctx, descs,
-				       R_00B330_SPI_SHADER_USER_DATA_ES_0);
-		si_emit_shader_pointer(sctx, descs,
 				       R_00B430_SPI_SHADER_USER_DATA_HS_0);
 		si_emit_shader_pointer(sctx, descs,
 				       R_00B530_SPI_SHADER_USER_DATA_LS_0);
 	}
 }
 
 void si_emit_graphics_shader_pointers(struct si_context *sctx,
                                       struct r600_atom *atom)
 {
 	unsigned mask;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 4456c3d..62a056d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3024,25 +3024,27 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 		ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
 	}
 	ctx->return_value = ret;
 }
 
 /* Pass TCS inputs from LS to TCS on GFX9. */
 static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
 {
 	LLVMValueRef ret = ctx->return_value;
 
-	ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
+
+	ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers,
+					   8 + SI_SGPR_RW_BUFFERS);
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret,
 		ctx->param_bindless_samplers_and_images,
 		8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
 
 	ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
 				  8 + SI_SGPR_VS_STATE_BITS);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
 				  8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
 				  8 + GFX9_SGPR_TCS_OUT_OFFSETS);
@@ -3065,25 +3067,26 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
 	ret = si_insert_input_ret_float(ctx, ret,
 					ctx->param_tcs_rel_ids, vgpr++);
 	ctx->return_value = ret;
 }
 
 /* Pass GS inputs from ES to GS on GFX9. */
 static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
 {
 	LLVMValueRef ret = ctx->return_value;
 
-	ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
-
 	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
+
+	ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers,
+					   8 + SI_SGPR_RW_BUFFERS);
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret,
 		ctx->param_bindless_samplers_and_images,
 		8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
 
 	unsigned desc_param = ctx->param_vs_state_bits + 1;
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
 					   8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
 					   8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
 
@@ -4430,35 +4433,30 @@ static void create_function(struct si_shader_context *ctx)
 		 * placed after the user SGPRs.
 		 */
 		for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
 			returns[num_returns++] = ctx->i32; /* SGPRs */
 		for (i = 0; i < 11; i++)
 			returns[num_returns++] = ctx->f32; /* VGPRs */
 		break;
 
 	case SI_SHADER_MERGED_VERTEX_TESSCTRL:
 		/* Merged stages have 8 system SGPRs at the beginning. */
-		ctx->param_rw_buffers = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
-			add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
+		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
+		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_HI_HS */
 		ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
 		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
 
-		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-
-		ctx->param_bindless_samplers_and_images =
-			add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v8i32, 0));
-
+		declare_global_desc_pointers(ctx, &fninfo);
 		declare_per_stage_desc_pointers(ctx, &fninfo,
 						ctx->type == PIPE_SHADER_VERTEX);
 		declare_vs_specific_input_sgprs(ctx, &fninfo);
 
 		ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
@@ -4488,35 +4486,30 @@ static void create_function(struct si_shader_context *ctx)
 			 */
 			for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++)
 				returns[num_returns++] = ctx->i32; /* SGPRs */
 			for (i = 0; i < 11; i++)
 				returns[num_returns++] = ctx->f32; /* VGPRs */
 		}
 		break;
 
 	case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
 		/* Merged stages have 8 system SGPRs at the beginning. */
-		ctx->param_rw_buffers = /* SPI_SHADER_USER_DATA_ADDR_LO_GS */
-			add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
+		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_LO_GS) */
+		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_HI_GS) */
 		ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */
 		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
 
-		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-
-		ctx->param_bindless_samplers_and_images =
-			add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v8i32, 0));
-
+		declare_global_desc_pointers(ctx, &fninfo);
 		declare_per_stage_desc_pointers(ctx, &fninfo,
 						(ctx->type == PIPE_SHADER_VERTEX ||
 						 ctx->type == PIPE_SHADER_TESS_EVAL));
 		if (ctx->type == PIPE_SHADER_VERTEX) {
 			declare_vs_specific_input_sgprs(ctx, &fninfo);
 		} else {
 			/* TESS_EVAL (and also GEOMETRY):
 			 * Declare as many input SGPRs as the VS has. */
 			ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 			ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
-- 
2.7.4



More information about the mesa-dev mailing list