[Mesa-dev] [PATCH 30/61] radeonsi/gfx9: move RW_BUFFERS to s[0:1] for merged shaders

Nicolai Hähnle nhaehnle at gmail.com
Fri Apr 28 10:32:45 UTC 2017


One small comment on patch 13. Apart from that, patches 9-30:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>


On 24.04.2017 10:45, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 13 ++++++++----
>  src/gallium/drivers/radeonsi/si_shader.c      | 29 +++++++++++----------------
>  src/gallium/drivers/radeonsi/si_shader.h      |  3 +++
>  3 files changed, 24 insertions(+), 21 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index f04ed87..5b7298e 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -1919,31 +1919,36 @@ void si_emit_graphics_shader_userdata(struct si_context *sctx,
>  	uint32_t *sh_base = sctx->shader_userdata.sh_base;
>  	struct si_descriptors *descs;
>
>  	descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
>
>  	if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
>  		si_emit_shader_pointer(sctx, descs,
>  				       R_00B030_SPI_SHADER_USER_DATA_PS_0);
>  		si_emit_shader_pointer(sctx, descs,
>  				       R_00B130_SPI_SHADER_USER_DATA_VS_0);
> -		si_emit_shader_pointer(sctx, descs,
> -				       R_00B330_SPI_SHADER_USER_DATA_ES_0);
>
> -		/* GFX9 merged LS-HS and ES-GS. Only set RW_BUFFERS for ES and LS. */
>  		if (sctx->b.chip_class >= GFX9) {
> +			/* GFX9 merged LS-HS and ES-GS.
> +			 * Set RW_BUFFERS in the special registers, so that
> +			 * it's preloaded into s[0:1] instead of s[8:9].
> +			 */
>  			si_emit_shader_pointer(sctx, descs,
> -					       R_00B430_SPI_SHADER_USER_DATA_LS_0);
> +					       R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS);
> +			si_emit_shader_pointer(sctx, descs,
> +					       R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS);
>  		} else {
>  			si_emit_shader_pointer(sctx, descs,
>  					       R_00B230_SPI_SHADER_USER_DATA_GS_0);
>  			si_emit_shader_pointer(sctx, descs,
> +					       R_00B330_SPI_SHADER_USER_DATA_ES_0);
> +			si_emit_shader_pointer(sctx, descs,
>  					       R_00B430_SPI_SHADER_USER_DATA_HS_0);
>  		}
>  	}
>
>  	mask = sctx->shader_pointers_dirty &
>  	       u_bit_consecutive(SI_DESCS_FIRST_SHADER,
>  				 SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);
>
>  	while (mask) {
>  		unsigned i = u_bit_scan(&mask);
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 9c5dd5e..9e51622 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2672,32 +2672,31 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
>  	LLVMValueRef tf_soffset;
>  	unsigned vgpr;
>
>  	offchip_layout = LLVMGetParam(ctx->main_fn,
>  				      ctx->param_tcs_offchip_layout);
>  	offchip_soffset = LLVMGetParam(ctx->main_fn,
>  				       ctx->param_tcs_offchip_offset);
>  	tf_soffset = LLVMGetParam(ctx->main_fn,
>  				  ctx->param_tcs_factor_offset);
>
> +	ret = si_insert_input_ptr_as_2xi32(ctx, ret,
> +					   ctx->param_rw_buffers, 0);
> +
>  	if (ctx->screen->b.chip_class >= GFX9) {
> -		ret = si_insert_input_ptr_as_2xi32(ctx, ret,
> -						   ctx->param_rw_buffers, 8);
>  		ret = LLVMBuildInsertValue(builder, ret, offchip_layout,
>  					   8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT, "");
>  		/* Tess offchip and tess factor offsets are at the beginning. */
>  		ret = LLVMBuildInsertValue(builder, ret, offchip_soffset, 2, "");
>  		ret = LLVMBuildInsertValue(builder, ret, tf_soffset, 4, "");
>  		vgpr = 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT + 1;
>  	} else {
> -		ret = si_insert_input_ptr_as_2xi32(ctx, ret,
> -						   ctx->param_rw_buffers, 0);
>  		ret = LLVMBuildInsertValue(builder, ret, offchip_layout,
>  					   GFX6_SGPR_TCS_OFFCHIP_LAYOUT, "");
>  		/* Tess offchip and tess factor offsets are after user SGPRs. */
>  		ret = LLVMBuildInsertValue(builder, ret, offchip_soffset,
>  					   GFX6_TCS_NUM_USER_SGPR, "");
>  		ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
>  					   GFX6_TCS_NUM_USER_SGPR + 1, "");
>  		vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
>  	}
>
> @@ -2710,28 +2709,26 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
>  	ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
>  	ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
>  	ctx->return_value = ret;
>  }
>
>  /* Pass TCS inputs from LS to TCS on GFX9. */
>  static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
>  {
>  	LLVMValueRef ret = ctx->return_value;
>
> +	ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
>
> -	ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers,
> -					   8 + SI_SGPR_RW_BUFFERS);
> -
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
>  				  8 + SI_SGPR_VS_STATE_BITS);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
>  				  8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
>  				  8 + GFX9_SGPR_TCS_OUT_OFFSETS);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout,
>  				  8 + GFX9_SGPR_TCS_OUT_LAYOUT);
>
>  	unsigned desc_param = ctx->param_tcs_out_lds_layout + 2;
> @@ -5858,31 +5855,31 @@ static void create_function(struct si_shader_context *ctx)
>  		 * placed after the user SGPRs.
>  		 */
>  		for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
>  			returns[num_returns++] = ctx->i32; /* SGPRs */
>  		for (i = 0; i < 3; i++)
>  			returns[num_returns++] = ctx->f32; /* VGPRs */
>  		break;
>
>  	case SI_SHADER_MERGED_VERTEX_TESSCTRL:
>  		/* Merged stages have 8 system SGPRs at the beginning. */
> -		params[num_params++] = ctx->i32; /* unused */
> -		params[num_params++] = ctx->i32; /* unused */
> +		params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
> +			const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
>  		params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
>  		params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
>  		params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
>  		params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32; /* unused */
>  		params[num_params++] = ctx->i32; /* unused */
>
> -		params[ctx->param_rw_buffers = num_params++] =
> -			const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
> +		params[num_params++] = ctx->i32; /* unused */
> +		params[num_params++] = ctx->i32; /* unused */
>  		declare_per_stage_desc_pointers(ctx, params, &num_params,
>  						ctx->type == PIPE_SHADER_VERTEX);
>  		declare_vs_specific_input_sgprs(ctx, params, &num_params);
>
>  		params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
>  		params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32;
>  		params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32; /* unused */
>
>  		declare_per_stage_desc_pointers(ctx, params, &num_params,
> @@ -8459,33 +8456,31 @@ static bool si_shader_select_tes_parts(struct si_screen *sscreen,
>  static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
>  					 union si_shader_part_key *key)
>  {
>  	struct gallivm_state *gallivm = &ctx->gallivm;
>  	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
>  	LLVMTypeRef params[32];
>  	LLVMValueRef func;
>  	int last_sgpr, num_params = 0;
>
>  	/* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. */
> +	params[ctx->param_rw_buffers = num_params++] =
> +		const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
> +
>  	if (ctx->screen->b.chip_class >= GFX9) {
> -		params[num_params++] = ctx->i32;
> -		params[num_params++] = ctx->i32;
>  		params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32; /* wave info */
>  		params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
> -	}
> -	params[ctx->param_rw_buffers = num_params++] =
> -		const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
> -	if (ctx->screen->b.chip_class >= GFX9) {
> +		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index e24b8b8..39eee86 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -73,20 +73,23 @@
>  #include "tgsi/tgsi_scan.h"
>  #include "util/u_queue.h"
>  #include "si_state.h"
>
>  struct ac_shader_binary;
>
>  #define SI_MAX_VS_OUTPUTS	40
>
>  /* SGPR user data indices */
>  enum {
> +	/* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
> +	 * and these two are used for other purposes.
> +	 */
>  	SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
>  	SI_SGPR_RW_BUFFERS_HI,
>  	SI_SGPR_CONST_BUFFERS,
>  	SI_SGPR_CONST_BUFFERS_HI,
>  	SI_SGPR_SAMPLERS,  /* images & sampler states interleaved */
>  	SI_SGPR_SAMPLERS_HI,
>  	SI_SGPR_IMAGES,
>  	SI_SGPR_IMAGES_HI,
>  	SI_SGPR_SHADER_BUFFERS,
>  	SI_SGPR_SHADER_BUFFERS_HI,
>


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list