[Mesa-dev] [PATCH] radeonsi: don't preload constants at the beginning of shaders

Nicolai Hähnle nhaehnle at gmail.com
Sat Sep 10 11:13:33 UTC 2016


On 10.09.2016 00:40, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> LLVM can CSE (common-subexpression-eliminate) the loads, so we can simply
> re-load constants before each use instead of preloading all of them at the
> beginning of the shader. The decrease in SGPR spilling is huge.
>
> The best improvements are the dumbest ones.

Indeed :)

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

>
> 26011 shaders in 14651 tests
> Totals:
> SGPRS: 1453346 -> 1251920 (-13.86 %)
> VGPRS: 742576 -> 728421 (-1.91 %)
> Spilled SGPRs: 52298 -> 16644 (-68.17 %)
> Spilled VGPRs: 397 -> 369 (-7.05 %)
> Scratch VGPRs: 1372 -> 1344 (-2.04 %) dwords per thread
> Code Size: 36136488 -> 36001064 (-0.37 %) bytes
> LDS: 767 -> 767 (0.00 %) blocks
> Max Waves: 219315 -> 222221 (1.33 %)
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 30 +++++++++++-------------------
>  1 file changed, 11 insertions(+), 19 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 0b7de18..08e3cee 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1874,26 +1874,33 @@ static LLVMValueRef fetch_constant(
>  		for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
>  			values[chan] = fetch_constant(bld_base, reg, type, chan);
>
>  		return lp_build_gather_values(bld_base->base.gallivm, values, 4);
>  	}
>
>  	buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
>  	idx = reg->Register.Index * 4 + swizzle;
>
>  	if (!reg->Register.Indirect && !reg->Dimension.Indirect) {
> +		LLVMValueRef c0, c1;
> +
> +		c0 = buffer_load_const(ctx, ctx->const_buffers[buf],
> +				       LLVMConstInt(ctx->i32, idx * 4, 0));
> +
>  		if (!tgsi_type_is_64bit(type))
> -			return bitcast(bld_base, type, ctx->constants[buf][idx]);
> +			return bitcast(bld_base, type, c0);
>  		else {
> +			c1 = buffer_load_const(ctx, ctx->const_buffers[buf],
> +					       LLVMConstInt(ctx->i32,
> +							    (idx + 1) * 4, 0));
>  			return radeon_llvm_emit_fetch_64bit(bld_base, type,
> -							    ctx->constants[buf][idx],
> -							    ctx->constants[buf][idx + 1]);
> +							    c0, c1);
>  		}
>  	}
>
>  	if (reg->Register.Dimension && reg->Dimension.Indirect) {
>  		LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
>  		LLVMValueRef index;
>  		index = get_bounded_indirect_index(ctx, &reg->DimIndirect,
>  						   reg->Dimension.Index,
>  						   SI_NUM_CONST_BUFFERS);
>  		bufp = build_indexed_load_const(ctx, ptr, index);
> @@ -5789,39 +5796,26 @@ static void create_function(struct si_shader_context *ctx)
>
>  static void preload_constants(struct si_shader_context *ctx)
>  {
>  	struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
>  	struct gallivm_state *gallivm = bld_base->base.gallivm;
>  	const struct tgsi_shader_info *info = bld_base->info;
>  	unsigned buf;
>  	LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
>
>  	for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
> -		unsigned i, num_const = info->const_file_max[buf] + 1;
> -
> -		if (num_const == 0)
> +		if (info->const_file_max[buf] == -1)
>  			continue;
>
> -		/* Allocate space for the constant values */
> -		ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
> -
>  		/* Load the resource descriptor */
>  		ctx->const_buffers[buf] =
>  			build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf));
> -
> -		/* Load the constants, we rely on the code sinking to do the rest */
> -		for (i = 0; i < num_const * 4; ++i) {
> -			ctx->constants[buf][i] =
> -				buffer_load_const(ctx,
> -					ctx->const_buffers[buf],
> -					lp_build_const_int32(gallivm, i * 4));
> -		}
>  	}
>  }
>
>  static void preload_shader_buffers(struct si_shader_context *ctx)
>  {
>  	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
>  	LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
>  	int buf, maxbuf;
>
>  	maxbuf = MIN2(ctx->shader->selector->info.file_max[TGSI_FILE_BUFFER],
> @@ -6898,22 +6892,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
>  		ctx.shader = shader->gs_copy_shader;
>  		if ((r = si_generate_gs_copy_shader(sscreen, &ctx,
>  						    shader, debug))) {
>  			free(shader->gs_copy_shader);
>  			shader->gs_copy_shader = NULL;
>  			goto out;
>  		}
>  	}
>
>  out:
> -	for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
> -		FREE(ctx.constants[i]);
>  	return r;
>  }
>
>  /**
>   * Create, compile and return a shader part (prolog or epilog).
>   *
>   * \param sscreen	screen
>   * \param list		list of shader parts of the same category
>   * \param key		shader part key
>   * \param tm		LLVM target machine
>


More information about the mesa-dev mailing list