[Mesa-dev] [PATCH] radeonsi: Use build_buffer_load helper function for geometry shaders

Nicolai Hähnle nhaehnle at gmail.com
Fri Dec 16 11:05:04 UTC 2016


On 15.12.2016 21:35, Tom Stellard wrote:
> Also add a need_range_checks parameter to this function, which can be
> set to false to enable some additional optimizations.  Currently, this
> will cause the compiler to emit the llvm.SI.buffer.load.dword intrinsic
> instead of llvm.amdgcn.buffer.load.  Eventually, this information
> will be passed to LLVM to enable more aggressive addressing mode optimizations.

This change mostly makes sense to me, but please rebase this on current 
master.

Nicolai

> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 79 ++++++++++++--------------------
>  1 file changed, 29 insertions(+), 50 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 72cf827..5b15ad4 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -825,12 +825,13 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
>                                        LLVMValueRef soffset,
>                                        unsigned inst_offset,
>                                        unsigned glc,
> -                                      unsigned slc)
> +                                      unsigned slc,
> +                                      bool need_range_checks)
>  {
>  	struct gallivm_state *gallivm = &ctx->gallivm;
>  	unsigned func = CLAMP(num_channels, 1, 3) - 1;
>
> -	if (HAVE_LLVM >= 0x309) {
> +	if (need_range_checks && HAVE_LLVM >= 0x309) {
>  		LLVMValueRef args[] = {
>  			LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, ""),
>  			vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
> @@ -896,7 +897,7 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
>  static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
>                                  enum tgsi_opcode_type type, unsigned swizzle,
>                                  LLVMValueRef buffer, LLVMValueRef offset,
> -                                LLVMValueRef base)
> +                                LLVMValueRef base, bool need_range_checks)
>  {
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
>  	struct gallivm_state *gallivm = bld_base->base.gallivm;
> @@ -906,14 +907,14 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
>
>  	if (swizzle == ~0) {
>  		value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
> -		                          0, 1, 0);
> +		                          0, 1, 0, need_range_checks);
>
>  		return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
>  	}
>
>  	if (!tgsi_type_is_64bit(type)) {
>  		value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
> -		                          0, 1, 0);
> +		                          0, 1, 0, need_range_checks);
>
>  		value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
>  		return LLVMBuildExtractElement(gallivm->builder, value,
> @@ -921,10 +922,10 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
>  	}
>
>  	value = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
> -	                          swizzle * 4, 1, 0);
> +	                          swizzle * 4, 1, 0, need_range_checks);
>
>  	value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
> -	                           swizzle * 4 + 4, 1, 0);
> +	                           swizzle * 4 + 4, 1, 0, need_range_checks);
>
>  	return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
>  }
> @@ -1044,7 +1045,7 @@ static LLVMValueRef fetch_input_tes(
>  	base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
>  	addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
>
> -	return buffer_load(bld_base, type, swizzle, buffer, base, addr);
> +	return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
>  }
>
>  static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
> @@ -1125,13 +1126,12 @@ static LLVMValueRef fetch_input_gs(
>  	struct lp_build_context *uint =	&ctx->soa.bld_base.uint_bld;
>  	struct gallivm_state *gallivm = base->gallivm;
>  	LLVMValueRef vtx_offset;
> -	LLVMValueRef args[9];
>  	unsigned vtx_offset_param;
>  	struct tgsi_shader_info *info = &shader->selector->info;
>  	unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
>  	unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
>  	unsigned param;
> -	LLVMValueRef value;
> +	LLVMValueRef soffset, value;
>
>  	if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
>  		return get_primitive_id(bld_base, swizzle);
> @@ -1163,27 +1163,15 @@ static LLVMValueRef fetch_input_gs(
>  				      4);
>
>  	param = si_shader_io_get_unique_index(semantic_name, semantic_index);
> -	args[0] = ctx->esgs_ring;
> -	args[1] = vtx_offset;
> -	args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
> -	args[3] = uint->zero;
> -	args[4] = uint->one;  /* OFFEN */
> -	args[5] = uint->zero; /* IDXEN */
> -	args[6] = uint->one;  /* GLC */
> -	args[7] = uint->zero; /* SLC */
> -	args[8] = uint->zero; /* TFE */
> -
> -	value = lp_build_intrinsic(gallivm->builder,
> -				   "llvm.SI.buffer.load.dword.i32.i32",
> -				   ctx->i32, args, 9,
> -				   LP_FUNC_ATTR_READONLY);
> +	soffset = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
> +
> +	value = build_buffer_load(ctx, ctx->esgs_ring, 1, NULL,
> +				  vtx_offset, soffset, 0, 1, 0, false);
>  	if (tgsi_type_is_64bit(type)) {
>  		LLVMValueRef value2;
> -		args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
> -		value2 = lp_build_intrinsic(gallivm->builder,
> -					    "llvm.SI.buffer.load.dword.i32.i32",
> -					    ctx->i32, args, 9,
> -					    LP_FUNC_ATTR_READONLY);
> +		soffset = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
> +		value2 = build_buffer_load(ctx, ctx->esgs_ring, 1, NULL,
> +					   vtx_offset, soffset, 0, 1, 0, false);
>  		return si_llvm_emit_fetch_64bit(bld_base, type,
>  						value, value2);
>  	}
> @@ -1709,7 +1697,7 @@ static void declare_system_value(
>  		                          lp_build_const_int32(gallivm, param));
>
>  		value = buffer_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
> -		                    ~0, buffer, base, addr);
> +		                    ~0, buffer, base, addr, true);
>
>  		break;
>  	}
> @@ -6420,7 +6408,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
>  	struct lp_build_context *uint = &bld_base->uint_bld;
>  	struct si_shader_output_values *outputs;
>  	struct tgsi_shader_info *gsinfo = &gs_selector->info;
> -	LLVMValueRef args[9];
> +	LLVMValueRef voffset;
>  	int i, r;
>
>  	outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
> @@ -6447,18 +6435,6 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
>  	create_function(&ctx);
>  	preload_ring_buffers(&ctx);
>
> -	args[0] = ctx.gsvs_ring[0];
> -	args[1] = lp_build_mul_imm(uint,
> -				   LLVMGetParam(ctx.main_fn,
> -						ctx.param_vertex_id),
> -				   4);
> -	args[3] = uint->zero;
> -	args[4] = uint->one;  /* OFFEN */
> -	args[5] = uint->zero; /* IDXEN */
> -	args[6] = uint->one;  /* GLC */
> -	args[7] = uint->one;  /* SLC */
> -	args[8] = uint->zero; /* TFE */
> -
>  	/* Fetch the vertex stream ID.*/
>  	LLVMValueRef stream_id;
>
> @@ -6468,6 +6444,9 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
>  		stream_id = uint->zero;
>
>  	/* Fill in output information. */
> +	voffset = lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn,
> +                                                      ctx.param_vertex_id), 4);
> +	/* Fetch vertex data from GSVS ring */
>  	for (i = 0; i < gsinfo->num_outputs; ++i) {
>  		outputs[i].semantic_name = gsinfo->output_semantic_name[i];
>  		outputs[i].semantic_index = gsinfo->output_semantic_index[i];
> @@ -6502,24 +6481,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
>  		offset = 0;
>  		for (i = 0; i < gsinfo->num_outputs; ++i) {
>  			for (unsigned chan = 0; chan < 4; chan++) {
> +				LLVMValueRef load, soffset;
>  				if (!(gsinfo->output_usagemask[i] & (1 << chan)) ||
>  				    outputs[i].vertex_stream[chan] != stream) {
>  					outputs[i].values[chan] = ctx.soa.bld_base.base.undef;
>  					continue;
>  				}
>
> -				args[2] = lp_build_const_int32(
> -					gallivm,
> +				soffset = lp_build_const_int32(gallivm,
>  					offset * gs_selector->gs_max_out_vertices * 16 * 4);
>  				offset++;
>
> +				load = build_buffer_load(&ctx, ctx.gsvs_ring[0], 1,
> +							 NULL, voffset, soffset,
> +							 0, 1, 1, false);
> +
>  				outputs[i].values[chan] =
>  					LLVMBuildBitCast(gallivm->builder,
> -						 lp_build_intrinsic(gallivm->builder,
> -								 "llvm.SI.buffer.load.dword.i32.i32",
> -								 ctx.i32, args, 9,
> -								 LP_FUNC_ATTR_READONLY),
> -						 ctx.f32, "");
> +							 load, ctx.f32, "");
>  			}
>  		}
>
>


More information about the mesa-dev mailing list