[Mesa-dev] [PATCH 2/2] radeonsi: Use buffer_load intrinsics instead of llvm.SI.vs.load.input
Nicolai Hähnle
nhaehnle at gmail.com
Wed Nov 16 10:13:45 UTC 2016
Have you looked at the shader-db impact?
I do think we should eventually do this, but llvm.SI.vs.load.input is
ReadNone while llvm.amdgcn.buffer.load.* is only ReadOnly. As long as we
can't properly teach LLVM that these loads don't alias and can be
speculated, there may be performance regressions.
Cheers,
Nicolai
On 16.11.2016 03:14, Tom Stellard wrote:
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 69 +++++++++++++++++++++++---------
> 1 file changed, 50 insertions(+), 19 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 306e12f..ee4fe2f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -82,6 +82,17 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
> static void si_build_ps_epilog_function(struct si_shader_context *ctx,
> union si_shader_part_key *key);
>
> +static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
> + LLVMValueRef rsrc,
> + int num_channels,
> + LLVMValueRef vindex,
> + LLVMValueRef voffset,
> + LLVMValueRef soffset,
> + unsigned inst_offset,
> + unsigned glc,
> + unsigned slc,
> + bool is_format);
> +
> /* Ideally pass the sample mask input to the PS epilog as v13, which
> * is its usual location, so that the shader doesn't have to add v_mov.
> */
> @@ -368,6 +379,31 @@ static LLVMValueRef get_instance_index_for_fetch(
> LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
> }
>
> +static LLVMValueRef build_vs_load_input(struct si_shader_context *ctx,
> + LLVMValueRef rsrc,
> + LLVMValueRef index,
> + LLVMValueRef offset) {
> +
> + struct lp_build_context *base = &ctx->soa.bld_base.base;
> + struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
> + struct gallivm_state *gallivm = base->gallivm;
> +
> + LLVMValueRef args[8];
> +
> + if (HAVE_LLVM < 0x0400) {
> + args[0] = rsrc;
> + args[1] = offset;
> + args[2] = index;
> +
> + return lp_build_intrinsic(gallivm->builder,
> + "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> + LP_FUNC_ATTR_READNONE);
> + }
> +
> + return build_buffer_load(ctx, rsrc, 4, index, offset,
> + uint->zero, 0, 0, 0, true);
> +}
> +
> static void declare_input_vs(
> struct si_shader_context *ctx,
> unsigned input_index,
> @@ -385,7 +421,6 @@ static void declare_input_vs(
> LLVMValueRef t_list;
> LLVMValueRef attribute_offset;
> LLVMValueRef buffer_index;
> - LLVMValueRef args[3];
> LLVMValueRef input;
>
> /* Load the T list */
> @@ -402,12 +437,8 @@ static void declare_input_vs(
> ctx->param_vertex_index0 +
> input_index);
>
> - args[0] = t_list;
> - args[1] = attribute_offset;
> - args[2] = buffer_index;
> - input = lp_build_intrinsic(gallivm->builder,
> - "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> - LP_FUNC_ATTR_READNONE);
> + input = build_vs_load_input(ctx, t_list, buffer_index,
> + attribute_offset);
>
> /* Break up the vec4 into individual components */
> for (chan = 0; chan < 4; chan++) {
> @@ -808,7 +839,8 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
> LLVMValueRef soffset,
> unsigned inst_offset,
> unsigned glc,
> - unsigned slc)
> + unsigned slc,
> + bool is_format)
> {
> struct gallivm_state *gallivm = &ctx->gallivm;
> unsigned func = CLAMP(num_channels, 1, 3) - 1;
> @@ -837,8 +869,8 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
> "");
> }
>
> - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
> - type_names[func]);
> + snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s%s",
> + is_format ? "format." : "", type_names[func]);
>
> return lp_build_intrinsic(gallivm->builder, name, types[func], args,
> ARRAY_SIZE(args), LP_FUNC_ATTR_READONLY);
> @@ -889,14 +921,14 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
>
> if (swizzle == ~0) {
> value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
> - 0, 1, 0);
> + 0, 1, 0, false);
>
> return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
> }
>
> if (!tgsi_type_is_64bit(type)) {
> value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
> - 0, 1, 0);
> + 0, 1, 0, false);
>
> value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
> return LLVMBuildExtractElement(gallivm->builder, value,
> @@ -904,10 +936,10 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
> }
>
> value = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
> - swizzle * 4, 1, 0);
> + swizzle * 4, 1, 0, false);
>
> value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
> - swizzle * 4 + 4, 1, 0);
> + swizzle * 4 + 4, 1, 0, false);
>
> return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
> }
> @@ -4779,11 +4811,10 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
> const char *infix = "";
>
> if (target == TGSI_TEXTURE_BUFFER) {
> - emit_data->output[emit_data->chan] = lp_build_intrinsic(
> - base->gallivm->builder,
> - "llvm.SI.vs.load.input", emit_data->dst_type,
> - emit_data->args, emit_data->arg_count,
> - LP_FUNC_ATTR_READNONE);
> + emit_data->output[emit_data->chan] =
> + build_vs_load_input(ctx, emit_data->args[0],
> + emit_data->args[2],
> + emit_data->args[1]);
> return;
> }
>
>
More information about the mesa-dev mailing list