[Mesa-dev] [PATCH 2/2] radeonsi: Use buffer_load intrinsics instead of llvm.SI.vs.load.input
Nicolai Hähnle
nhaehnle at gmail.com
Wed Nov 16 21:08:29 UTC 2016
On 16.11.2016 16:38, Tom Stellard wrote:
> On Wed, Nov 16, 2016 at 11:13:45AM +0100, Nicolai Hähnle wrote:
>> Have you looked at the shader-db impact?
>>
>
> shader-db is mostly unchanged. There are a few decreases in SGPR usage and
> code size, and a 4 byte increase in code size for one shader.
Okay, in that case you can add my R-b to this patch as well.
>> I do think we should eventually do this, but llvm.SI.vs.load.input is
>> ReadNone while llvm.amdgcn.buffer.load.* is only ReadOnly, so as long as we
>> can't teach LLVM properly about no-aliasing and speculability, there may be
>> performance regressions.
>>
>
> Ideally llvm.amdgcn.buffer.load.* would be ReadOnly and ArgMemOnly, but I think
> as long as it has non-pointer arguments this combination behaves the same as
> ReadNone, which would be incorrect.
Agreed. This is something that the "fat" pointers would help with, right?
Cheers,
Nicolai
>
> -Tom
>
>> Cheers,
>> Nicolai
>>
>> On 16.11.2016 03:14, Tom Stellard wrote:
>>> ---
>>> src/gallium/drivers/radeonsi/si_shader.c | 69 +++++++++++++++++++++++---------
>>> 1 file changed, 50 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
>>> index 306e12f..ee4fe2f 100644
>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>> @@ -82,6 +82,17 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
>>> static void si_build_ps_epilog_function(struct si_shader_context *ctx,
>>> union si_shader_part_key *key);
>>>
>>> +static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
>>> + LLVMValueRef rsrc,
>>> + int num_channels,
>>> + LLVMValueRef vindex,
>>> + LLVMValueRef voffset,
>>> + LLVMValueRef soffset,
>>> + unsigned inst_offset,
>>> + unsigned glc,
>>> + unsigned slc,
>>> + bool is_format);
>>> +
>>> /* Ideally pass the sample mask input to the PS epilog as v13, which
>>> * is its usual location, so that the shader doesn't have to add v_mov.
>>> */
>>> @@ -368,6 +379,31 @@ static LLVMValueRef get_instance_index_for_fetch(
>>> LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
>>> }
>>>
>>> +static LLVMValueRef build_vs_load_input(struct si_shader_context *ctx,
>>> + LLVMValueRef rsrc,
>>> + LLVMValueRef index,
>>> + LLVMValueRef offset) {
>>> +
>>> + struct lp_build_context *base = &ctx->soa.bld_base.base;
>>> + struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
>>> + struct gallivm_state *gallivm = base->gallivm;
>>> +
>>> + LLVMValueRef args[8];
>>> +
>>> + if (HAVE_LLVM < 0x0400) {
>>> + args[0] = rsrc;
>>> + args[1] = offset;
>>> + args[2] = index;
>>> +
>>> + return lp_build_intrinsic(gallivm->builder,
>>> + "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
>>> + LP_FUNC_ATTR_READNONE);
>>> + }
>>> +
>>> + return build_buffer_load(ctx, rsrc, 4, index, offset,
>>> + uint->zero, 0, 0, 0, true);
>>> +}
>>> +
>>> static void declare_input_vs(
>>> struct si_shader_context *ctx,
>>> unsigned input_index,
>>> @@ -385,7 +421,6 @@ static void declare_input_vs(
>>> LLVMValueRef t_list;
>>> LLVMValueRef attribute_offset;
>>> LLVMValueRef buffer_index;
>>> - LLVMValueRef args[3];
>>> LLVMValueRef input;
>>>
>>> /* Load the T list */
>>> @@ -402,12 +437,8 @@ static void declare_input_vs(
>>> ctx->param_vertex_index0 +
>>> input_index);
>>>
>>> - args[0] = t_list;
>>> - args[1] = attribute_offset;
>>> - args[2] = buffer_index;
>>> - input = lp_build_intrinsic(gallivm->builder,
>>> - "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
>>> - LP_FUNC_ATTR_READNONE);
>>> + input = build_vs_load_input(ctx, t_list, buffer_index,
>>> + attribute_offset);
>>>
>>> /* Break up the vec4 into individual components */
>>> for (chan = 0; chan < 4; chan++) {
>>> @@ -808,7 +839,8 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
>>> LLVMValueRef soffset,
>>> unsigned inst_offset,
>>> unsigned glc,
>>> - unsigned slc)
>>> + unsigned slc,
>>> + bool is_format)
>>> {
>>> struct gallivm_state *gallivm = &ctx->gallivm;
>>> unsigned func = CLAMP(num_channels, 1, 3) - 1;
>>> @@ -837,8 +869,8 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
>>> "");
>>> }
>>>
>>> - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
>>> - type_names[func]);
>>> + snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s%s",
>>> + is_format ? "format." : "", type_names[func]);
>>>
>>> return lp_build_intrinsic(gallivm->builder, name, types[func], args,
>>> ARRAY_SIZE(args), LP_FUNC_ATTR_READONLY);
>>> @@ -889,14 +921,14 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
>>>
>>> if (swizzle == ~0) {
>>> value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
>>> - 0, 1, 0);
>>> + 0, 1, 0, false);
>>>
>>> return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
>>> }
>>>
>>> if (!tgsi_type_is_64bit(type)) {
>>> value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
>>> - 0, 1, 0);
>>> + 0, 1, 0, false);
>>>
>>> value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
>>> return LLVMBuildExtractElement(gallivm->builder, value,
>>> @@ -904,10 +936,10 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
>>> }
>>>
>>> value = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
>>> - swizzle * 4, 1, 0);
>>> + swizzle * 4, 1, 0, false);
>>>
>>> value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
>>> - swizzle * 4 + 4, 1, 0);
>>> + swizzle * 4 + 4, 1, 0, false);
>>>
>>> return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
>>> }
>>> @@ -4779,11 +4811,10 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
>>> const char *infix = "";
>>>
>>> if (target == TGSI_TEXTURE_BUFFER) {
>>> - emit_data->output[emit_data->chan] = lp_build_intrinsic(
>>> - base->gallivm->builder,
>>> - "llvm.SI.vs.load.input", emit_data->dst_type,
>>> - emit_data->args, emit_data->arg_count,
>>> - LP_FUNC_ATTR_READNONE);
>>> + emit_data->output[emit_data->chan] =
>>> + build_vs_load_input(ctx, emit_data->args[0],
>>> + emit_data->args[2],
>>> + emit_data->args[1]);
>>> return;
>>> }
>>>
>>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list