[Mesa-dev] [PATCH 04/14] radeonsi: Add buffer load functions.
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Tue May 10 16:36:10 UTC 2016
On Tue, May 10, 2016 at 6:28 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> On 10.05.2016 11:25, Bas Nieuwenhuizen wrote:
>>
>> On Tue, May 10, 2016 at 6:13 PM, Nicolai Hähnle <nhaehnle at gmail.com>
>> wrote:
>>>
>>> On 10.05.2016 05:52, Bas Nieuwenhuizen wrote:
>>>>
>>>>
>>>> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>>>> ---
>>>> src/gallium/drivers/radeonsi/si_shader.c | 81
>>>> ++++++++++++++++++++++++++++++++
>>>> 1 file changed, 81 insertions(+)
>>>>
>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>>>> b/src/gallium/drivers/radeonsi/si_shader.c
>>>> index 5897149..d3df4d6 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>>> @@ -733,6 +733,87 @@ static void build_tbuffer_store_dwords(struct
>>>> si_shader_context *ctx,
>>>> V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1,
>>>> 0);
>>>> }
>>>>
>>>> +static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
>>>> + LLVMValueRef rsrc,
>>>> + int num_channels,
>>>> + LLVMValueRef vindex,
>>>> + LLVMValueRef voffset,
>>>> + LLVMValueRef soffset,
>>>> + unsigned inst_offset,
>>>> + unsigned glc,
>>>> + unsigned slc)
>>>> +{
>>>> + struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
>>>> + LLVMValueRef args[] = {
>>>> + LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v16i8,
>>>> ""),
>>>> + voffset ? voffset : vindex,
>>>> + soffset,
>>>> + LLVMConstInt(ctx->i32, inst_offset, 0),
>>>> + LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen
>>>> + LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen
>>>> + LLVMConstInt(ctx->i32, glc, 0),
>>>> + LLVMConstInt(ctx->i32, slc, 0),
>>>> + LLVMConstInt(ctx->i32, 0, 0), // TFE
>>>> + };
>>>> +
>>>> + unsigned func = CLAMP(num_channels, 1, 3) - 1;
>>>> + LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
>>>> ctx->v4i32};
>>>> + const char *type_names[] = {"i32", "v2i32", "v4i32"};
>>>> + const char *arg_type = "i32";
>>>> +
>>>> + if (voffset && vindex) {
>>>> + LLVMValueRef vaddr[] = {vindex, voffset};
>>>> +
>>>> + arg_type = "v2i32";
>>>> + args[1] = lp_build_gather_values(gallivm, vaddr, 2);
>>>> + }
>>>> +
>>>> + char name[256];
>>>> + snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
>>>> + type_names[func], arg_type);
>>>
>>>
>>>
>>> We're generally trying to get away from the llvm.SI.* intrinsics and use
>>> the
>>> llvm.amdgcn.* intrinsics instead - in this case llvm.amdgcn.buffer.load.
>>
>>
>> The llvm.amdgcn.buffer.load doesn't allow specifying VGPR + SGPR +
>> immediate offset separately though, as far as I can see. Furthermore, I
>> was trying to avoid an LLVM 3.9 dependency, although I can solve that
>> with an if based on the LLVM version.
>
>
> Fair enough on the LLVM version dependency.
>
> I also think you're right about llvm.amdgcn.buffer.load, but that's
> something that should be fixed on the LLVM side eventually without
> introducing a new intrinsic.
I am not sure LLVM will ever be able to. I don't know whether the
vgpr + sgpr + immediate offset sum wraps around at 32 bits. If it does
not, a v_add and the hardware offsets behave differently, and LLVM should
not sink the add into the load instruction.
- Bas
>
> I think we should go for the if (HAVE_LLVM) approach even if it produces
> slightly worse code for now (it really should be only one additional v_add
> at most).
>
> Nicolai
>
>
>> - Bas
>>
>>>
>>> Nicolai
>>>
>>>
>>>> +
>>>> + return lp_build_intrinsic(gallivm->builder, name, types[func],
>>>> args,
>>>> + ARRAY_SIZE(args),
>>>> LLVMReadOnlyAttribute
>>>> |
>>>> +
>>>> LLVMNoUnwindAttribute);
>>>> +}
>>>> +
>>>> +static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
>>>> + enum tgsi_opcode_type type, unsigned
>>>> swizzle,
>>>> + LLVMValueRef buffer, LLVMValueRef
>>>> offset,
>>>> + LLVMValueRef base)
>>>> +{
>>>> + struct si_shader_context *ctx = si_shader_context(bld_base);
>>>> + struct gallivm_state *gallivm = bld_base->base.gallivm;
>>>> + LLVMValueRef value, value2;
>>>> + LLVMTypeRef llvm_type = tgsi2llvmtype(bld_base, type);
>>>> + LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4);
>>>> +
>>>> + if (swizzle == ~0) {
>>>> +
>>>> + value = build_buffer_load(ctx, buffer, 4, NULL, base,
>>>> offset,
>>>> + 0, 1, 0);
>>>> +
>>>> + return LLVMBuildBitCast(gallivm->builder, value,
>>>> vec_type,
>>>> "");
>>>> + }
>>>> +
>>>> + if (type != TGSI_TYPE_DOUBLE) {
>>>> + value = build_buffer_load(ctx, buffer, 4, NULL, base,
>>>> offset,
>>>> + 0, 1, 0);
>>>> +
>>>> + value = LLVMBuildBitCast(gallivm->builder, value,
>>>> vec_type, "");
>>>> + return LLVMBuildExtractElement(gallivm->builder, value,
>>>> + lp_build_const_int32(gallivm,
>>>> swizzle), "");
>>>> + }
>>>> +
>>>> + value = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
>>>> + swizzle * 4, 1, 0);
>>>> +
>>>> + value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
>>>> + swizzle * 4 + 4, 1, 0);
>>>> +
>>>> + return radeon_llvm_emit_fetch_double(bld_base, value, value2);
>>>> +}
>>>> +
>>>> /**
>>>> * Load from LDS.
>>>> *
>>>>
>>>
>
More information about the mesa-dev
mailing list