[Mesa-dev] [PATCH] radv: Use structured intrinsics instead of indexing workaround for GFX9.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Fri Nov 16 21:30:36 UTC 2018


On Fri, Nov 16, 2018 at 9:34 AM Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
>
>
>
> On 11/12/18 10:51 PM, Bas Nieuwenhuizen wrote:
> > These force the index to be used in the instruction so we don't need the
> > workaround.
> >
> > Totals:
> > SGPRS: 1321642 -> 1321802 (0.01 %)
> > VGPRS: 943664 -> 943788 (0.01 %)
> > Spilled SGPRs: 28468 -> 28480 (0.04 %)
> > Spilled VGPRs: 88 -> 89 (1.14 %)
> > Private memory VGPRs: 0 -> 0 (0.00 %)
> > Scratch size: 80 -> 80 (0.00 %) dwords per thread
> > Code Size: 52415292 -> 52338932 (-0.15 %) bytes
> > LDS: 400 -> 400 (0.00 %) blocks
> > Max Waves: 233903 -> 233803 (-0.04 %)
> > Wait states: 0 -> 0 (0.00 %)
> >
> > Totals from affected shaders:
> > SGPRS: 238344 -> 238504 (0.07 %)
> > VGPRS: 232732 -> 232856 (0.05 %)
> > Spilled SGPRs: 13125 -> 13137 (0.09 %)
> > Spilled VGPRs: 88 -> 89 (1.14 %)
> > Private memory VGPRs: 0 -> 0 (0.00 %)
> > Scratch size: 80 -> 80 (0.00 %) dwords per thread
> > Code Size: 15752712 -> 15676352 (-0.48 %) bytes
> > LDS: 139 -> 139 (0.00 %) blocks
> > Max Waves: 31680 -> 31580 (-0.32 %)
> > Wait states: 0 -> 0 (0.00 %)
> > ---
> >   src/amd/common/ac_llvm_build.c    | 52 +++++++++++++++++++++++++++++++
> >   src/amd/common/ac_nir_to_llvm.c   | 29 ++++++++++++-----
> >   src/amd/vulkan/radv_nir_to_llvm.c |  2 +-
> >   3 files changed, 75 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> > index 1392ec0f238..22245aadba1 100644
> > --- a/src/amd/common/ac_llvm_build.c
> > +++ b/src/amd/common/ac_llvm_build.c
> > @@ -1161,6 +1161,47 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx,
> >                                 ac_get_load_intr_attribs(can_speculate));
> >   }
> >
> > +static LLVMValueRef
> > +ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx,
> > +                               LLVMValueRef rsrc,
> > +                               LLVMValueRef vindex,
> > +                               LLVMValueRef voffset,
> > +                               LLVMValueRef soffset,
> > +                               unsigned num_channels,
> > +                               bool glc,
> > +                               bool slc,
> > +                               bool can_speculate,
> > +                               bool use_format,
> > +                               bool structurized)
> > +{
> > +     LLVMValueRef args[5];
> > +     int idx = 0;
> > +     args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
> > +     if (structurized)
> > +             args[idx++] = vindex ? vindex : ctx->i32_0;
> > +     args[idx++] = voffset ? voffset : ctx->i32_0;
> > +     args[idx++] = soffset ? soffset : ctx->i32_0;
> > +     args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
> > +     unsigned func = CLAMP(num_channels, 1, 3) - 1;
> > +
> > +     LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32};
> > +     const char *type_names[] = {"f32", "v2f32", "v4f32"};
> > +     const char *indexing_kind = structurized ? "struct" : "raw";
> > +     char name[256];
> > +
> > +     if (use_format) {
> > +             snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s",
> > +                      indexing_kind, type_names[func]);
> > +     } else {
> > +             snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s",
> > +                      indexing_kind, type_names[func]);
> > +     }
> > +
> > +     return ac_build_intrinsic(ctx, name, types[func], args,
> > +                               idx,
> > +                               ac_get_load_intr_attribs(can_speculate));
> > +}
> > +
> >   LLVMValueRef
> >   ac_build_buffer_load(struct ac_llvm_context *ctx,
> >                    LLVMValueRef rsrc,
> > @@ -1218,6 +1259,11 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
> >                                        bool glc,
> >                                        bool can_speculate)
> >   {
> > +     if (HAVE_LLVM >= 0x800) {
> > +             return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0,
> > +                                                      num_channels, glc, false,
> > +                                                      can_speculate, true, true);
> > +     }
> >       return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
> >                                          num_channels, glc, false,
> >                                          can_speculate, true);
> > @@ -1231,6 +1277,12 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
> >                                                     bool glc,
> >                                                     bool can_speculate)
> >   {
> > +     if (HAVE_LLVM >= 0x800) {
> > +             return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0,
> > +                                                      num_channels, glc, false,
> > +                                                      can_speculate, true, true);
> > +     }
> > +
> >       LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
> >       LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, ctx->i32_1, "");
> >       stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), "");
> > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> > index c950b81dca2..a19e66fe2a0 100644
> > --- a/src/amd/common/ac_nir_to_llvm.c
> > +++ b/src/amd/common/ac_nir_to_llvm.c
> > @@ -2387,10 +2387,17 @@ static void visit_image_store(struct ac_nir_context *ctx,
> >               params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
> >                                                   ctx->ac.i32_0, ""); /* vindex */
> >               params[3] = ctx->ac.i32_0; /* voffset */
> > -             params[4] = glc;  /* glc */
> > -             params[5] = ctx->ac.i1false;  /* slc */
> > -             ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
> > -                                params, 6, 0);
> > +             if (HAVE_LLVM >= 0x800) {
> > +                     params[4] = ctx->ac.i32_0; /* soffset */
> > +                     params[5] = glc ? ctx->ac.i32_1 : ctx->ac.i32_0;
> > +                     ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.struct.buffer.store.format.v4f32", ctx->ac.voidt,
> > +                                        params, 6, 0);
>
> No SLC for this one?

GLC and SLC are combined into a single argument as a bitfield (1 = glc,
2 = slc). SLC is not mentioned here because it is always false.
>
> > +             } else {
> > +                     params[4] = glc;  /* glc */
> > +                     params[5] = ctx->ac.i1false;  /* slc */
> > +                     ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
> > +                                        params, 6, 0);
> > +             }
> >       } else {
> >               struct ac_image_args args = {};
> >               args.opcode = ac_image_store;
> > @@ -2470,10 +2477,18 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
> >               params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
> >                                                               ctx->ac.i32_0, ""); /* vindex */
> >               params[param_count++] = ctx->ac.i32_0; /* voffset */
> > -             params[param_count++] = ctx->ac.i1false;  /* slc */
> > +             if (HAVE_LLVM >= 0x800) {
> > +                     params[param_count++] = ctx->ac.i32_0; /* soffset */
> > +                     params[param_count++] = ctx->ac.i32_0;  /* slc */
> >
> > -             length = snprintf(intrinsic_name, sizeof(intrinsic_name),
> > -                               "llvm.amdgcn.buffer.atomic.%s", atomic_name);
> > +                     length = snprintf(intrinsic_name, sizeof(intrinsic_name),
> > +                                       "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name);
> > +             } else {
> > +                     params[param_count++] = ctx->ac.i1false;  /* slc */
> > +
> > +                     length = snprintf(intrinsic_name, sizeof(intrinsic_name),
> > +                                       "llvm.amdgcn.buffer.atomic.%s", atomic_name);
> > +             }
> >
> >               assert(length < sizeof(intrinsic_name));
> >               return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32,
> > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
> > index f56eb01dc52..2e6f88ac342 100644
> > --- a/src/amd/vulkan/radv_nir_to_llvm.c
> > +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> > @@ -3500,7 +3500,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
> >       ctx.abi.load_sampler_desc = radv_get_sampler_desc;
> >       ctx.abi.load_resource = radv_load_resource;
> >       ctx.abi.clamp_shadow_reference = false;
> > -     ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
> > +     ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;
> >
> >       if (shader_count >= 2)
> >               ac_init_exec_full_mask(&ctx.ac);
> >


More information about the mesa-dev mailing list