[Mesa-dev] [PATCH] ac/nir: replace SI.buffer.load.dword with amdgcn.buffer.load

Bas Nieuwenhuizen basni at chromium.org
Thu Feb 1 16:08:26 UTC 2018


I'm surprised it does not use idxen and swizzling, but with the current
code, this is

Reviewed-by:  Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

On Thu, Feb 1, 2018 at 4:37 PM, Samuel Pitoiset <samuel.pitoiset at gmail.com>
wrote:

> The old intrinsic generates useless instructions; this was found while
> comparing geometry shaders between RadeonSI and RADV.
>
> This improves all Vulkan demos that use geometry shaders, +4%
> for deferredshadows, +9% for viewportarray, +7% for
> geometryshader on Polaris10.
>
> This seems to also improve DOW3 a little bit (+1%).
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/amd/common/ac_nir_to_llvm.c | 52 ++++++++++++++++--------------
> -----------
>  1 file changed, 20 insertions(+), 32 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_
> llvm.c
> index 5c5594956b..f89012bfe9 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3063,7 +3063,6 @@ load_gs_input(struct ac_shader_abi *abi,
>  {
>         struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(
> abi);
>         LLVMValueRef vtx_offset;
> -       LLVMValueRef args[9];
>         unsigned param, vtx_offset_param;
>         LLVMValueRef value[4], result;
>
> @@ -3081,20 +3080,16 @@ load_gs_input(struct ac_shader_abi *abi,
>                                                LLVMConstInt(ctx->ac.i32,
> param * 4 + i + const_index, 0), "");
>                         value[i] = ac_lds_load(&ctx->ac, dw_addr);
>                 } else {
> -                       args[0] = ctx->esgs_ring;
> -                       args[1] = vtx_offset;
> -                       args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i
> + const_index) * 256, false);
> -                       args[3] = ctx->ac.i32_0;
> -                       args[4] = ctx->ac.i32_1; /* OFFEN */
> -                       args[5] = ctx->ac.i32_0; /* IDXEN */
> -                       args[6] = ctx->ac.i32_1; /* GLC */
> -                       args[7] = ctx->ac.i32_0; /* SLC */
> -                       args[8] = ctx->ac.i32_0; /* TFE */
> -
> -                       value[i] = ac_build_intrinsic(&ctx->ac,
> "llvm.SI.buffer.load.dword.i32.i32",
> -                                                     ctx->ac.i32, args, 9,
> -
>  AC_FUNC_ATTR_READONLY |
> -                                                     AC_FUNC_ATTR_LEGACY);
> +                       LLVMValueRef soffset =
> +                               LLVMConstInt(ctx->ac.i32,
> +                                            (param * 4 + i + const_index)
> * 256,
> +                                            false);
> +
> +                       value[i] = ac_build_buffer_load(&ctx->ac,
> +                                                       ctx->esgs_ring, 1,
> +                                                       ctx->ac.i32_0,
> +                                                       vtx_offset,
> soffset,
> +                                                       0, 1, 0, true,
> false);
>                 }
>         }
>         result = ac_build_varying_gather_values(&ctx->ac, value,
> num_components, component);
> @@ -7213,16 +7208,9 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
>  static void
>  ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
>  {
> -       LLVMValueRef args[9];
> -       args[0] = ctx->gsvs_ring;
> -       args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id,
> LLVMConstInt(ctx->ac.i32, 4, false), "");
> -       args[3] = ctx->ac.i32_0;
> -       args[4] = ctx->ac.i32_1;  /* OFFEN */
> -       args[5] = ctx->ac.i32_0; /* IDXEN */
> -       args[6] = ctx->ac.i32_1;  /* GLC */
> -       args[7] = ctx->ac.i32_1;  /* SLC */
> -       args[8] = ctx->ac.i32_0; /* TFE */
> -
> +       LLVMValueRef vtx_offset =
> +               LLVMBuildMul(ctx->builder, ctx->abi.vertex_id,
> +                            LLVMConstInt(ctx->ac.i32, 4, false), "");
>         int idx = 0;
>
>         for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> @@ -7240,16 +7228,16 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context
> *ctx)
>                 }
>
>                 for (unsigned j = 0; j < length; j++) {
> -                       LLVMValueRef value;
> -                       args[2] = LLVMConstInt(ctx->ac.i32,
> +                       LLVMValueRef value, soffset;
> +
> +                       soffset = LLVMConstInt(ctx->ac.i32,
>                                                (slot * 4 + j) *
>                                                ctx->gs_max_out_vertices *
> 16 * 4, false);
>
> -                       value = ac_build_intrinsic(&ctx->ac,
> -
> "llvm.SI.buffer.load.dword.i32.i32",
> -                                                  ctx->ac.i32, args, 9,
> -                                                  AC_FUNC_ATTR_READONLY |
> -                                                  AC_FUNC_ATTR_LEGACY);
> +                       value = ac_build_buffer_load(&ctx->ac,
> ctx->gsvs_ring,
> +                                                    1, ctx->ac.i32_0,
> +                                                    vtx_offset, soffset,
> +                                                    0, 1, 1, true, false);
>
>                         LLVMBuildStore(ctx->builder,
>                                        ac_to_float(&ctx->ac, value),
> ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
> --
> 2.16.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180201/c9008b3f/attachment.html>


More information about the mesa-dev mailing list