[Mesa-dev] [PATCH 3/4] radeonsi: create load_const_buffer_desc_fast_path() helper

Marek Olšák maraeo at gmail.com
Fri Mar 30 02:29:44 UTC 2018


The driver should ignore the NIR option on SI with LLVM <= 0x0500.

Marek

On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri <tarceri at itsqueeze.com>
wrote:

> This will be shared by the TGSI and NIR backends. For simplicity,
> we leave the workaround for SI with LLVM 5.0 and lower only in the
> TGSI backend.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 88
> ++++++++++++++++++--------------
>  1 file changed, 49 insertions(+), 39 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index d5607a99d32..62cb7ea7eb5 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2322,6 +2322,49 @@ void si_tgsi_declare_compute_memory(struct
> si_shader_context *ctx,
>         si_declare_compute_memory(ctx);
>  }
>
> +static LLVMValueRef load_const_buffer_desc_fast_path(struct
> si_shader_context *ctx)
> +{
> +       LLVMValueRef ptr =
> +               LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_
> buffers);
> +       struct si_shader_selector *sel = ctx->shader->selector;
> +
> +       /* Do the bounds checking with a descriptor, because
> +        * doing computation and manual bounds checking of 64-bit
> +        * addresses generates horrible VALU code with very high
> +        * VGPR usage and very low SIMD occupancy.
> +        */
> +       ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
> +
> +       LLVMValueRef desc0, desc1;
> +       if (HAVE_32BIT_POINTERS) {
> +               desc0 = ptr;
> +               desc1 = LLVMConstInt(ctx->i32,
> +                                    S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi),
> 0);
> +       } else {
> +               ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32,
> "");
> +               desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr,
> ctx->i32_0, "");
> +               desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr,
> ctx->i32_1, "");
> +               /* Mask out all bits except BASE_ADDRESS_HI. */
> +               desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
> +                                    LLVMConstInt(ctx->i32,
> ~C_008F04_BASE_ADDRESS_HI, 0), "");
> +       }
> +
> +       LLVMValueRef desc_elems[] = {
> +               desc0,
> +               desc1,
> +               LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) *
> 16, 0),
> +               LLVMConstInt(ctx->i32,
> +                       S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> +                       S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> +                       S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> +                       S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> +                       S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT)
> |
> +                       S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32),
> 0)
> +       };
> +
> +       return ac_build_gather_values(&ctx->ac, desc_elems, 4);
> +}
> +
>  static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx,
> int i)
>  {
>         LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
> @@ -2400,8 +2443,6 @@ static LLVMValueRef fetch_constant(
>         /* Fast path when user data SGPRs point to constant buffer 0
> directly. */
>         if (sel->info.const_buffers_declared == 1 &&
>             sel->info.shader_buffers_declared == 0) {
> -               LLVMValueRef ptr =
> -                       LLVMGetParam(ctx->main_fn,
> ctx->param_const_and_shader_buffers);
>
>                 /* This enables use of s_load_dword and flat_load_dword
> for const buffer 0
>                  * loads, and up to x4 load opcode merging. However, it
> leads to horrible
> @@ -2416,48 +2457,17 @@ static LLVMValueRef fetch_constant(
>                  * s_buffer_load_dword (that we have to prevent) is when
> we use use
>                  * a literal offset where we don't need bounds checking.
>                  */
> -               if (ctx->screen->info.chip_class == SI &&
> -                    HAVE_LLVM < 0x0600 &&
> -                    !reg->Register.Indirect) {
> +               if (ctx->screen->info.chip_class == SI && HAVE_LLVM <
> 0x0600 &&
> +                   !reg->Register.Indirect) {
> +                       LLVMValueRef ptr =
> +                               LLVMGetParam(ctx->main_fn,
> ctx->param_const_and_shader_buffers);
> +
>                         addr = LLVMBuildLShr(ctx->ac.builder, addr,
> LLVMConstInt(ctx->i32, 2, 0), "");
>                         LLVMValueRef result =
> ac_build_load_invariant(&ctx->ac, ptr, addr);
>                         return bitcast(bld_base, type, result);
>                 }
>
> -               /* Do the bounds checking with a descriptor, because
> -                * doing computation and manual bounds checking of 64-bit
> -                * addresses generates horrible VALU code with very high
> -                * VGPR usage and very low SIMD occupancy.
> -                */
> -               ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr,
> ctx->ac.intptr, "");
> -
> -               LLVMValueRef desc0, desc1;
> -               if (HAVE_32BIT_POINTERS) {
> -                       desc0 = ptr;
> -                       desc1 = LLVMConstInt(ctx->i32,
> -                                            S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi),
> 0);
> -               } else {
> -                       ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
> ctx->v2i32, "");
> -                       desc0 = LLVMBuildExtractElement(ctx->ac.builder,
> ptr, ctx->i32_0, "");
> -                       desc1 = LLVMBuildExtractElement(ctx->ac.builder,
> ptr, ctx->i32_1, "");
> -                       /* Mask out all bits except BASE_ADDRESS_HI. */
> -                       desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
> -                                            LLVMConstInt(ctx->i32,
> ~C_008F04_BASE_ADDRESS_HI, 0), "");
> -               }
> -
> -               LLVMValueRef desc_elems[] = {
> -                       desc0,
> -                       desc1,
> -                       LLVMConstInt(ctx->i32,
> (sel->info.const_file_max[0] + 1) * 16, 0),
> -                       LLVMConstInt(ctx->i32,
> -                               S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> -                               S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> -                               S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> -                               S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> -                               S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT)
> |
> -                               S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32),
> 0)
> -               };
> -               LLVMValueRef desc = ac_build_gather_values(&ctx->ac,
> desc_elems, 4);
> +               LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx);
>                 LLVMValueRef result = buffer_load_const(ctx, desc, addr);
>                 return bitcast(bld_base, type, result);
>         }
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180329/a7f7c6be/attachment.html>


More information about the mesa-dev mailing list