[Mesa-stable] [PATCH] ac/nir: Add workaround for GFX9 buffer views.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Wed Apr 11 09:26:29 UTC 2018


On Wed, Apr 4, 2018 at 10:19 PM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> On GFX9 whether the buffer size is interpreted as elements or bytes
> depends on whether IDXEN is enabled in the instruction. If the index
> is a constant zero, LLVM optimizes IDXEN to 0.
>
> Now the size in elements is interpreted in bytes which of course
> results in out of bounds accesses.
>
> The correct fix is most likely to disable the LLVM optimization,
> but we need something to work with LLVM <= 6.0.
>
> radeonsi does the max between stride and element count on the CPU
> but that results in the size intrinsics returning the wrong size
> for the buffer. This would cause CTS errors for radv.
>
> v2: Also include the store changes.
>
> Fixes: e38685cc62e 'Revert "radv: disable support for VEGA for now."'
> (backported from 4503ff760c794c3bb15b978a47c530037d56498e for 17.3)
> ---
>  src/amd/common/ac_llvm_build.c  | 20 ++++++++++++++++++++
>  src/amd/common/ac_llvm_build.h  |  8 ++++++++
>  src/amd/common/ac_nir_to_llvm.c | 36 ++++++++++++++++++++++++++++++------
>  src/amd/common/ac_shader_abi.h  |  4 ++++
>  4 files changed, 62 insertions(+), 6 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index e5cd23e025..f193f71c3e 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -960,6 +960,26 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
>                                           AC_FUNC_ATTR_READONLY);
>  }
>
> +LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
> +                                                  LLVMValueRef rsrc,
> +                                                  LLVMValueRef vindex,
> +                                                  LLVMValueRef voffset,
> +                                                  bool can_speculate)
> +{
> +       LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
> +       LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), "");
> +       stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), "");
> +
> +       LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
> +                                                     LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
> +                                                     elem_count, stride, "");
> +
> +       LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count,
> +                                                      LLVMConstInt(ctx->i32, 2, 0), "");
> +
> +       return ac_build_buffer_load_format(ctx, new_rsrc, vindex, voffset, can_speculate);
> +}
> +
>  /**
>   * Set range metadata on an instruction.  This can only be used on load and
>   * call instructions.  If you know an instruction can only produce the values
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index aa2a2899ab..d4264f2879 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -188,6 +188,14 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
>                                          LLVMValueRef voffset,
>                                          bool can_speculate);
>
> +/* load_format that handles the stride & element count better if idxen is
> + * disabled by LLVM. */
> +LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
> +                                                  LLVMValueRef rsrc,
> +                                                  LLVMValueRef vindex,
> +                                                  LLVMValueRef voffset,
> +                                                  bool can_speculate);
> +
>  LLVMValueRef
>  ac_get_thread_id(struct ac_llvm_context *ctx);
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index b696cb8a45..8ee80b1a67 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2257,11 +2257,19 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
>                                         struct ac_image_args *args)
>  {
>         if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
> -               return ac_build_buffer_load_format(&ctx->ac,
> -                                                  args->resource,
> -                                                  args->addr,
> -                                                  LLVMConstInt(ctx->ac.i32, 0, false),
> -                                                  true);
> +               if (ctx->abi->gfx9_stride_size_workaround) {
> +                       return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
> +                                                                    args->resource,
> +                                                                    args->addr,
> +                                                                    ctx->ac.i32_0,
> +                                                                    true);
> +               } else {
> +                       return ac_build_buffer_load_format(&ctx->ac,
> +                                                          args->resource,
> +                                                          args->addr,
> +                                                          ctx->ac.i32_0,
> +                                                          true);
> +               }
>         }
>
>         args->opcode = ac_image_sample;
> @@ -3613,8 +3621,23 @@ static void visit_image_store(struct ac_nir_context *ctx,
>                 glc = i1true;
>
>         if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
> +               LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, true);
> +
> +               if (ctx->abi->gfx9_stride_size_workaround) {
> +                       LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
> +                       LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
> +                       stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
> +
> +                       LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
> +                                                                     LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
> +                                                                     elem_count, stride, "");
> +
> +                       rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
> +                                                     LLVMConstInt(ctx->ac.i32, 2, 0), "");
> +               }
> +
>                 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
> -               params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, true);
> +               params[1] = rsrc;
>                 params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
>                                                     ctx->ac.i32_0, ""); /* vindex */
>                 params[3] = ctx->ac.i32_0; /* voffset */
> @@ -6645,6 +6668,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>         ctx.abi.load_ssbo = radv_load_ssbo;
>         ctx.abi.load_sampler_desc = radv_get_sampler_desc;
>         ctx.abi.clamp_shadow_reference = false;
> +       ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
>
>         if (shader_count >= 2)
>                 ac_init_exec_full_mask(&ctx.ac);
> diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
> index 14517d5570..b04dc076de 100644
> --- a/src/amd/common/ac_shader_abi.h
> +++ b/src/amd/common/ac_shader_abi.h
> @@ -92,6 +92,10 @@ struct ac_shader_abi {
>         /* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
>          * uses it due to promoting D16 to D32, but radv needs it off. */
>         bool clamp_shadow_reference;
> +
> +       /* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
> +        * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
> +       bool gfx9_stride_size_workaround;
>  };
>
>  #endif /* AC_SHADER_ABI_H */
> --
> 2.16.2
>


More information about the mesa-stable mailing list