[Mesa-dev] [PATCH v2] ac, radv: do not emit vec3 for raw load/store on SI

Marek Olšák maraeo at gmail.com
Mon Jun 3 20:49:57 UTC 2019


tbuffer loads and stores should pass use_format=true to ac_has_vec3_support(),
because their *_xyz variants are supported. Other than that:

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Mon, Jun 3, 2019 at 3:52 PM Samuel Pitoiset <samuel.pitoiset at gmail.com>
wrote:

> It's unsupported; only load/store format with vec3 is supported.
>
> v2: - allow to use load/store format with vec3
>
> Fixes: 6970a9a6ca9 ("ac,radv: remove the vec3 restriction with LLVM 9+")
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/amd/common/ac_llvm_build.c    | 12 ++++++------
>  src/amd/common/ac_llvm_util.h     | 11 +++++++++++
>  src/amd/common/ac_nir_to_llvm.c   |  3 ++-
>  src/amd/vulkan/radv_nir_to_llvm.c |  2 +-
>  4 files changed, 20 insertions(+), 8 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c
> b/src/amd/common/ac_llvm_build.c
> index 613c1eef942..d0e11141b81 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1167,7 +1167,7 @@ ac_build_llvm8_buffer_store_common(struct
> ac_llvm_context *ctx,
>         args[idx++] = voffset ? voffset : ctx->i32_0;
>         args[idx++] = soffset ? soffset : ctx->i32_0;
>         args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 :
> num_channels;
> +       unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format)
> && num_channels == 3 ? 4 : num_channels;
>         const char *indexing_kind = structurized ? "struct" : "raw";
>         char name[256], type_name[8];
>
> @@ -1227,7 +1227,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context
> *ctx,
>  {
>         /* Split 3 channel stores, because only LLVM 9+ support 3-channel
>          * intrinsics. */
> -       if (num_channels == 3 && HAVE_LLVM < 0x900) {
> +       if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class,
> false)) {
>                 LLVMValueRef v[3], v01;
>
>                 for (int i = 0; i < 3; i++) {
> @@ -1354,7 +1354,7 @@ ac_build_llvm8_buffer_load_common(struct
> ac_llvm_context *ctx,
>         args[idx++] = voffset ? voffset : ctx->i32_0;
>         args[idx++] = soffset ? soffset : ctx->i32_0;
>         args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 :
> num_channels;
> +       unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format)
> && num_channels == 3 ? 4 : num_channels;
>         const char *indexing_kind = structurized ? "struct" : "raw";
>         char name[256], type_name[8];
>
> @@ -1420,7 +1420,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
>                 if (num_channels == 1)
>                         return result[0];
>
> -               if (num_channels == 3 && HAVE_LLVM < 0x900)
> +               if (num_channels == 3 &&
> !ac_has_vec3_support(ctx->chip_class, false))
>                         result[num_channels++] = LLVMGetUndef(ctx->f32);
>                 return ac_build_gather_values(ctx, result, num_channels);
>         }
> @@ -1512,7 +1512,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context
> *ctx,
>         args[idx++] = soffset ? soffset : ctx->i32_0;
>         args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
>         args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 :
> num_channels;
> +       unsigned func = !ac_has_vec3_support(ctx->chip_class, false) &&
> num_channels == 3 ? 4 : num_channels;
>         const char *indexing_kind = structurized ? "struct" : "raw";
>         char name[256], type_name[8];
>
> @@ -2011,7 +2011,7 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context
> *ctx,
>         args[idx++] = soffset ? soffset : ctx->i32_0;
>         args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
>         args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 :
> num_channels;
> +       unsigned func = !ac_has_vec3_support(ctx->chip_class, false) &&
> num_channels == 3 ? 4 : num_channels;
>         const char *indexing_kind = structurized ? "struct" : "raw";
>         char name[256], type_name[8];
>
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index ca00540da80..0295e2bfd11 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -146,6 +146,17 @@ bool ac_compile_module_to_binary(struct
> ac_compiler_passes *p, LLVMModuleRef mod
>  void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
>  void ac_enable_global_isel(LLVMTargetMachineRef tm);
>
> +static inline bool
> +ac_has_vec3_support(enum chip_class chip, bool use_format)
> +{
> +       if (chip == GFX6 && !use_format) {
> +               /* GFX6 only supports vec3 with load/store format. */
> +               return false;
> +       }
> +
> +       return HAVE_LLVM >= 0x900;
> +}
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index 51f92a6b062..2bc32440bec 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1576,7 +1576,8 @@ static void visit_store_ssbo(struct ac_nir_context
> *ctx,
>
>                 /* Due to an LLVM limitation with LLVM < 9, split 3-element
>                  * writes into a 2-element and a 1-element write. */
> -               if (count == 3 && (elem_size_bytes != 4 || HAVE_LLVM <
> 0x900)) {
> +               if (count == 3 &&
> +                   (elem_size_bytes != 4 ||
> !ac_has_vec3_support(ctx->ac.chip_class, false))) {
>                         writemask |= 1 << (start + 2);
>                         count = 2;
>                 }
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index dca4bebcdd1..d9d35d71258 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2768,7 +2768,7 @@ radv_emit_stream_output(struct radv_shader_context
> *ctx,
>                 /* fall through */
>         case 4: /* as v4i32 */
>                 vdata = ac_build_gather_values(&ctx->ac, out,
> -                                              HAVE_LLVM < 0x900 ?
> +
> !ac_has_vec3_support(ctx->ac.chip_class, false) ?
>
>  util_next_power_of_two(num_comps) :
>                                                num_comps);
>                 break;
> --
> 2.21.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190603/d2ac4e8f/attachment.html>


More information about the mesa-dev mailing list