[Mesa-dev] [PATCH v3] ac, radv: remove the vec3 restriction with LLVM 9+

Marek Olšák maraeo at gmail.com
Tue May 7 23:00:59 UTC 2019


Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Thu, May 2, 2019 at 10:12 AM Samuel Pitoiset <samuel.pitoiset at gmail.com>
wrote:

> This change requires LLVM r356755.
>
> 32706 shaders in 16744 tests
> Totals:
> SGPRS: 1448848 -> 1455984 (0.49 %)
> VGPRS: 1016684 -> 1016220 (-0.05 %)
> Spilled SGPRs: 25871 -> 25815 (-0.22 %)
> Spilled VGPRs: 122 -> 122 (0.00 %)
> Scratch size: 11964 -> 11956 (-0.07 %) dwords per thread
> Code Size: 55324500 -> 55301152 (-0.04 %) bytes
> Max Waves: 235660 -> 235586 (-0.03 %)
>
> Totals from affected shaders:
> SGPRS: 293704 -> 300840 (2.43 %)
> VGPRS: 246716 -> 246252 (-0.19 %)
> Spilled SGPRs: 159 -> 103 (-35.22 %)
> Scratch size: 188 -> 180 (-4.26 %) dwords per thread
> Code Size: 8653664 -> 8630316 (-0.27 %) bytes
> Max Waves: 60811 -> 60737 (-0.12 %)
>
> v3: - rebase on top of master
>     - remove the restriction for SSBO stores as well
> v2: - fix llvm 8
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>
> I plan to run benchmarks with that change.
>
>  src/amd/common/ac_llvm_build.c    | 15 ++++++++-------
>  src/amd/common/ac_llvm_build.h    |  1 +
>  src/amd/common/ac_nir_to_llvm.c   |  9 ++++++---
>  src/amd/vulkan/radv_nir_to_llvm.c |  4 +++-
>  4 files changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c
> b/src/amd/common/ac_llvm_build.c
> index 22b771db774..e191a64310f 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -84,6 +84,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
>         ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
>         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
>         ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
> +       ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
>         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
>         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
>
> @@ -1167,7 +1168,7 @@ ac_build_llvm8_buffer_store_common(struct
> ac_llvm_context *ctx,
>         args[idx++] = voffset ? voffset : ctx->i32_0;
>         args[idx++] = soffset ? soffset : ctx->i32_0;
>         args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -       unsigned func = num_channels == 3 ? 4 : num_channels;
> +       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 :
> num_channels;
>         const char *indexing_kind = structurized ? "struct" : "raw";
>         char name[256], type_name[8];
>
> @@ -1225,9 +1226,9 @@ ac_build_buffer_store_dword(struct ac_llvm_context
> *ctx,
>                             bool writeonly_memory,
>                             bool swizzle_enable_hint)
>  {
> -       /* Split 3 channel stores, becase LLVM doesn't support 3-channel
> +       /* Split 3 channel stores, because only LLVM 9+ support 3-channel
>          * intrinsics. */
> -       if (num_channels == 3) {
> +       if (num_channels == 3 && HAVE_LLVM < 0x900) {
>                 LLVMValueRef v[3], v01;
>
>                 for (int i = 0; i < 3; i++) {
> @@ -1354,7 +1355,7 @@ ac_build_llvm8_buffer_load_common(struct
> ac_llvm_context *ctx,
>         args[idx++] = voffset ? voffset : ctx->i32_0;
>         args[idx++] = soffset ? soffset : ctx->i32_0;
>         args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -       unsigned func = num_channels == 3 ? 4 : num_channels;
> +       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 :
> num_channels;
>         const char *indexing_kind = structurized ? "struct" : "raw";
>         char name[256], type_name[8];
>
> @@ -1420,7 +1421,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
>                 if (num_channels == 1)
>                         return result[0];
>
> -               if (num_channels == 3)
> +               if (num_channels == 3 && HAVE_LLVM < 0x900)
>                         result[num_channels++] = LLVMGetUndef(ctx->f32);
>                 return ac_build_gather_values(ctx, result, num_channels);
>         }
> @@ -1512,7 +1513,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context
> *ctx,
>         args[idx++] = soffset ? soffset : ctx->i32_0;
>         args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
>         args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -       unsigned func = num_channels == 3 ? 4 : num_channels;
> +       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 :
> num_channels;
>         const char *indexing_kind = structurized ? "struct" : "raw";
>         char name[256], type_name[8];
>
> @@ -1698,7 +1699,7 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context
> *ctx,
>         args[idx++] = soffset ? soffset : ctx->i32_0;
>         args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
>         args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -       unsigned func = num_channels == 3 ? 4 : num_channels;
> +       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 :
> num_channels;
>         const char *indexing_kind = structurized ? "struct" : "raw";
>         char name[256], type_name[8];
>
> diff --git a/src/amd/common/ac_llvm_build.h
> b/src/amd/common/ac_llvm_build.h
> index 98f856106d6..19db808a9a8 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -71,6 +71,7 @@ struct ac_llvm_context {
>         LLVMTypeRef v3i32;
>         LLVMTypeRef v4i32;
>         LLVMTypeRef v2f32;
> +       LLVMTypeRef v3f32;
>         LLVMTypeRef v4f32;
>         LLVMTypeRef v8i32;
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index c92eaaca31d..d0bfeb3efa9 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1575,9 +1575,9 @@ static void visit_store_ssbo(struct ac_nir_context
> *ctx,
>
>                 u_bit_scan_consecutive_range(&writemask, &start, &count);
>
> -               /* Due to an LLVM limitation, split 3-element writes
> -                * into a 2-element and a 1-element write. */
> -               if (count == 3) {
> +               /* Due to an LLVM limitation with LLVM < 9, split 3-element
> +                * writes into a 2-element and a 1-element write. */
> +               if (count == 3 && (elem_size_bytes != 4 || HAVE_LLVM <
> 0x900)) {
>                         writemask |= 1 << (start + 2);
>                         count = 2;
>                 }
> @@ -1619,6 +1619,9 @@ static void visit_store_ssbo(struct ac_nir_context
> *ctx,
>                         case 16: /* v4f32 */
>                                 data_type = ctx->ac.v4f32;
>                                 break;
> +                       case 12: /* v3f32 */
> +                               data_type = ctx->ac.v3f32;
> +                               break;
>                         case 8: /* v2f32 */
>                                 data_type = ctx->ac.v2f32;
>                                 break;
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index b4a19aa2e5d..c40ea004831 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2748,7 +2748,9 @@ radv_emit_stream_output(struct radv_shader_context
> *ctx,
>                 /* fall through */
>         case 4: /* as v4i32 */
>                 vdata = ac_build_gather_values(&ctx->ac, out,
> -
> util_next_power_of_two(num_comps));
> +                                              HAVE_LLVM < 0x900 ?
> +
> util_next_power_of_two(num_comps) :
> +                                              num_comps);
>                 break;
>         }
>
> --
> 2.21.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190507/a24092b8/attachment.html>


More information about the mesa-dev mailing list