<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 6/3/19 9:33 PM, Marek Olšák wrote:<br>
    </div>
    <blockquote type="cite"
cite="mid:CAAxE2A4skc9UpFBfGTCynndCEHqjxNNbkNZznbXjuOLf_57Dew@mail.gmail.com">
      <meta http-equiv="content-type" content="text/html; charset=UTF-8">
      <div dir="ltr">
        <div>SI doesn't support buffer_load_dwordx3 and
          buffer_store_dwordx3, but it supports buffer_load_format_xyz
          and buffer_store_format_xyz.</div>
      </div>
    </blockquote>
    OK, I will update.<br>
    <blockquote type="cite"
cite="mid:CAAxE2A4skc9UpFBfGTCynndCEHqjxNNbkNZznbXjuOLf_57Dew@mail.gmail.com">
      <div dir="ltr">
        <div><br>
        </div>
        <div>Marek<br>
        </div>
      </div>
      <br>
      <div class="gmail_quote">
        <div dir="ltr" class="gmail_attr">On Mon, Jun 3, 2019 at 9:09 AM
          Samuel Pitoiset &lt;<a href="mailto:samuel.pitoiset@gmail.com"
            moz-do-not-send="true">samuel.pitoiset@gmail.com</a>&gt;
          wrote:<br>
        </div>
        <blockquote class="gmail_quote" style="margin:0px 0px 0px
          0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">I
          thought LLVM was able to handle that itself but actually it<br>
          does not. That means we shouldn't try to emit vec3 on SI
          because<br>
          it's unsupported.<br>
          <br>
          Fixes: 6970a9a6ca9 ("ac,radv: remove the vec3 restriction with
          LLVM 9+")<br>
          Signed-off-by: Samuel Pitoiset &lt;<a
            href="mailto:samuel.pitoiset@gmail.com" target="_blank"
            moz-do-not-send="true">samuel.pitoiset@gmail.com</a>&gt;<br>
          ---<br>
           src/amd/common/ac_llvm_build.c    | 12 ++++++------<br>
           src/amd/common/ac_llvm_util.h     |  9 +++++++++<br>
           src/amd/common/ac_nir_to_llvm.c   |  3 ++-<br>
           src/amd/vulkan/radv_nir_to_llvm.c |  2 +-<br>
           4 files changed, 18 insertions(+), 8 deletions(-)<br>
          <br>
          diff --git a/src/amd/common/ac_llvm_build.c
          b/src/amd/common/ac_llvm_build.c<br>
          index 613c1eef942..7f5c8ef873c 100644<br>
          --- a/src/amd/common/ac_llvm_build.c<br>
          +++ b/src/amd/common/ac_llvm_build.c<br>
          @@ -1167,7 +1167,7 @@
          ac_build_llvm8_buffer_store_common(struct ac_llvm_context
          *ctx,<br>
                  args[idx++] = voffset ? voffset : ctx->i32_0;<br>
                  args[idx++] = soffset ? soffset : ctx->i32_0;<br>
                  args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0)
          + (slc ? 2 : 0), 0);<br>
          -       unsigned func = HAVE_LLVM < 0x900 &&
          num_channels == 3 ? 4 : num_channels;<br>
          +       unsigned func =
          !ac_has_vec3_support(ctx->chip_class) &&
          num_channels == 3 ? 4 : num_channels;<br>
                  const char *indexing_kind = structurized ? "struct" :
          "raw";<br>
                  char name[256], type_name[8];<br>
          <br>
          @@ -1227,7 +1227,7 @@ ac_build_buffer_store_dword(struct
          ac_llvm_context *ctx,<br>
           {<br>
                  /* Split 3 channel stores, because only LLVM 9+
          support 3-channel<br>
                   * intrinsics. */<br>
          -       if (num_channels == 3 && HAVE_LLVM < 0x900)
          {<br>
          +       if (num_channels == 3 &&
          !ac_has_vec3_support(ctx->chip_class)) {<br>
                          LLVMValueRef v[3], v01;<br>
          <br>
                          for (int i = 0; i < 3; i++) {<br>
          @@ -1354,7 +1354,7 @@ ac_build_llvm8_buffer_load_common(struct
          ac_llvm_context *ctx,<br>
                  args[idx++] = voffset ? voffset : ctx->i32_0;<br>
                  args[idx++] = soffset ? soffset : ctx->i32_0;<br>
                  args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0)
          + (slc ? 2 : 0), 0);<br>
          -       unsigned func = HAVE_LLVM < 0x900 &&
          num_channels == 3 ? 4 : num_channels;<br>
          +       unsigned func =
          !ac_has_vec3_support(ctx->chip_class) &&
          num_channels == 3 ? 4 : num_channels;<br>
                  const char *indexing_kind = structurized ? "struct" :
          "raw";<br>
                  char name[256], type_name[8];<br>
          <br>
          @@ -1420,7 +1420,7 @@ ac_build_buffer_load(struct
          ac_llvm_context *ctx,<br>
                          if (num_channels == 1)<br>
                                  return result[0];<br>
          <br>
          -               if (num_channels == 3 && HAVE_LLVM
          < 0x900)<br>
          +               if (num_channels == 3 &&
          !ac_has_vec3_support(ctx->chip_class))<br>
                                  result[num_channels++] =
          LLVMGetUndef(ctx->f32);<br>
                          return ac_build_gather_values(ctx, result,
          num_channels);<br>
                  }<br>
          @@ -1512,7 +1512,7 @@ ac_build_llvm8_tbuffer_load(struct
          ac_llvm_context *ctx,<br>
                  args[idx++] = soffset ? soffset : ctx->i32_0;<br>
                  args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt
          << 4), 0);<br>
                  args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0)
          + (slc ? 2 : 0), 0);<br>
          -       unsigned func = HAVE_LLVM < 0x900 &&
          num_channels == 3 ? 4 : num_channels;<br>
          +       unsigned func =
          !ac_has_vec3_support(ctx->chip_class) &&
          num_channels == 3 ? 4 : num_channels;<br>
                  const char *indexing_kind = structurized ? "struct" :
          "raw";<br>
                  char name[256], type_name[8];<br>
          <br>
          @@ -2011,7 +2011,7 @@ ac_build_llvm8_tbuffer_store(struct
          ac_llvm_context *ctx,<br>
                  args[idx++] = soffset ? soffset : ctx->i32_0;<br>
                  args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt
          << 4), 0);<br>
                  args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0)
          + (slc ? 2 : 0), 0);<br>
          -       unsigned func = HAVE_LLVM < 0x900 &&
          num_channels == 3 ? 4 : num_channels;<br>
          +       unsigned func =
          !ac_has_vec3_support(ctx->chip_class) &&
          num_channels == 3 ? 4 : num_channels;<br>
                  const char *indexing_kind = structurized ? "struct" :
          "raw";<br>
                  char name[256], type_name[8];<br>
          <br>
          diff --git a/src/amd/common/ac_llvm_util.h
          b/src/amd/common/ac_llvm_util.h<br>
          index ca00540da80..a45647a3360 100644<br>
          --- a/src/amd/common/ac_llvm_util.h<br>
          +++ b/src/amd/common/ac_llvm_util.h<br>
          @@ -146,6 +146,15 @@ bool ac_compile_module_to_binary(struct
          ac_compiler_passes *p, LLVMModuleRef mod<br>
           void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef
          passmgr);<br>
           void ac_enable_global_isel(LLVMTargetMachineRef tm);<br>
          <br>
          +static inline bool<br>
          +ac_has_vec3_support(enum chip_class chip)<br>
          +{<br>
          +       if (chip == GFX6)<br>
          +               return false;<br>
          +<br>
          +       return HAVE_LLVM >= 0x900;<br>
          +}<br>
          +<br>
           #ifdef __cplusplus<br>
           }<br>
           #endif<br>
          diff --git a/src/amd/common/ac_nir_to_llvm.c
          b/src/amd/common/ac_nir_to_llvm.c<br>
          index 51f92a6b062..429dac63d63 100644<br>
          --- a/src/amd/common/ac_nir_to_llvm.c<br>
          +++ b/src/amd/common/ac_nir_to_llvm.c<br>
          @@ -1576,7 +1576,8 @@ static void visit_store_ssbo(struct
          ac_nir_context *ctx,<br>
          <br>
                          /* Due to an LLVM limitation with LLVM < 9,
          split 3-element<br>
                           * writes into a 2-element and a 1-element
          write. */<br>
          -               if (count == 3 && (elem_size_bytes !=
          4 || HAVE_LLVM < 0x900)) {<br>
          +               if (count == 3 &&<br>
          +                   (elem_size_bytes != 4 ||
          !ac_has_vec3_support(ctx->ac.chip_class))) {<br>
                                  writemask |= 1 << (start + 2);<br>
                                  count = 2;<br>
                          }<br>
          diff --git a/src/amd/vulkan/radv_nir_to_llvm.c
          b/src/amd/vulkan/radv_nir_to_llvm.c<br>
          index dca4bebcdd1..ab552ae34ab 100644<br>
          --- a/src/amd/vulkan/radv_nir_to_llvm.c<br>
          +++ b/src/amd/vulkan/radv_nir_to_llvm.c<br>
          @@ -2768,7 +2768,7 @@ radv_emit_stream_output(struct
          radv_shader_context *ctx,<br>
                          /* fall through */<br>
                  case 4: /* as v4i32 */<br>
                          vdata =
          ac_build_gather_values(&ctx->ac, out,<br>
          -                                              HAVE_LLVM <
          0x900 ?<br>
          +                                             
          !ac_has_vec3_support(ctx->ac.chip_class) ?<br>
                                                       
           util_next_power_of_two(num_comps) :<br>
                                                         num_comps);<br>
                          break;<br>
          -- <br>
          2.21.0<br>
          <br>
          _______________________________________________<br>
          mesa-dev mailing list<br>
          <a href="mailto:mesa-dev@lists.freedesktop.org"
            target="_blank" moz-do-not-send="true">mesa-dev@lists.freedesktop.org</a><br>
          <a
            href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev"
            rel="noreferrer" target="_blank" moz-do-not-send="true">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a></blockquote>
      </div>
    </blockquote>
  </body>
</html>