Mesa (main): ac/llvm: don't use tbuffer_store as a fallback for swizzled stores

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sat Apr 23 02:09:39 UTC 2022


Module: Mesa
Branch: main
Commit: 6698753cdb6d001669f51e23d42fec65d74e6b58
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6698753cdb6d001669f51e23d42fec65d74e6b58

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Fri Apr 15 02:00:55 2022 -0400

ac/llvm: don't use tbuffer_store as a fallback for swizzled stores

This depends on the offset computation fix from:
   "ac/llvm: remove inst_offset parameter from ac_build_buffer_store_dword"

v2: The instruction type is changed to MUBUF, which requires us to clear
    DATA_FORMAT when ADD_TID_ENABLE is set.

Reviewed-by: Mihai Preda <mhpreda at gmail.com> (v1)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com> (v1)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15966>

---

 src/amd/llvm/ac_llvm_build.c                     | 53 +-----------------------
 src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 13 +++++-
 2 files changed, 14 insertions(+), 52 deletions(-)

diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 55cefce89e4..306e23da4a0 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -51,11 +51,6 @@ struct ac_llvm_flow {
    LLVMBasicBlockRef loop_entry_block;
 };
 
-static void ac_build_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
-                                   LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset,
-                                   LLVMValueRef soffset, unsigned num_channels, unsigned dfmt,
-                                   unsigned nfmt, unsigned cache_policy);
-
 /* Initialize module-independent parts of the context.
  *
  * The caller is responsible for initializing ctx::module and ctx::builder.
@@ -1174,24 +1169,8 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
       return;
    }
 
-   /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
-    * (voffset is swizzled, but soffset isn't swizzled).
-    * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
-    */
-   if (!(cache_policy & ac_swizzled)) {
-      ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, soffset,
-                                   cache_policy, false);
-      return;
-   }
-
-   static const unsigned dfmts[] = {V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32,
-                                    V_008F0C_BUF_DATA_FORMAT_32_32_32,
-                                    V_008F0C_BUF_DATA_FORMAT_32_32_32_32};
-   unsigned dfmt = dfmts[num_channels - 1];
-   unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
-   ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, num_channels, dfmt,
-                          nfmt, cache_policy);
+   ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, soffset,
+                                cache_policy, false);
 }
 
 static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
@@ -1659,34 +1638,6 @@ LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigne
    return ac_build_gather_values(ctx, loads, 4);
 }
 
-static void ac_build_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
-                                   LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset,
-                                   LLVMValueRef soffset, unsigned num_channels, unsigned dfmt,
-                                   unsigned nfmt, unsigned cache_policy)
-{
-   LLVMValueRef args[7];
-   int idx = 0;
-   args[idx++] = vdata;
-   args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
-   if (vindex)
-      args[idx++] = vindex ? vindex : ctx->i32_0;
-   args[idx++] = voffset ? voffset : ctx->i32_0;
-   args[idx++] = soffset ? soffset : ctx->i32_0;
-   args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
-   args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
-   unsigned func =
-      !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
-   const char *indexing_kind = vindex ? "struct" : "raw";
-   char name[256], type_name[8];
-
-   LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
-   ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
-   snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s", indexing_kind, type_name);
-
-   ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
-}
-
 void ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                   LLVMValueRef vdata, LLVMValueRef voffset, LLVMValueRef soffset,
                                   unsigned cache_policy)
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index d5759026866..9362499b97e 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -332,6 +332,13 @@ void si_preload_esgs_ring(struct si_shader_context *ctx)
                                                           S_008F0C_ELEMENT_SIZE(1) |
                                                           S_008F0C_INDEX_STRIDE(3) |
                                                           S_008F0C_ADD_TID_ENABLE(1), 0), "");
+
+         /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */
+         if (ctx->screen->info.chip_class == GFX8) {
+            desc3 = LLVMBuildAnd(builder, desc3,
+                                 LLVMConstInt(ctx->ac.i32, C_008F0C_DATA_FORMAT, 0), "");
+         }
+
          ctx->esgs_ring = LLVMBuildInsertElement(builder, ctx->esgs_ring, desc1, ctx->ac.i32_1, "");
          ctx->esgs_ring = LLVMBuildInsertElement(builder, ctx->esgs_ring, desc3,
                                                  LLVMConstInt(ctx->ac.i32, 3, 0), "");
@@ -408,8 +415,12 @@ void si_preload_gs_rings(struct si_shader_context *ctx)
          rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
       } else {
+         /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */
+         unsigned data_format = ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ?
+                                   0 : V_008F0C_BUF_DATA_FORMAT_32;
+
          rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                  S_008F0C_DATA_FORMAT(data_format) |
                   S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */
       }
 



More information about the mesa-commit mailing list