Mesa (master): Revert "radv/llvm,aco: always split typed vertex buffer loads on GFX6 and GFX10+"

Tue Dec 1 13:58:24 UTC 2020

Module: Mesa
Branch: master
Commit: 3a858ecd406f27724c17d3fd409a558cb1cc4917
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3a858ecd406f27724c17d3fd409a558cb1cc4917

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Tue Dec  1 13:00:00 2020 +0100

Revert "radv/llvm,aco: always split typed vertex buffer loads on GFX6 and GFX10+"

The reverted commit introduces regressions.

This reverts commit 6fb4babfe98e222e648eb2ab210136b7540bf039.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7852>

---

 src/amd/compiler/aco_instruction_selection.cpp | 10 ++--------
 src/amd/vulkan/radv_nir_to_llvm.c              | 18 ++++++++++--------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 3732cfb8d42..d4261628fd9 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -4609,17 +4609,11 @@ void visit_load_interpolated_input(isel_context *ctx, nir_intrinsic_instr *instr
 bool check_vertex_fetch_size(isel_context *ctx, const ac_data_format_info *vtx_info,
                              unsigned offset, unsigned stride, unsigned channels)
 {
+   unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
    if (vtx_info->chan_byte_size != 4 && channels == 3)
       return false;
-
-   /* Always split typed vertex buffer loads on GFX6 and GFX10+ to avoid any
-    * alignment issues that triggers memory violations and eventually a GPU
-    * hang. This can happen if the stride (static or dynamic) is unaligned and
-    * also if the VBO offset is aligned to a scalar (eg. stride is 8 and VBO
-    * offset is 2 for R16G16B16A16_SNORM).
-    */
    return (ctx->options->chip_class >= GFX7 && ctx->options->chip_class <= GFX9) ||
-          (channels == 1);
+          (offset % vertex_byte_size == 0 && stride % vertex_byte_size == 0);
 }
 
 uint8_t get_fetch_data_format(isel_context *ctx, const ac_data_format_info *vtx_info,
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index c443a329adf..1b982e8e6b3 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -1184,15 +1184,17 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 		t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
 		t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
 
-		/* Always split typed vertex buffer loads on GFX6 and GFX10+
-		 * to avoid any alignment issues that triggers memory
-		 * violations and eventually a GPU hang. This can happen if
-		 * the stride (static or dynamic) is unaligned and also if the
-		 * VBO offset is aligned to a scalar (eg. stride is 8 and VBO
-		 * offset is 2 for R16G16B16A16_SNORM).
+		/* Perform per-channel vertex fetch operations if unaligned
+		 * accesses are detected. Only GFX6 and GFX10+ are affected.
 		 */
-		if (ctx->ac.chip_class == GFX6 ||
-		    ctx->ac.chip_class >= GFX10) {
+		bool unaligned_vertex_fetches = false;
+		if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) &&
+		    vtx_info->chan_format != data_format &&
+		    ((attrib_offset % vtx_info->element_size) ||
+		     (attrib_stride % vtx_info->element_size)))
+			unaligned_vertex_fetches = true;
+
+		if (unaligned_vertex_fetches) {
 			unsigned chan_format = vtx_info->chan_format;
 			LLVMValueRef values[4];