Mesa (master): ac: move ac_lower_indirect_derefs() outside of the LLVM dir

Fri Apr 23 10:19:23 UTC 2021

Module: Mesa
Branch: master
Commit: 1c702a82397bb0c84bee1478912c0e5b69f95eb5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c702a82397bb0c84bee1478912c0e5b69f95eb5

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Thu Apr 22 16:14:32 2021 +0200

ac: move ac_lower_indirect_derefs() outside of the LLVM dir

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10403>

---

 src/amd/Makefile.sources                     |  1 +
 src/amd/common/ac_nir.c                      | 67 ++++++++++++++++++++++++++++
 src/amd/common/ac_nir.h                      |  4 ++
 src/amd/common/meson.build                   |  1 +
 src/amd/llvm/ac_nir_to_llvm.c                | 41 -----------------
 src/amd/llvm/ac_nir_to_llvm.h                |  2 -
 src/amd/vulkan/radv_pipeline.c               |  9 ++--
 src/amd/vulkan/radv_shader.c                 |  2 +-
 src/gallium/drivers/radeonsi/si_shader_nir.c |  3 +-
 9 files changed, 81 insertions(+), 49 deletions(-)

diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources
index 881eb17e1f8..fc378929f6f 100644
--- a/src/amd/Makefile.sources
+++ b/src/amd/Makefile.sources
@@ -44,6 +44,7 @@ AMD_COMMON_FILES = \
 	common/ac_gpu_info.h \
 	common/ac_msgpack.c \
 	common/ac_msgpack.h \
+	common/ac_nir.c \
 	common/ac_nir.h \
 	common/ac_nir_lower_esgs_io_to_mem.c \
 	common/ac_nir_lower_tess_io_to_mem.c \
diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c
new file mode 100644
index 00000000000..8a8e2020008
--- /dev/null
+++ b/src/amd/common/ac_nir.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "ac_nir.h"
+
+bool
+ac_nir_lower_indirect_derefs(nir_shader *shader,
+                             enum chip_class chip_class)
+{
+   bool progress = false;
+
+   /* Lower large variables to scratch first so that we won't bloat the
+    * shader by generating large if ladders for them. We later lower
+    * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
+    */
+   NIR_PASS(progress, shader, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
+            glsl_get_natural_size_align_bytes);
+
+   /* LLVM doesn't support VGPR indexing on GFX9. */
+   bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
+
+   /* TODO: Indirect indexing of GS inputs is unimplemented.
+    *
+    * TCS and TES load inputs directly from LDS or offchip memory, so
+    * indirect indexing is trivial.
+    */
+   nir_variable_mode indirect_mask = 0;
+   if (shader->info.stage == MESA_SHADER_GEOMETRY ||
+       (shader->info.stage != MESA_SHADER_TESS_CTRL && shader->info.stage != MESA_SHADER_TESS_EVAL &&
+        !llvm_has_working_vgpr_indexing)) {
+      indirect_mask |= nir_var_shader_in;
+   }
+   if (!llvm_has_working_vgpr_indexing && shader->info.stage != MESA_SHADER_TESS_CTRL)
+      indirect_mask |= nir_var_shader_out;
+
+   /* TODO: We shouldn't need to do this, however LLVM isn't currently
+    * smart enough to handle indirects without causing excess spilling
+    * causing the gpu to hang.
+    *
+    * See the following thread for more details of the problem:
+    * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+    */
+   indirect_mask |= nir_var_function_temp;
+
+   progress |= nir_lower_indirect_derefs(shader, indirect_mask, UINT32_MAX);
+   return progress;
+}
diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h
index a6847bf8a3c..b3d4c6b56f7 100644
--- a/src/amd/common/ac_nir.h
+++ b/src/amd/common/ac_nir.h
@@ -82,6 +82,10 @@ ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
                               enum chip_class chip_class,
                               unsigned num_reserved_es_outputs);
 
+bool
+ac_nir_lower_indirect_derefs(nir_shader *shader,
+                             enum chip_class chip_class);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build
index 885f88ebd4f..fc8d0fe7e49 100644
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -87,6 +87,7 @@ amd_common_files = files(
   'ac_msgpack.c',
   'ac_msgpack.h',
   'ac_rgp_elf_object_pack.c',
+  'ac_nir.c',
   'ac_nir.h',
   'ac_nir_lower_esgs_io_to_mem.c',
   'ac_nir_lower_tess_io_to_mem.c',
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index cd1a612985f..359500cd2bb 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -5127,47 +5127,6 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
       ralloc_free(ctx.verified_interp);
 }
 
-bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
-{
-   bool progress = false;
-
-   /* Lower large variables to scratch first so that we won't bloat the
-    * shader by generating large if ladders for them. We later lower
-    * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
-    */
-   NIR_PASS(progress, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
-            glsl_get_natural_size_align_bytes);
-
-   /* LLVM doesn't support VGPR indexing on GFX9. */
-   bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
-
-   /* TODO: Indirect indexing of GS inputs is unimplemented.
-    *
-    * TCS and TES load inputs directly from LDS or offchip memory, so
-    * indirect indexing is trivial.
-    */
-   nir_variable_mode indirect_mask = 0;
-   if (nir->info.stage == MESA_SHADER_GEOMETRY ||
-       (nir->info.stage != MESA_SHADER_TESS_CTRL && nir->info.stage != MESA_SHADER_TESS_EVAL &&
-        !llvm_has_working_vgpr_indexing)) {
-      indirect_mask |= nir_var_shader_in;
-   }
-   if (!llvm_has_working_vgpr_indexing && nir->info.stage != MESA_SHADER_TESS_CTRL)
-      indirect_mask |= nir_var_shader_out;
-
-   /* TODO: We shouldn't need to do this, however LLVM isn't currently
-    * smart enough to handle indirects without causing excess spilling
-    * causing the gpu to hang.
-    *
-    * See the following thread for more details of the problem:
-    * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
-    */
-   indirect_mask |= nir_var_function_temp;
-
-   progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
-   return progress;
-}
-
 static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
 {
    if (intrin->intrinsic != nir_intrinsic_store_output)
diff --git a/src/amd/llvm/ac_nir_to_llvm.h b/src/amd/llvm/ac_nir_to_llvm.h
index eab16252174..2b7d825f846 100644
--- a/src/amd/llvm/ac_nir_to_llvm.h
+++ b/src/amd/llvm/ac_nir_to_llvm.h
@@ -47,8 +47,6 @@ static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
    return (index * 4) + chan;
 }
 
-bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
-
 bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
 
 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 9f4289c598b..ab2149f9ec5 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -43,6 +43,7 @@
 #include "ac_exp_param.h"
 #include "ac_llvm_util.h"
 #include "ac_nir_to_llvm.h"
+#include "ac_nir.h"
 #include "ac_shader_util.h"
 #include "aco_interface.h"
 #include "sid.h"
@@ -2339,16 +2340,16 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders,
 
       if (progress) {
          if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
-            ac_lower_indirect_derefs(ordered_shaders[i],
-                                     pipeline->device->physical_device->rad_info.chip_class);
+            ac_nir_lower_indirect_derefs(ordered_shaders[i],
+                                         pipeline->device->physical_device->rad_info.chip_class);
             /* remove dead writes, which can remove input loads */
             nir_lower_vars_to_ssa(ordered_shaders[i]);
             nir_opt_dce(ordered_shaders[i]);
          }
 
          if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
-            ac_lower_indirect_derefs(ordered_shaders[i - 1],
-                                     pipeline->device->physical_device->rad_info.chip_class);
+            ac_nir_lower_indirect_derefs(ordered_shaders[i - 1],
+                                         pipeline->device->physical_device->rad_info.chip_class);
          }
       }
    }
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 3d0e8acddaf..ab1c6e9d156 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -644,7 +644,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
     * bloat the instruction count of the loop and cause it to be
     * considered too large for unrolling.
     */
-   if (ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
+   if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
        !(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) &&
        nir->info.stage != MESA_SHADER_COMPUTE) {
       /* Optimize the lowered code before the linking optimizations. */
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index c941fff6ee8..06df038598a 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -23,6 +23,7 @@
  */
 
 #include "ac_nir_to_llvm.h"
+#include "ac_nir.h"
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_deref.h"
@@ -871,7 +872,7 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
       NIR_PASS(changed, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
    }
 
-   changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
+   changed |= ac_nir_lower_indirect_derefs(nir, sscreen->info.chip_class);
    if (changed)
       si_nir_opts(sscreen, nir, false);