Mesa (main): radv,ac/nir: lower global access to _amd global access intrinsics

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 13 17:21:07 UTC 2022


Module: Mesa
Branch: main
Commit: 61ac5acca3b58ea4ebac45d1aa0732ae9c6517d5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=61ac5acca3b58ea4ebac45d1aa0732ae9c6517d5

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Thu Dec  2 14:35:15 2021 +0000

radv,ac/nir: lower global access to _amd global access intrinsics

fossil-db (Sienna Cichlid):
Totals from 400 (0.30% of 134621) affected shaders:
VGPRs: 18696 -> 18688 (-0.04%)
CodeSize: 2031348 -> 1946640 (-4.17%)
Instrs: 374703 -> 360226 (-3.86%)
Latency: 4200727 -> 4108628 (-2.19%); split: -2.20%, +0.01%
InvThroughput: 1059935 -> 1029441 (-2.88%); split: -2.88%, +0.00%
VClause: 5777 -> 5771 (-0.10%)
SClause: 11890 -> 10891 (-8.40%); split: -8.57%, +0.17%
Copies: 34035 -> 33259 (-2.28%); split: -2.98%, +0.70%
Branches: 11108 -> 11100 (-0.07%); split: -0.08%, +0.01%
PreSGPRs: 15999 -> 15942 (-0.36%); split: -0.44%, +0.08%
PreVGPRs: 16994 -> 16970 (-0.14%)

fossil-db (Polaris10):
Totals from 400 (0.29% of 135668) affected shaders:
SGPRs: 23799 -> 22919 (-3.70%); split: -4.30%, +0.61%
VGPRs: 18480 -> 18472 (-0.04%)
CodeSize: 2090316 -> 2041592 (-2.33%)
Instrs: 395461 -> 385747 (-2.46%); split: -2.46%, +0.00%
Latency: 5045768 -> 5020196 (-0.51%); split: -0.53%, +0.02%
InvThroughput: 2694320 -> 2689886 (-0.16%); split: -0.23%, +0.07%
VClause: 5982 -> 5968 (-0.23%)
SClause: 12064 -> 10823 (-10.29%); split: -10.33%, +0.04%
Copies: 48233 -> 48322 (+0.18%); split: -0.47%, +0.65%
PreSGPRs: 16409 -> 16358 (-0.31%); split: -0.39%, +0.08%

fossil-db (Pitcairn):
Totals from 400 (0.29% of 135668) affected shaders:
SGPRs: 22431 -> 22215 (-0.96%); split: -2.60%, +1.64%
VGPRs: 18776 -> 18560 (-1.15%); split: -1.21%, +0.06%
CodeSize: 2104440 -> 2017708 (-4.12%)
MaxWaves: 2363 -> 2367 (+0.17%)
Instrs: 413099 -> 397446 (-3.79%)
Latency: 5507707 -> 5450251 (-1.04%); split: -1.12%, +0.07%
InvThroughput: 2838867 -> 2786903 (-1.83%); split: -1.83%, +0.00%
VClause: 10334 -> 10097 (-2.29%)
SClause: 12346 -> 11005 (-10.86%); split: -10.89%, +0.02%
Copies: 54034 -> 52065 (-3.64%); split: -3.99%, +0.35%
PreSGPRs: 17916 -> 17857 (-0.33%); split: -0.40%, +0.07%
PreVGPRs: 16917 -> 16893 (-0.14%)

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14124>

---

 src/amd/common/ac_nir.h                     |   3 +
 src/amd/common/ac_nir_lower_global_access.c | 187 ++++++++++++++++++++++++++++
 src/amd/common/meson.build                  |   1 +
 src/amd/vulkan/radv_pipeline.c              |   2 +
 4 files changed, 193 insertions(+)

diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h
index faaf29d58f6..22159506a38 100644
--- a/src/amd/common/ac_nir.h
+++ b/src/amd/common/ac_nir.h
@@ -136,6 +136,9 @@ ac_nir_cull_triangle(nir_builder *b,
                      nir_ssa_def *initially_accepted,
                      nir_ssa_def *pos[3][4]);
 
+bool
+ac_nir_lower_global_access(nir_shader *shader);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_nir_lower_global_access.c b/src/amd/common/ac_nir_lower_global_access.c
new file mode 100644
index 00000000000..22a36732e0b
--- /dev/null
+++ b/src/amd/common/ac_nir_lower_global_access.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright © 2021 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "ac_nir.h"
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * Peels constant addends and one zero-extended 32-bit addend off a chain of
+ * iadd instructions that computes an address.
+ *
+ * \param b           Builder used to emit replacement instructions. Its cursor
+ *                    is moved by this function.
+ * \param scalar      The address value, or a sub-term of it, to inspect.
+ * \param out_const   Accumulator; every constant addend found is added to it.
+ * \param out_offset  Receives the source of at most one u2u64 addend. The
+ *                    outermost caller is expected to pass a pointer to NULL.
+ *
+ * \return The value that remains once the extracted terms are removed, or
+ *         NULL if nothing was extracted from \p scalar, in which case the
+ *         caller keeps using the original value.
+ */
+static nir_ssa_def *
+try_extract_additions(nir_builder *b, nir_ssa_scalar scalar, uint64_t *out_const,
+                      nir_ssa_def **out_offset)
+{
+   if (!nir_ssa_scalar_is_alu(scalar) || nir_ssa_scalar_alu_op(scalar) != nir_op_iadd)
+      return NULL;
+
+   nir_alu_instr *alu = nir_instr_as_alu(scalar.def->parent_instr);
+   nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0);
+   nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1);
+
+   b->cursor = nir_before_instr(&alu->instr);
+
+   for (unsigned i = 0; i < 2; ++i) {
+      nir_ssa_scalar src = i ? src1 : src0;
+      if (nir_ssa_scalar_is_const(src)) {
+         *out_const += nir_ssa_scalar_as_uint(src);
+      } else if (!*out_offset && nir_ssa_scalar_is_alu(src) &&
+                 nir_ssa_scalar_alu_op(src) == nir_op_u2u64) {
+         /* Only a single zero-extended term is taken. Summing two of them in
+          * 32 bits could wrap around, which the original 64-bit addition
+          * would not do, and the sum would have to be emitted at a point
+          * where the previously found offset is not guaranteed to be
+          * defined yet.
+          */
+         nir_ssa_scalar offset_scalar = nir_ssa_scalar_chase_alu_src(src, 0);
+
+         /* The extracted offset is used as a 32-bit source, so narrower
+          * u2u64 inputs are left in the address computation.
+          */
+         if (offset_scalar.def->bit_size != 32)
+            continue;
+
+         *out_offset = nir_channel(b, offset_scalar.def, offset_scalar.comp);
+      } else {
+         continue;
+      }
+
+      /* This addend is gone; whatever remains is the other operand, possibly
+       * simplified further.
+       */
+      nir_ssa_def *replace_src =
+         try_extract_additions(b, i == 1 ? src0 : src1, out_const, out_offset);
+      if (replace_src)
+         return replace_src;
+
+      /* The recursion may have moved the cursor in front of an instruction
+       * that feeds this one, so put it back before emitting anything.
+       */
+      b->cursor = nir_before_instr(&alu->instr);
+      return nir_ssa_for_alu_src(b, alu, 1 - i);
+   }
+
+   /* Neither operand could be removed directly: look inside both of them. */
+   nir_ssa_def *replace_src0 = try_extract_additions(b, src0, out_const, out_offset);
+   nir_ssa_def *replace_src1 = try_extract_additions(b, src1, out_const, out_offset);
+   if (!replace_src0 && !replace_src1)
+      return NULL;
+
+   /* Rebuild the addition from the simplified operands. */
+   b->cursor = nir_before_instr(&alu->instr);
+   replace_src0 = replace_src0 ? replace_src0 : nir_channel(b, src0.def, src0.comp);
+   replace_src1 = replace_src1 ? replace_src1 : nir_channel(b, src1.def, src1.comp);
+   return nir_iadd(b, replace_src0, replace_src1);
+}
+
+/**
+ * Per-instruction callback for nir_shader_instructions_pass(): replaces a
+ * global load, store or atomic with the matching *_amd intrinsic.
+ *
+ * Constant addends and a zero-extended 32-bit addend found in the address
+ * computation are moved out of the address: the constant becomes the BASE
+ * index of the new intrinsic and the 32-bit term becomes an extra trailing
+ * source.
+ *
+ * \param b      Builder used to emit the replacement.
+ * \param instr  Instruction being visited.
+ * \param _      Unused pass data.
+ *
+ * \return true if \p instr was replaced, false if it was left untouched.
+ */
+static bool
+process_instr(nir_builder *b, nir_instr *instr, void *_)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+   nir_intrinsic_op op;
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_load_global:
+   case nir_intrinsic_load_global_constant:
+      op = nir_intrinsic_load_global_amd;
+      break;
+   case nir_intrinsic_global_atomic_add:
+      op = nir_intrinsic_global_atomic_add_amd;
+      break;
+   case nir_intrinsic_global_atomic_imin:
+      op = nir_intrinsic_global_atomic_imin_amd;
+      break;
+   case nir_intrinsic_global_atomic_umin:
+      op = nir_intrinsic_global_atomic_umin_amd;
+      break;
+   case nir_intrinsic_global_atomic_imax:
+      op = nir_intrinsic_global_atomic_imax_amd;
+      break;
+   case nir_intrinsic_global_atomic_umax:
+      op = nir_intrinsic_global_atomic_umax_amd;
+      break;
+   case nir_intrinsic_global_atomic_and:
+      op = nir_intrinsic_global_atomic_and_amd;
+      break;
+   case nir_intrinsic_global_atomic_or:
+      op = nir_intrinsic_global_atomic_or_amd;
+      break;
+   case nir_intrinsic_global_atomic_xor:
+      op = nir_intrinsic_global_atomic_xor_amd;
+      break;
+   case nir_intrinsic_global_atomic_exchange:
+      op = nir_intrinsic_global_atomic_exchange_amd;
+      break;
+   case nir_intrinsic_global_atomic_fadd:
+      op = nir_intrinsic_global_atomic_fadd_amd;
+      break;
+   case nir_intrinsic_global_atomic_fmin:
+      op = nir_intrinsic_global_atomic_fmin_amd;
+      break;
+   case nir_intrinsic_global_atomic_fmax:
+      op = nir_intrinsic_global_atomic_fmax_amd;
+      break;
+   case nir_intrinsic_global_atomic_comp_swap:
+      op = nir_intrinsic_global_atomic_comp_swap_amd;
+      break;
+   case nir_intrinsic_global_atomic_fcomp_swap:
+      op = nir_intrinsic_global_atomic_fcomp_swap_amd;
+      break;
+   case nir_intrinsic_store_global:
+      op = nir_intrinsic_store_global_amd;
+      break;
+   default:
+      return false;
+   }
+
+   /* Stores carry the value in src[0] and the address in src[1]; everything
+    * else handled here has the address in src[0].
+    */
+   unsigned addr_src_idx = op == nir_intrinsic_store_global_amd ? 1 : 0;
+
+   nir_src *addr_src = &intrin->src[addr_src_idx];
+
+   /* Split the address into: remaining address + 32-bit offset + constant. */
+   uint64_t off_const = 0;
+   nir_ssa_def *offset = NULL;
+   nir_ssa_scalar src = {addr_src->ssa, 0};
+   nir_ssa_def *addr = try_extract_additions(b, src, &off_const, &offset);
+   addr = addr ? addr : addr_src->ssa;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   /* A constant that does not fit in 32 bits is folded back into the address.
+    * It must then be cleared, otherwise it would be applied a second time
+    * through the BASE index below.
+    */
+   if (off_const > UINT32_MAX) {
+      addr = nir_iadd_imm(b, addr, off_const);
+      off_const = 0;
+   }
+
+   nir_intrinsic_instr *new_intrin = nir_intrinsic_instr_create(b->shader, op);
+
+   new_intrin->num_components = intrin->num_components;
+
+   /* Everything except the store produces a result. */
+   if (op != nir_intrinsic_store_global_amd)
+      nir_ssa_dest_init(&new_intrin->instr, &new_intrin->dest, intrin->dest.ssa.num_components,
+                        intrin->dest.ssa.bit_size, NULL);
+
+   /* Copy the original sources, append the 32-bit offset as one additional
+    * source and finally substitute the simplified address.
+    */
+   unsigned num_src = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
+   for (unsigned i = 0; i < num_src; i++)
+      new_intrin->src[i] = nir_src_for_ssa(intrin->src[i].ssa);
+   new_intrin->src[num_src] = nir_src_for_ssa(offset ? offset : nir_imm_zero(b, 1, 32));
+   new_intrin->src[addr_src_idx] = nir_src_for_ssa(addr);
+
+   /* Carry over whichever constant indices the original intrinsic has. */
+   if (nir_intrinsic_has_access(intrin))
+      nir_intrinsic_set_access(new_intrin, nir_intrinsic_access(intrin));
+   if (nir_intrinsic_has_align_mul(intrin))
+      nir_intrinsic_set_align_mul(new_intrin, nir_intrinsic_align_mul(intrin));
+   if (nir_intrinsic_has_align_offset(intrin))
+      nir_intrinsic_set_align_offset(new_intrin, nir_intrinsic_align_offset(intrin));
+   if (nir_intrinsic_has_write_mask(intrin))
+      nir_intrinsic_set_write_mask(new_intrin, nir_intrinsic_write_mask(intrin));
+   nir_intrinsic_set_base(new_intrin, off_const);
+
+   /* Swap the new instruction in for the old one. */
+   nir_builder_instr_insert(b, &new_intrin->instr);
+   if (op != nir_intrinsic_store_global_amd)
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, &new_intrin->dest.ssa);
+   nir_instr_remove(&intrin->instr);
+
+   return true;
+}
+
+/**
+ * Applies process_instr() to every instruction of \p shader through
+ * nir_shader_instructions_pass().
+ *
+ * \return The value returned by nir_shader_instructions_pass().
+ */
+bool
+ac_nir_lower_global_access(nir_shader *shader)
+{
+   /* Metadata flags handed to the pass helper. */
+   const nir_metadata metadata = nir_metadata_block_index | nir_metadata_dominance;
+
+   return nir_shader_instructions_pass(shader, process_instr, metadata, NULL);
+}
diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build
index ff2788ec75a..2a39b03ec1c 100644
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -92,6 +92,7 @@ amd_common_files = files(
   'ac_nir.h',
   'ac_nir_cull.c',
   'ac_nir_lower_esgs_io_to_mem.c',
+  'ac_nir_lower_global_access.c',
   'ac_nir_lower_tess_io_to_mem.c',
   'ac_nir_lower_ngg.c',
   'amd_family.c',
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 069b69d915c..7a139f73289 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -4404,6 +4404,8 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
          if (lowered_ngg)
             radv_lower_ngg(device, &stages[i], pipeline_key);
 
+         ac_nir_lower_global_access(stages[i].nir);
+
          radv_optimize_nir_algebraic(stages[i].nir, io_to_mem || lowered_ngg || i == MESA_SHADER_COMPUTE);
 
          if (stages[i].nir->info.bit_sizes_int & (8 | 16)) {



More information about the mesa-commit mailing list