Mesa (master): r600/sfn: eliminate loads of unused components from shared memory

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Mar 5 18:30:18 UTC 2021


Module: Mesa
Branch: master
Commit: f3aa2f15c20d5fa0767dce99b54f74a7eab07b2e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3aa2f15c20d5fa0767dce99b54f74a7eab07b2e

Author: Gert Wollny <gert.wollny at collabora.com>
Date:   Thu Mar  4 19:39:52 2021 +0100

r600/sfn: eliminate loads of unused components from shared memory

LDS loads are quite expensive, so eliminate as many of them as possible by loading only the components that are actually used.

Signed-off-by: Gert Wollny <gert.wollny at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9416>

---

 .../drivers/r600/sfn/sfn_nir_lower_tess_io.cpp     | 103 ++++++++++++++++++---
 1 file changed, 90 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
index 14d41f0cb9d..b1b86ffaaa7 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
@@ -131,22 +131,99 @@ static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents)
    }
 }
 
+static nir_ssa_def *load_offset_group_from_mask(nir_builder *b, uint32_t mask) // Builds the offset vector for the channels whose bits are set in `mask`.
+{
+   auto full_mask = nir_imm_ivec4(b, 0, 4, 8, 12); // immediate vector holding one offset per channel: 0, 4, 8, 12
+   return nir_channels(b, full_mask, mask); // result of nir_channels() applied to the offsets with the caller's channel mask
+}
+
+struct MaskQuery { // State handed to update_alu_mask() through its void *data argument while get_dest_usee_mask() walks the uses of a value.
+   uint32_t mask; // accumulated bitmask of components found to be read so far
+   uint32_t ssa_index; // index of the SSA value whose uses are being examined
+   nir_alu_instr *alu; // ALU instruction whose sources are currently being visited
+   int index; // number of the source currently visited within `alu`; reset to 0 per ALU instruction
+   uint32_t full_mask; // (1 << number of destination components) - 1; the search stops once mask equals this
+};
+
+static bool update_alu_mask(nir_src *src, void *data) // Per-source callback used with nir_foreach_src(); `data` must point to a MaskQuery.
+{
+   auto mq = reinterpret_cast<MaskQuery *>(data);
+
+   if (mq->ssa_index == src->ssa->index) { // this source refers to the value being tracked
+      mq->mask |= nir_alu_instr_src_read_mask(mq->alu, mq->index); // merge in the read mask reported for this source of mq->alu
+   }
+   ++mq->index; // count this source so the next call refers to the following one
+
+   return mq->mask != mq->full_mask; // false as soon as the accumulated mask covers every component
+}
+
+static uint32_t get_dest_usee_mask(nir_intrinsic_instr *op) // Returns a bitmask of the components of op's SSA destination that its uses read, or 0xf when this cannot be narrowed down.
+{
+   assert(op->dest.is_ssa);
+
+   MaskQuery mq = {0}; // mask starts empty, so a destination without uses yields 0
+   mq.full_mask = (1 << nir_dest_num_components(op->dest)) - 1; // one bit per destination component
+
+   nir_foreach_use(use_src,  &op->dest.ssa) {
+      auto use_instr = use_src->parent_instr;
+      mq.ssa_index = use_src->ssa->index;
+
+      switch (use_instr->type) {
+      case nir_instr_type_alu: {
+         mq.alu = nir_instr_as_alu(use_instr);
+         mq.index = 0; // restart source numbering for this ALU instruction
+         if (!nir_foreach_src(use_instr, update_alu_mask, &mq)) // callback returned false: mask already equals full_mask
+            return 0xf; // NOTE(review): fixed 0xf is returned even when the destination has fewer than four components
+         break;
+      }
+      case nir_instr_type_intrinsic:  {
+         auto intr = nir_instr_as_intrinsic(use_instr);
+         switch (intr->intrinsic) {
+         case nir_intrinsic_store_output:
+         case nir_intrinsic_store_per_vertex_output:
+            mq.mask |= nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr); // NOTE(review): write mask is shifted by the store's component index; confirm this maps onto components of the loaded value
+            break;
+         case nir_intrinsic_store_scratch:
+         case nir_intrinsic_store_local_shared_r600:
+            mq.mask |= nir_intrinsic_write_mask(intr); // write mask is taken unshifted for these stores
+            break;
+         default:
+            return 0xf; // any other intrinsic user: report all four components as used
+         }
+         break;
+      }
+      default:
+         return 0xf; // any other instruction type: report all four components as used
+      }
+
+   }
+   return mq.mask; // union of the masks gathered from all uses
+}
+
 static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
 {
-   nir_intrinsic_instr *load_tcs_in = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
-   load_tcs_in->num_components = op->num_components;
-   nir_ssa_dest_init(&load_tcs_in->instr, &load_tcs_in->dest,
-                     load_tcs_in->num_components, 32, NULL);
-
-   nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group(b, load_tcs_in->num_components));
-   if (nir_intrinsic_component(op))
-      addr_outer = nir_iadd(b, addr_outer, nir_imm_int(b, 4 * nir_intrinsic_component(op)));
-
-   load_tcs_in->src[0] = nir_src_for_ssa(addr_outer);
-   nir_builder_instr_insert(b, &load_tcs_in->instr);
-   nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load_tcs_in->dest.ssa));
+   uint32_t mask = get_dest_usee_mask(op); // bitmask of the components of op's destination that its uses read
+   if (mask) { // with an empty mask no replacement load is emitted; op is still removed below
+      nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group_from_mask(b, mask)); // base address plus the offsets selected by mask
+      if (nir_intrinsic_component(op))
+         addr_outer = nir_iadd(b, addr_outer, nir_imm_int(b, 4 * nir_intrinsic_component(op))); // additionally offset by 4 * the load's component index
+
+      auto new_load = nir_load_local_shared_r600(b, 32, addr_outer); // replacement load taking the masked address vector
+
+      auto undef = nir_ssa_undef(b, 1, 32); // filler for components that are not in mask
+      int comps = nir_dest_num_components(op->dest);
+      nir_ssa_def *remix[4] = {undef, undef, undef, undef};
+
+      int chan = 0; // next channel of new_load to hand out
+      for (int i = 0; i < comps; ++i) {
+         if (mask & (1 << i)) {
+            remix[i] = nir_channel(b, new_load, chan++); // component i takes the next consecutive channel of new_load
+         }
+      }
+      auto new_load_remixed = nir_vec(b, remix, comps); // vector with the original component count; unused slots stay undef
+      nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(new_load_remixed)); // redirect all users of the old load to the rebuilt vector
+   }
    nir_instr_remove(&op->instr);
-
 }
 
 static nir_ssa_def *



More information about the mesa-commit mailing list