Mesa (main): intel/compiler: use nir_shader_instructions_pass in brw_nir_opt_peephole_ffma

Tue Oct 5 10:29:39 UTC 2021

Module: Mesa
Branch: main
Commit: 71bec85db018e7b2cbb952469510e3360233049f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=71bec85db018e7b2cbb952469510e3360233049f

Author: Marcin Ślusarz <marcin.slusarz at intel.com>
Date:   Fri Aug  6 14:33:14 2021 +0200

intel/compiler: use nir_shader_instructions_pass in brw_nir_opt_peephole_ffma

No functional changes.

Signed-off-by: Marcin Ślusarz <marcin.slusarz at intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13189>

---

 src/intel/compiler/brw_nir_opt_peephole_ffma.c | 181 ++++++++++---------------
 1 file changed, 75 insertions(+), 106 deletions(-)

diff --git a/src/intel/compiler/brw_nir_opt_peephole_ffma.c b/src/intel/compiler/brw_nir_opt_peephole_ffma.c
index a5400a1fd91..fb5ff5c61f1 100644
--- a/src/intel/compiler/brw_nir_opt_peephole_ffma.c
+++ b/src/intel/compiler/brw_nir_opt_peephole_ffma.c
@@ -163,138 +163,107 @@ any_alu_src_is_a_constant(nir_alu_src srcs[])
 }
 
 static bool
-brw_nir_opt_peephole_ffma_block(nir_builder *b, nir_block *block)
+brw_nir_opt_peephole_ffma_instr(nir_builder *b,
+                                nir_instr *instr,
+                                UNUSED void *cb_data)
 {
-   bool progress = false;
-
-   nir_foreach_instr_safe(instr, block) {
-      if (instr->type != nir_instr_type_alu)
-         continue;
-
-      nir_alu_instr *add = nir_instr_as_alu(instr);
-      if (add->op != nir_op_fadd)
-         continue;
-
-      assert(add->dest.dest.is_ssa);
-      if (add->exact)
-         continue;
-
-      assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
-
-      /* This, is the case a + a.  We would rather handle this with an
-       * algebraic reduction than fuse it.  Also, we want to only fuse
-       * things where the multiply is used only once and, in this case,
-       * it would be used twice by the same instruction.
-       */
-      if (add->src[0].src.ssa == add->src[1].src.ssa)
-         continue;
+   if (instr->type != nir_instr_type_alu)
+      return false;
 
-      nir_alu_instr *mul;
-      uint8_t add_mul_src, swizzle[4];
-      bool negate, abs;
-      for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) {
-         for (unsigned i = 0; i < 4; i++)
-            swizzle[i] = i;
+   nir_alu_instr *add = nir_instr_as_alu(instr);
+   if (add->op != nir_op_fadd)
+      return false;
 
-         negate = false;
-         abs = false;
+   assert(add->dest.dest.is_ssa);
+   if (add->exact)
+      return false;
 
-         mul = get_mul_for_src(&add->src[add_mul_src],
-                               add->dest.dest.ssa.num_components,
-                               swizzle, &negate, &abs);
+   assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
 
-         if (mul != NULL)
-            break;
-      }
+   /* This, is the case a + a.  We would rather handle this with an
+    * algebraic reduction than fuse it.  Also, we want to only fuse
+    * things where the multiply is used only once and, in this case,
+    * it would be used twice by the same instruction.
+    */
+   if (add->src[0].src.ssa == add->src[1].src.ssa)
+      return false;
 
-      if (mul == NULL)
-         continue;
+   nir_alu_instr *mul;
+   uint8_t add_mul_src, swizzle[4];
+   bool negate, abs;
+   for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) {
+      for (unsigned i = 0; i < 4; i++)
+         swizzle[i] = i;
 
-      unsigned bit_size = add->dest.dest.ssa.bit_size;
+      negate = false;
+      abs = false;
 
-      nir_ssa_def *mul_src[2];
-      mul_src[0] = mul->src[0].src.ssa;
-      mul_src[1] = mul->src[1].src.ssa;
+      mul = get_mul_for_src(&add->src[add_mul_src],
+                            add->dest.dest.ssa.num_components,
+                            swizzle, &negate, &abs);
 
-      /* If any of the operands of the fmul and any of the fadd is a constant,
-       * we bypass because it will be more efficient as the constants will be
-       * propagated as operands, potentially saving two load_const instructions.
-       */
-      if (any_alu_src_is_a_constant(mul->src) &&
-          any_alu_src_is_a_constant(add->src)) {
-         continue;
-      }
+      if (mul != NULL)
+         break;
+   }
 
-      b->cursor = nir_before_instr(&add->instr);
+   if (mul == NULL)
+      return false;
 
-      if (abs) {
-         for (unsigned i = 0; i < 2; i++)
-            mul_src[i] = nir_fabs(b, mul_src[i]);
-      }
+   unsigned bit_size = add->dest.dest.ssa.bit_size;
 
-      if (negate)
-         mul_src[0] = nir_fneg(b, mul_src[0]);
+   nir_ssa_def *mul_src[2];
+   mul_src[0] = mul->src[0].src.ssa;
+   mul_src[1] = mul->src[1].src.ssa;
 
-      nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
-      ffma->dest.saturate = add->dest.saturate;
-      ffma->dest.write_mask = add->dest.write_mask;
+   /* If any of the operands of the fmul and any of the fadd is a constant,
+    * we bypass because it will be more efficient as the constants will be
+    * propagated as operands, potentially saving two load_const instructions.
+    */
+   if (any_alu_src_is_a_constant(mul->src) &&
+       any_alu_src_is_a_constant(add->src)) {
+      return false;
+   }
 
-      for (unsigned i = 0; i < 2; i++) {
-         ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
-         for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
-            ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
-      }
-      nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src]);
+   b->cursor = nir_before_instr(&add->instr);
 
-      assert(add->dest.dest.is_ssa);
+   if (abs) {
+      for (unsigned i = 0; i < 2; i++)
+         mul_src[i] = nir_fabs(b, mul_src[i]);
+   }
 
-      nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
-                        add->dest.dest.ssa.num_components,
-                        bit_size, NULL);
-      nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
-                               &ffma->dest.dest.ssa);
+   if (negate)
+      mul_src[0] = nir_fneg(b, mul_src[0]);
 
-      nir_builder_instr_insert(b, &ffma->instr);
-      assert(list_is_empty(&add->dest.dest.ssa.uses));
-      nir_instr_remove(&add->instr);
+   nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
+   ffma->dest.saturate = add->dest.saturate;
+   ffma->dest.write_mask = add->dest.write_mask;
 
-      progress = true;
+   for (unsigned i = 0; i < 2; i++) {
+      ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
+      for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
+         ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
    }
+   nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src]);
 
-   return progress;
-}
-
-static bool
-brw_nir_opt_peephole_ffma_impl(nir_function_impl *impl)
-{
-   bool progress = false;
+   assert(add->dest.dest.is_ssa);
 
-   nir_builder builder;
-   nir_builder_init(&builder, impl);
+   nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
+                     add->dest.dest.ssa.num_components,
+                     bit_size, NULL);
+   nir_ssa_def_rewrite_uses(&add->dest.dest.ssa, &ffma->dest.dest.ssa);
 
-   nir_foreach_block(block, impl) {
-      progress |= brw_nir_opt_peephole_ffma_block(&builder, block);
-   }
-
-   if (progress) {
-      nir_metadata_preserve(impl, nir_metadata_block_index |
-                                  nir_metadata_dominance);
-   } else {
-      nir_metadata_preserve(impl, nir_metadata_all);
-   }
+   nir_builder_instr_insert(b, &ffma->instr);
+   assert(list_is_empty(&add->dest.dest.ssa.uses));
+   nir_instr_remove(&add->instr);
 
-   return progress;
+   return true;
 }
 
 bool
 brw_nir_opt_peephole_ffma(nir_shader *shader)
 {
-   bool progress = false;
-
-   nir_foreach_function(function, shader) {
-      if (function->impl)
-         progress |= brw_nir_opt_peephole_ffma_impl(function->impl);
-   }
-
-   return progress;
+   return nir_shader_instructions_pass(shader, brw_nir_opt_peephole_ffma_instr,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       NULL);
 }