[Mesa-dev] [PATCH 2/2] nir: Add a global dead write var removal pass

Caio Marcelo de Oliveira Filho caio.oliveira at intel.com
Fri Jul 27 21:51:34 UTC 2018


Replaces the existing dead write var removal that is done as part of
the nir_opt_copy_prop_vars pass. That removal was per-block, so
it would not remove the dead write in a program like

    int total = gl_VertexIndex * 10;
    float r = gl_VertexIndex;

    for (int i = 0; i < total; i++) {
        arr[i] = i;                     // DEAD.
        if ((i % 2) == 0) {
            arr[i] = i * 3.0;
        } else {
            arr[i] = i * 5.0;
        }
        r = arr[i];
    }

    out_color = vec4(r,r,r,r);

The new pass will identify the first write to arr[i] as dead and
remove it.

The pass works by walking the control flow nodes and traversing the
instructions, keeping track of the write instructions whose
destinations have not yet been overwritten by other instructions (the
"live writes"). Reading from a destination causes the matching live
writes to be marked as "used". If statements and loops are handled
specially to take the different codepaths into account. Writes that
are never marked as "used" are removed.
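
To make the bookkeeping concrete, here is a small self-contained C
sketch of the same idea, reduced to reads and writes of named slots.
It is only an illustration: there are no derefs, write masks or
control flow, aliasing is reduced to name equality, and the toy_*
names are made up for this sketch rather than taken from NIR.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    enum toy_op { TOY_WRITE, TOY_READ };

    struct toy_instr {
       enum toy_op op;
       const char *slot;   /* destination (writes) or source (reads) */
       bool used;          /* set when a later read reaches this write */
    };

    static void
    mark_used_writes(struct toy_instr *prog, int count)
    {
       /* live[i] is true while prog[i] is a write that has not been
        * overwritten and has not been read yet. */
       bool live[64] = { false };

       for (int i = 0; i < count; i++) {
          if (prog[i].op == TOY_WRITE) {
             /* A new write to the same slot supersedes older live writes. */
             for (int j = 0; j < i; j++) {
                if (live[j] && strcmp(prog[j].slot, prog[i].slot) == 0)
                   live[j] = false;
             }
             live[i] = true;
          } else {
             /* A read marks every live write that may feed it as used. */
             for (int j = 0; j < i; j++) {
                if (live[j] && strcmp(prog[j].slot, prog[i].slot) == 0) {
                   prog[j].used = true;
                   live[j] = false;
                }
             }
          }
       }

       /* Whatever is still live at the end must be kept, mirroring the
        * end-of-function handling in the real pass. */
       for (int i = 0; i < count; i++) {
          if (live[i])
             prog[i].used = true;
       }
    }

    int
    main(void)
    {
       struct toy_instr prog[] = {
          { TOY_WRITE, "arr", false },  /* dead: overwritten before a read */
          { TOY_WRITE, "arr", false },
          { TOY_READ,  "arr", false },
          { TOY_WRITE, "r",   false },  /* kept: still live at the end */
       };
       const int count = sizeof(prog) / sizeof(prog[0]);

       mark_used_writes(prog, count);

       for (int i = 0; i < count; i++) {
          if (prog[i].op == TOY_WRITE && !prog[i].used)
             printf("instruction %d is a dead write\n", i);
       }
       return 0;
    }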

The reason to keep this as a separate pass is to unclutter the copy
propagation code, which will later be changed to do more than
per-block propagation.
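
For reference, a backend is expected to invoke the new pass from its
optimization loop, next to nir_opt_copy_prop_vars(), as the brw_nir.c
hunk below does. A sketch of what such a loop could look like follows;
the do_optimization_loop() name and the exact pass selection are only
illustrative, while the nir_opt_* entry points are the real NIR ones:

    #include "nir.h"

    static void
    do_optimization_loop(nir_shader *nir)
    {
       bool progress;
       do {
          progress = false;
          progress |= nir_opt_copy_prop_vars(nir);
          progress |= nir_opt_dead_write_vars(nir);  /* the new pass */
          progress |= nir_opt_dce(nir);
       } while (progress);
    }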
---
 src/compiler/Makefile.sources              |   1 +
 src/compiler/nir/meson.build               |   1 +
 src/compiler/nir/nir.h                     |   2 +
 src/compiler/nir/nir_opt_copy_prop_vars.c  |  67 +---
 src/compiler/nir/nir_opt_dead_write_vars.c | 379 +++++++++++++++++++++
 src/intel/compiler/brw_nir.c               |   1 +
 6 files changed, 387 insertions(+), 64 deletions(-)
 create mode 100644 src/compiler/nir/nir_opt_dead_write_vars.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 908508adffb..046a2f99bd6 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -273,6 +273,7 @@ NIR_FILES = \
 	nir/nir_opt_cse.c \
 	nir/nir_opt_dce.c \
 	nir/nir_opt_dead_cf.c \
+	nir/nir_opt_dead_write_vars.c \
 	nir/nir_opt_gcm.c \
 	nir/nir_opt_global_to_local.c \
 	nir/nir_opt_if.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index a1bb19356ce..93fd542ce7e 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -158,6 +158,7 @@ files_libnir = files(
   'nir_opt_cse.c',
   'nir_opt_dce.c',
   'nir_opt_dead_cf.c',
+  'nir_opt_dead_write_vars.c',
   'nir_opt_gcm.c',
   'nir_opt_global_to_local.c',
   'nir_opt_if.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 2ca92e8f34e..04a3b98853b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2919,6 +2919,8 @@ bool nir_opt_dce(nir_shader *shader);
 
 bool nir_opt_dead_cf(nir_shader *shader);
 
+bool nir_opt_dead_write_vars(nir_shader *shader);
+
 bool nir_opt_gcm(nir_shader *shader, bool value_number);
 
 bool nir_opt_if(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c
index 9fecaf0eeec..b6a5b9c2bb4 100644
--- a/src/compiler/nir/nir_opt_copy_prop_vars.c
+++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
@@ -38,10 +38,7 @@
  *  1) Copy-propagation on variables that have indirect access.  This includes
  *     propagating from indirect stores into indirect loads.
  *
- *  2) Dead code elimination of store_var and copy_var intrinsics based on
- *     killed destination values.
- *
- *  3) Removal of redundant load_deref intrinsics.  We can't trust regular CSE
+ *  2) Removal of redundant load_deref intrinsics.  We can't trust regular CSE
  *     to do this because it isn't aware of variable writes that may alias the
  *     value and make the former load invalid.
  *
@@ -51,6 +48,8 @@
  * rapidly get out of hand.  Fortunately, for anything that is only ever
  * accessed directly, we get SSA based copy-propagation which is extremely
  * powerful so this isn't that great a loss.
+ *
+ * Removal of dead writes to variables is handled by the
+ * nir_opt_dead_write_vars pass.
  */
 
 struct value {
@@ -66,7 +65,6 @@ struct copy_entry {
 
    nir_instr *store_instr[4];
 
-   unsigned comps_may_be_read;
    struct value src;
 
    nir_deref_instr *dst;
@@ -114,44 +112,6 @@ copy_entry_remove(struct copy_prop_var_state *state, struct copy_entry *entry)
    list_add(&entry->link, &state->copy_free_list);
 }
 
-static void
-remove_dead_writes(struct copy_prop_var_state *state,
-                   struct copy_entry *entry, unsigned write_mask)
-{
-   /* We're overwriting another entry.  Some of it's components may not
-    * have been read yet and, if that's the case, we may be able to delete
-    * some instructions but we have to be careful.
-    */
-   unsigned dead_comps = write_mask & ~entry->comps_may_be_read;
-
-   for (unsigned mask = dead_comps; mask;) {
-      unsigned i = u_bit_scan(&mask);
-
-      nir_instr *instr = entry->store_instr[i];
-
-      /* We may have already deleted it on a previous iteration */
-      if (!instr)
-         continue;
-
-      /* See if this instr is used anywhere that it's not dead */
-      bool keep = false;
-      for (unsigned j = 0; j < 4; j++) {
-         if (entry->store_instr[j] == instr) {
-            if (dead_comps & (1 << j)) {
-               entry->store_instr[j] = NULL;
-            } else {
-               keep = true;
-            }
-         }
-      }
-
-      if (!keep) {
-         nir_instr_remove(instr);
-         state->progress = true;
-      }
-   }
-}
-
 static struct copy_entry *
 lookup_entry_for_deref(struct copy_prop_var_state *state,
                        nir_deref_instr *deref,
@@ -165,16 +125,6 @@ lookup_entry_for_deref(struct copy_prop_var_state *state,
    return NULL;
 }
 
-static void
-mark_aliased_entries_as_read(struct copy_prop_var_state *state,
-                             nir_deref_instr *deref, unsigned components)
-{
-   list_for_each_entry(struct copy_entry, iter, &state->copies, link) {
-      if (nir_compare_derefs(iter->dst, deref) & nir_derefs_may_alias_bit)
-         iter->comps_may_be_read |= components;
-   }
-}
-
 static struct copy_entry *
 get_entry_and_kill_aliases(struct copy_prop_var_state *state,
                            nir_deref_instr *deref,
@@ -191,11 +141,6 @@ get_entry_and_kill_aliases(struct copy_prop_var_state *state,
       }
 
       nir_deref_compare_result comp = nir_compare_derefs(iter->dst, deref);
-      /* This is a store operation.  If we completely overwrite some value, we
-       * want to delete any dead writes that may be present.
-       */
-      if (comp & nir_derefs_b_contains_a_bit)
-         remove_dead_writes(state, iter, write_mask);
 
       if (comp & nir_derefs_equal_bit) {
          assert(entry == NULL);
@@ -231,7 +176,6 @@ store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
                const struct value *value, unsigned write_mask,
                nir_instr *store_instr)
 {
-   entry->comps_may_be_read &= ~write_mask;
    if (value->is_ssa) {
       entry->src.is_ssa = true;
       /* Only overwrite the written components */
@@ -490,9 +434,6 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
       case nir_intrinsic_load_deref: {
          nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
 
-         uint8_t comps_read = nir_ssa_def_components_read(&intrin->dest.ssa);
-         mark_aliased_entries_as_read(state, src, comps_read);
-
          struct copy_entry *src_entry =
             lookup_entry_for_deref(state, src, nir_derefs_a_contains_b_bit);
          struct value value;
@@ -579,8 +520,6 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
             continue;
          }
 
-         mark_aliased_entries_as_read(state, src, 0xf);
-
          struct copy_entry *src_entry =
             lookup_entry_for_deref(state, src, nir_derefs_a_contains_b_bit);
          struct value value;
diff --git a/src/compiler/nir/nir_opt_dead_write_vars.c b/src/compiler/nir/nir_opt_dead_write_vars.c
new file mode 100644
index 00000000000..bee07ddde7f
--- /dev/null
+++ b/src/compiler/nir/nir_opt_dead_write_vars.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+
+#include "util/bitscan.h"
+#include "util/set.h"
+
+/**
+ * Elimination of dead copies and stores based on deref destinations.
+ *
+ * First walks the cf_node tree marking the used writes in each block, then
+ * removes those that are not used.  Takes aliasing into account, so an
+ * indirect read into an array will mark a previous direct write as used (and
+ * not remove it).
+ *
+ * The usage information is kept in the instruction pass_flags.  The tree
+ * traversal keeps track of a live_writes table that maps write
+ * instructions to their current write masks.
+ */
+
+/* When enabled, traces the relevant instructions being processed, along with
+ * changes to the live writes table and the "used" state of the writes.
+ */
+static const bool debug = false;
+
+static void
+print_instr(const char *prefix, nir_instr *instr)
+{
+   fputs(prefix, stdout);
+   nir_print_instr(instr, stdout);
+   putchar('\n');
+}
+
+/* States used in the instr->pass_flags. */
+enum {
+   WRITE_INSTR = (1 << 0),
+   USED_INSTR  = (1 << 1),
+};
+
+static void
+hash_table_extend(struct hash_table *a, struct hash_table *b)
+{
+   struct hash_entry *entry;
+   hash_table_foreach(b, entry)
+      _mesa_hash_table_insert_pre_hashed(a, entry->hash, entry->key, entry->data);
+}
+
+static nir_deref_instr *
+get_dst_deref_from_intrin(nir_intrinsic_instr *intrin)
+{
+   assert(intrin->intrinsic == nir_intrinsic_store_deref
+          || intrin->intrinsic == nir_intrinsic_copy_deref);
+   /* Both store_deref and copy_deref intrinsics have the destination stored
+    * in src[0]. */
+   return nir_src_as_deref(intrin->src[0]);
+}
+
+static void
+mark_entry_as_used(struct hash_table *live_writes, struct hash_entry *entry)
+{
+   assert(entry != NULL && entry->key != NULL);
+   nir_intrinsic_instr *intrin = (nir_intrinsic_instr *) entry->key;
+   intrin->instr.pass_flags |= USED_INSTR;
+   _mesa_hash_table_remove(live_writes, entry);
+   if (debug) {
+      print_instr("      marked as used:    ", &intrin->instr);
+      print_instr("      removed from live: ", &intrin->instr);
+   }
+}
+
+static void
+add_to_live_writes(struct hash_table *live_writes,
+                   nir_intrinsic_instr *intrin_to_add, uintptr_t mask_to_add, nir_deref_instr *dst_to_add)
+{
+   /* Update the list of live writes to remove other writes (or update its
+    * masks when appropriate).  After this there'll be no overlapping writes
+    * in the live_writes table.
+    *
+    * When the same write is processed again while handling loops, we
+    * still process it normally, since doing so might cause the mask to
+    * be updated.
+    */
+   struct hash_entry *entry;
+   hash_table_foreach(live_writes, entry) {
+      nir_intrinsic_instr *intrin = (nir_intrinsic_instr *) entry->key;
+      nir_deref_instr *dst = get_dst_deref_from_intrin(intrin);
+
+      nir_deref_compare_result comp = nir_compare_derefs(dst_to_add, dst);
+      if (comp & nir_derefs_a_contains_b_bit) {
+         uintptr_t mask = (uintptr_t)entry->data & ~mask_to_add;
+         if (mask == 0) {
+            _mesa_hash_table_remove(live_writes, entry);
+            if (debug)
+               print_instr("      removed from live: ", &intrin->instr);
+         } else {
+            entry->data = (void *)mask;
+            if (debug)
+               print_instr("      updated mask:      ", &intrin->instr);
+         }
+      } else {
+         if (debug)
+            print_instr("      not touching:      ", &intrin->instr);
+      }
+   }
+
+   _mesa_hash_table_insert(live_writes, intrin_to_add, (void*)mask_to_add);
+}
+
+static void
+mark_writes_for_modes(struct hash_table *live_writes, nir_variable_mode modes)
+{
+   struct hash_entry *entry;
+   hash_table_foreach(live_writes, entry) {
+      nir_intrinsic_instr *intrin = (nir_intrinsic_instr *) entry->key;
+      nir_deref_instr *dst = get_dst_deref_from_intrin(intrin);
+      nir_variable *dst_var = nir_deref_instr_get_variable(dst);
+
+      if (dst_var->data.mode & modes)
+         mark_entry_as_used(live_writes, entry);
+   }
+}
+
+static void
+mark_writes_for_src(struct hash_table *live_writes, nir_deref_instr *src)
+{
+   struct hash_entry *entry;
+   hash_table_foreach(live_writes, entry) {
+      nir_intrinsic_instr *intrin = (nir_intrinsic_instr *) entry->key;
+      nir_deref_instr *dst = get_dst_deref_from_intrin(intrin);
+      /* If we are using a deref that may alias with the deref previously
+       * written, consider that write used.
+       */
+      if (nir_compare_derefs(src, dst) & nir_derefs_may_alias_bit)
+         mark_entry_as_used(live_writes, entry);
+   }
+}
+
+static void
+mark_used_writes_in_block(struct hash_table *live_writes, nir_block *block)
+{
+   nir_foreach_instr_safe(instr, block) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_barrier:
+      case nir_intrinsic_memory_barrier:
+         /* If we hit a barrier, we have to consider used every write that may
+          * possibly be accessible to another thread.  Locals, globals, and
+          * things of the like are safe, however.
+          */
+         if (debug)
+            print_instr("\n  ", instr);
+         mark_writes_for_modes(live_writes,
+                               ~(nir_var_local | nir_var_global |
+                                 nir_var_shader_in | nir_var_uniform));
+         break;
+
+      case nir_intrinsic_emit_vertex:
+      case nir_intrinsic_emit_vertex_with_counter:
+         if (debug)
+            print_instr("\n  ", instr);
+         mark_writes_for_modes(live_writes, nir_var_shader_out);
+         break;
+
+      case nir_intrinsic_load_deref: {
+         nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
+         if (debug) {
+            print_instr("\n  ", instr);
+            print_instr("    src: ", &src->instr);
+         }
+         mark_writes_for_src(live_writes, src);
+         break;
+      }
+
+      case nir_intrinsic_store_deref: {
+         instr->pass_flags |= WRITE_INSTR;
+
+         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+         if (debug) {
+            print_instr("\n  ", instr);
+            print_instr("    dst: ", &dst->instr);
+         }
+
+         add_to_live_writes(live_writes, intrin, nir_intrinsic_write_mask(intrin), dst);
+         break;
+      }
+
+      case nir_intrinsic_copy_deref: {
+         instr->pass_flags |= WRITE_INSTR;
+
+         nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
+         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+
+         if (debug) {
+            print_instr("\n  ", instr);
+            print_instr("    src: ", &src->instr);
+            print_instr("    dst: ", &dst->instr);
+         }
+
+         if (nir_compare_derefs(src, dst) & nir_derefs_equal_bit) {
+            /* This is a self-copy; don't add it to the live writes, so it
+             * is never marked as used and ends up removed.
+             */
+            break;
+         }
+
+         mark_writes_for_src(live_writes, src);
+         add_to_live_writes(live_writes, intrin, (1 << NIR_MAX_VEC_COMPONENTS) - 1, dst);
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+}
+
+static void
+mark_used_writes_in_node(void *mem_ctx, struct hash_table *live_writes, nir_cf_node *cf_node)
+{
+   switch (cf_node->type) {
+   case nir_cf_node_function: {
+      assert(live_writes == NULL);
+      nir_function_impl *func = nir_cf_node_as_function(cf_node);
+
+      live_writes = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &func->body)
+         mark_used_writes_in_node(mem_ctx, live_writes, cf_node);
+
+      /* By the end of the function, consider all the live writes as used. No
+       * other writes made them dead.
+       */
+      struct hash_entry *entry;
+      hash_table_foreach(live_writes, entry) {
+         nir_instr *instr = (nir_instr *) entry->key;
+         instr->pass_flags |= USED_INSTR;
+      }
+      break;
+   }
+
+   case nir_cf_node_block: {
+      nir_block *block = nir_cf_node_as_block(cf_node);
+      mark_used_writes_in_block(live_writes, block);
+      break;
+   }
+
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
+
+      /* Each code path gets its own clone of the live writes, and the clones
+       * are merged back into a single table at the end of the if-statement.
+       * The merge makes it possible for a later write to invalidate writes
+       * made in the then/else blocks.
+       *
+       * Being used by either path is sufficient to consider a write used;
+       * that information is stored in the instruction itself (pass_flags).
+       */
+
+      struct hash_table *then_live_writes = _mesa_hash_table_clone(live_writes, mem_ctx);
+      struct hash_table *else_live_writes = _mesa_hash_table_clone(live_writes, mem_ctx);
+
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
+         mark_used_writes_in_node(mem_ctx, then_live_writes, cf_node);
+
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
+         mark_used_writes_in_node(mem_ctx, else_live_writes, cf_node);
+
+      _mesa_hash_table_clear(live_writes, NULL);
+      hash_table_extend(live_writes, then_live_writes);
+      hash_table_extend(live_writes, else_live_writes);
+
+      break;
+   }
+
+   case nir_cf_node_loop: {
+      nir_loop *loop = nir_cf_node_as_loop(cf_node);
+
+      /* For tracking used variables in a loop, there are three cases: (a) the
+       * body is not entered; (b) the body is executed once; (c) the body is
+       * executed more than once.
+       *
+       * The case (c) is exemplified below:
+       *
+       *     c = x
+       *     while condition
+       *         use(c)
+       *         c = y
+       *     c = z
+       *     use(c)
+       *
+       * All writes to c must be considered used.  This is achieved by
+       * walking the loop body a second time, using the live writes table
+       * produced by the first pass over it.
+       */
+
+      struct hash_table *loop_live_writes = _mesa_hash_table_clone(live_writes, mem_ctx);
+      for (int i = 0; i < 2; i++) {
+         foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
+            mark_used_writes_in_node(mem_ctx, loop_live_writes, cf_node);
+      }
+
+      /* Keep around the live writes for the case the loop is not entered. */
+      hash_table_extend(live_writes, loop_live_writes);
+
+      break;
+   }
+   }
+}
+
+bool
+nir_opt_dead_write_vars(nir_shader *shader)
+{
+   void *mem_ctx = ralloc_context(NULL);
+
+   if (debug)
+      nir_print_shader(shader, stdout);
+
+   bool progress = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block)
+            instr->pass_flags = 0;
+      }
+
+      mark_used_writes_in_node(mem_ctx, NULL, &function->impl->cf_node);
+
+      bool local_progress = false;
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->pass_flags == WRITE_INSTR) {
+               nir_instr_remove(instr);
+               if (debug)
+                  print_instr("removed dead write: ", instr);
+               local_progress = true;
+            }
+         }
+      }
+
+      if (local_progress) {
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+         progress = true;
+      }
+   }
+
+   ralloc_free(mem_ctx);
+
+   return progress;
+}
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 5990427b731..cba87a677a6 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -543,6 +543,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
       progress = false;
       OPT(nir_lower_vars_to_ssa);
       OPT(nir_opt_copy_prop_vars);
+      OPT(nir_opt_dead_write_vars);
 
       if (is_scalar) {
          OPT(nir_lower_alu_to_scalar);
-- 
2.18.0