[Mesa-dev] [PATCH 6/9] nir: Make dead_write_vars pass global

Caio Marcelo de Oliveira Filho caio.oliveira at intel.com
Wed Aug 15 21:56:37 UTC 2018


The pass will remove not only writes that are proven to be dead within
a single block, but also writes that can only be proven dead by looking
at multiple blocks.  Such global analysis is necessary to remove dead
writes such as the one marked below:

    int total = gl_VertexIndex * 10;
    float r = gl_VertexIndex;

    for (int i = 0; i < total; i++) {
        arr[i] = i;                     /* DEAD WRITE. */
        if ((i % 2) == 0) {
            arr[i] = i * 3.0;
        } else {
            arr[i] = i * 5.0;
        }
        r = arr[i];
    }

    out_color = vec4(r,r,r,r);

Without knowing that both successor blocks will overwrite the value,
it was not possible to remove the "arr[i] = i" write.

The local pass was extended to collect some extra data, which allows an
iterative (backward) data-flow analysis to run.  The analysis is used
to find out, for each block, which derefs are used after the block.
This information is then used to decide which unused writes at the end
of a block can be removed.  A local-only analysis couldn't remove any
of those, because they might still be used in later blocks.
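
To make the fixed-point computation concrete, here is a simplified,
self-contained sketch (not the pass's actual code: the real pass tracks
deref paths because array indirects may alias, so it cannot use
bitsets).  The GEN/KILL sets and the CFG mirror the block0/block1/block2
example in the pass's new header comment:

    /* Simplified sketch of the backward data-flow fixed point, using
     * bitsets over five hypothetical derefs A..E. */
    #include <stdbool.h>
    #include <stdio.h>

    enum { A = 1 << 0, B = 1 << 1, C = 1 << 2, D = 1 << 3, E = 1 << 4 };
    #define NUM_BLOCKS 3

    int main(void)
    {
       unsigned gen[NUM_BLOCKS]  = { 0, B, D | E };
       unsigned kill[NUM_BLOCKS] = { A | B | C | D | E, C | D, 0 };
       unsigned in[NUM_BLOCKS]   = { gen[0], gen[1], gen[2] };
       unsigned out[NUM_BLOCKS]  = { 0 };
       /* Each block has a single successor here; block2 falls into the
        * end block, which is assumed to have no uses. */
       int successor[NUM_BLOCKS] = { 1, 2, -1 };

       bool flow_progress;
       do {
          flow_progress = false;
          for (int b = NUM_BLOCKS - 1; b >= 0; b--) {
             /* OUT = union of the IN sets of all successors. */
             out[b] = successor[b] >= 0 ? in[successor[b]] : 0;

             /* IN = GEN union (OUT minus KILL). */
             unsigned new_in = gen[b] | (out[b] & ~kill[b]);
             if (new_in != in[b]) {
                in[b] = new_in;
                flow_progress = true;
             }
          }
       } while (flow_progress);

       /* Prints IN(block1) = 0x12 = {B, E}, OUT(block0) = {B, E}, etc. */
       for (int b = 0; b < NUM_BLOCKS; b++)
          printf("block%d: IN=0x%02x OUT=0x%02x\n", b, in[b], out[b]);
       return 0;
    }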
---
 src/compiler/nir/nir_opt_dead_write_vars.c | 546 ++++++++++++++++++++-
 1 file changed, 532 insertions(+), 14 deletions(-)

diff --git a/src/compiler/nir/nir_opt_dead_write_vars.c b/src/compiler/nir/nir_opt_dead_write_vars.c
index 822bfa5595d..fe72ab784a8 100644
--- a/src/compiler/nir/nir_opt_dead_write_vars.c
+++ b/src/compiler/nir/nir_opt_dead_write_vars.c
@@ -27,6 +27,85 @@
 
 #include "util/u_dynarray.h"
 
+/**
+ * Elimination of dead writes based on derefs.
+ *
+ * Dead writes are stores and copies that write to a deref which then gets
+ * another write before it is used (read or used as the source of a copy).
+ * Those writes can be removed since they don't affect anything.
+ *
+ * For derefs that refer to a memory area that can be read after the program,
+ * the last write is considered used.  The presence of certain instructions
+ * may also cause writes to be considered used, e.g. a memory barrier (in that
+ * case the value must be written, as another thread might use it).
+ *
+ * The write mask of store instructions is taken into account, so it is
+ * possible that a store is removed because the combination of other stores
+ * overwrites its value.
+ *
+ * The pass works in three steps:
+ *
+ * (1) For each block, perform a local dead write removal.  Remove only
+ *     writes we are sure are dead, keeping around the last unused writes
+ *     at the end of the block.  These last unused writes are revisited
+ *     in (3).
+ *
+ * (2) Perform a data-flow analysis to identify for each block the "uses
+ *     before write" of successor blocks.  E.g. given the blocks below (block0
+ *     precedes block1, which precedes block2)
+ *
+ *       block0:            block1:            block2:
+ *         write A            load B             load D
+ *         write B            write C            load E
+ *         write C            load C
+ *         write D            write D
+ *         write E
+ *         load A
+ *
+ *    the goal is to know that B and E are "used before write" after block0;
+ *    D and E are "used before write" after block1.  Note that the usage of
+ *    E doesn't come from an immediate successor of block0.  The "used
+ *    before write" information will later decide which unused writes are
+ *    removed.
+ *
+ *    The data-flow analysis is implemented by starting with per-block sets
+ *    and iterating to fix the contents of those sets.  The nature of the
+ *    "fixing" ensures that the sets become stable (i.e. no infinite loops).
+ *    Each block gets two sets representing its local information:
+ *
+ *    - GEN: the derefs used before write by the block.  E.g. GEN(block1) = {B}
+ *    - KILL: the derefs overwritten by the block.  E.g. KILL(block1) = {C, D}
+ *
+ *    And two other sets representing its current state:
+ *
+ *    - IN: the derefs used before write for this block and all successors.
+ *      E.g. at the end of the analysis, IN(block1) = {B, E}.  Note how E is
+ *      present but not D.
+ *    - OUT: the derefs used before write for the successors of the block.
+ *
+ *    In this pass the iteration happens "backwards", so a block uses its
+ *    successors' information to improve its own information.  The IN and
+ *    OUT sets are updated as follows:
+ *
+ *        OUT = union IN of all successors
+ *        IN  = GEN union (OUT minus KILL)
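+ *
+ *    Applied to the example above, and assuming nothing is read after the
+ *    end of the program, the analysis converges to:
+ *
+ *        GEN(block0) = {}        KILL(block0) = {A, B, C, D, E}
+ *        GEN(block1) = {B}       KILL(block1) = {C, D}
+ *        GEN(block2) = {D, E}    KILL(block2) = {}
+ *
+ *        IN(block0)  = {}        OUT(block0)  = {B, E}
+ *        IN(block1)  = {B, E}    OUT(block1)  = {D, E}
+ *        IN(block2)  = {D, E}    OUT(block2)  = {}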
+ *
+ * (3) Remove the last unused writes for each block that are not used by
+ *     successor blocks.  After the analysis in (2), this information is
+ *     available in the OUT set of each block.
+ *
+ * One extra factor in play is that it is not trivial to decide whether two
+ * derefs refer to the same storage or not.  Currently array indirects are
+ * assumed to alias each other, i.e. "write A[i]" is considered used by a
+ * later "load A[j]".  This logic is mostly handled by the
+ * nir_compare_deref_paths() function.  This aliasing prevents us from
+ * representing the data-flow sets as bitsets.
+ *
+ * Data-flow analysis code here is inspired by Muchnick's "Advanced Compiler
+ * Design and Implementation" and by the data-flow analysis implementation in
+ * src/intel/compiler/brw_fs_copy_propagation.cpp.
+ */
+
+static const bool debug = false;
+
 struct state {
    void *mem_ctx;
 
@@ -34,6 +113,24 @@ struct state {
     * rebuilding the paths for the same deref. */
    struct hash_table *paths;
    void *path_lin_ctx;
+
+   struct block_data *block_data;
+};
+
+struct block_data {
+   struct deref_map *in;
+   struct deref_map *out;
+   struct deref_map *gen;
+   struct deref_map *kill;
+
+   struct deref_map *kill_before_memory_barrier;
+   struct deref_map *kill_before_emit_vertex;
+
+   /* All the writes not used by the end of the block.  Used after the
+    * data-flow analysis to identify globally unused writes that can be
+    * removed.
+    */
+   struct util_dynarray unused_writes;
 };
 
 static nir_deref_path *
@@ -50,6 +147,23 @@ get_path(struct state *state, nir_deref_instr *deref)
    }
 }
 
+static void
+print_instr(const char *prefix, const nir_instr *instr, const char *suffix)
+{
+   if (prefix) fputs(prefix, stdout);
+   nir_print_instr(instr, stdout);
+   if (suffix) fputs(suffix, stdout);
+}
+
+static void
+print_path(const char *prefix, nir_deref_path *path, const char *suffix)
+{
+   int i;
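+   /* Find the index of the last deref in the NULL-terminated path array. */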
+   for (i = 0; path->path[i + 1]; i++);
+   nir_deref_instr *deref = path->path[i];
+   print_instr(prefix, &deref->instr, suffix);
+}
+
 /* Entry for unused_writes arrays. */
 struct write_entry {
    /* If NULL indicates the entry is free to be reused. */
@@ -58,6 +172,161 @@ struct write_entry {
    nir_deref_path *path;
 };
 
+struct deref_map_entry {
+   nir_deref_path *path;
+   uintptr_t mask;
+};
+
+/* Maps derefs to a (write) mask value.  Used by the various data-flow
+ * analysis sets.
+ *
+ * TODO: Consider a tree-based data structure to represent this instead of
+ * arrays of paths.
+ */
+struct deref_map {
+   struct util_dynarray entries;
+   void *mem_ctx;
+};
+
+static struct deref_map *
+deref_map_create(void *mem_ctx)
+{
+   struct deref_map *map = rzalloc(mem_ctx, struct deref_map);
+   map->mem_ctx = mem_ctx;
+   util_dynarray_init(&map->entries, mem_ctx);
+   return map;
+}
+
+static bool
+deref_map_empty(struct deref_map *map)
+{
+   return map->entries.size == 0;
+}
+
+/* Add a deref (represented by its path) to a deref_map, with a given mask.
+ *
+ * Returns whether the map was changed or not.
+ */
+static bool
+deref_map_add(struct deref_map *map, nir_deref_path *path, uintptr_t mask)
+{
+   util_dynarray_foreach (&map->entries, struct deref_map_entry, map_entry) {
+      nir_deref_compare_result comp = nir_compare_deref_paths(path, map_entry->path);
+      if (comp & nir_derefs_equal_bit) {
+         /* The deref is already present, no new entry needed. */
+         uintptr_t old_mask = map_entry->mask;
+         map_entry->mask |= mask;
+         return old_mask != map_entry->mask;
+      }
+   }
+
+   struct deref_map_entry map_entry = {
+      .path = path,
+      .mask = mask,
+   };
+   util_dynarray_append(&map->entries, struct deref_map_entry, map_entry);
+
+   return true;
+}
+
+static bool
+deref_map_contains(struct deref_map *map, nir_deref_path *path, uintptr_t mask)
+{
+   util_dynarray_foreach (&map->entries, struct deref_map_entry, map_entry) {
+      nir_deref_compare_result comp = nir_compare_deref_paths(path, map_entry->path);
+      /* All the bits in the mask must be present in the map. */
+      if ((comp & nir_derefs_b_contains_a_bit) && ((mask & map_entry->mask) == mask))
+         return true;
+   }
+   return false;
+}
+
+static bool
+deref_map_may_alias(struct deref_map *map, nir_deref_path *path, uintptr_t mask)
+{
+   util_dynarray_foreach (&map->entries, struct deref_map_entry, map_entry) {
+      nir_deref_compare_result comp = nir_compare_deref_paths(path, map_entry->path);
+      /* A single overlapping bit is enough to characterize an alias. */
+      if ((comp & nir_derefs_may_alias_bit) && (mask & map_entry->mask))
+         return true;
+   }
+   return false;
+}
+
+#define deref_map_foreach(map, elem) \
+   util_dynarray_foreach(&map->entries, struct deref_map_entry, elem)
+
+static bool
+deref_map_add_map(struct deref_map *map, struct deref_map *to_add)
+{
+   bool changed = false;
+   deref_map_foreach(to_add, map_entry)
+      changed |= deref_map_add(map, map_entry->path, map_entry->mask);
+   return changed;
+}
+
+static struct deref_map *
+deref_map_clone(struct deref_map *from, void *mem_ctx)
+{
+   struct deref_map *map = deref_map_create(mem_ctx);
+   util_dynarray_clone(&map->entries, mem_ctx, &from->entries);
+   return map;
+}
+
+/* A zero mask indicates the entry should be removed. */
+static void
+deref_map_remove_zero_masks(struct deref_map *map)
+{
+   unsigned i = 0;
+   while (i < map->entries.size / sizeof(struct deref_map_entry)) {
+      struct deref_map_entry *map_entry =
+         util_dynarray_element(&map->entries, struct deref_map_entry, i);
+      if (map_entry->mask) {
+         i++;
+         continue;
+      }
+
+      /* Replace the deleted entry with the last element and re-check the
+       * same index, since the swapped-in entry may also have a zero mask. */
+      struct deref_map_entry *last =
+         util_dynarray_pop_ptr(&map->entries, struct deref_map_entry);
+      if (map_entry != last)
+         *map_entry = *last;
+   }
+}
+
+/* Removes from map all derefs that are contained by any deref in to_sub. */
+static bool
+deref_map_sub_map(struct deref_map *map, struct deref_map *to_sub)
+{
+   bool changed = false;
+   deref_map_foreach(to_sub, sub_entry) {
+      nir_deref_path *path = sub_entry->path;
+      uintptr_t mask = sub_entry->mask;
+
+      deref_map_foreach(map, map_entry) {
+         nir_deref_compare_result comp = nir_compare_deref_paths(path, map_entry->path);
+         if (comp & nir_derefs_a_contains_b_bit) {
+            uintptr_t new_mask = map_entry->mask & ~mask;
+            if (new_mask != map_entry->mask)
+               changed = true;
+            map_entry->mask = new_mask;
+         }
+      }
+   }
+
+   deref_map_remove_zero_masks(map);
+
+   return changed;
+}
+
+/* Removes from map all derefs whose variable matches the given modes. */
+static void
+deref_map_sub_modes(struct deref_map *map, nir_variable_mode modes)
+{
+   deref_map_foreach(map, map_entry) {
+      nir_variable *var = map_entry->path->path[0]->var;
+      if (var->data.mode & modes)
+         map_entry->mask = 0;
+   }
+   deref_map_remove_zero_masks(map);
+}
+
 static void
 clear_unused_for_modes(struct util_dynarray *unused_writes, nir_variable_mode modes)
 {
@@ -96,10 +365,15 @@ update_unused_writes_with_dst(struct util_dynarray *unused_writes,
          empty_entry = entry;
          continue;
       }
+
       nir_deref_compare_result comp = nir_compare_deref_paths(dst_path, entry->path);
       if (comp & nir_derefs_a_contains_b_bit) {
          entry->mask &= ~mask;
          if (entry->mask == 0) {
+            if (debug) {
+               print_instr("REMOVE: ", &entry->intrin->instr,
+                           "\n  (value overwritten)\n");
+            }
             nir_instr_remove(&entry->intrin->instr);
             entry->intrin = NULL;
             empty_entry = entry;
@@ -123,13 +397,36 @@ update_unused_writes_with_dst(struct util_dynarray *unused_writes,
    return progress;
 }
 
+static void
+tag_use_before_write(struct deref_map *gen, struct deref_map *kill,
+                     nir_deref_path *path)
+{
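+   /* If the whole deref was already overwritten (killed) in this block,
+    * the use doesn't see values coming from other blocks, so it doesn't
+    * belong in GEN. */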
+   if (deref_map_contains(kill, path, ~(1 << NIR_MAX_VEC_COMPONENTS)))
+      return;
+   deref_map_add(gen, path, ~(1 << NIR_MAX_VEC_COMPONENTS));
+}
+
+static void
+tag_write_before_use(struct deref_map *gen, struct deref_map *kill,
+                     nir_deref_path *path, uintptr_t mask)
+{
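+   /* A write only adds to KILL; it doesn't generate a use. */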
+   deref_map_add(kill, path, mask);
+}
+
+static const nir_variable_mode memory_barrier_modes =
+   ~(nir_var_local | nir_var_global | nir_var_shader_in | nir_var_uniform);
+
+static const nir_variable_mode emit_vertex_modes = nir_var_shader_out;
+
+static const nir_variable_mode end_of_program_modes =
+   ~(nir_var_local | nir_var_shader_in | nir_var_uniform);
+
 static bool
-remove_dead_write_vars_local(struct state *state, nir_block *block)
+remove_dead_write_vars_local(struct state *state, nir_block *block, struct block_data *bd)
 {
    bool progress = false;
 
-   struct util_dynarray unused_writes;
-   util_dynarray_init(&unused_writes, state->mem_ctx);
+   util_dynarray_init(&bd->unused_writes, state->mem_ctx);
 
    nir_foreach_instr_safe(instr, block) {
       if (instr->type != nir_instr_type_intrinsic)
@@ -139,16 +436,21 @@ remove_dead_write_vars_local(struct state *state, nir_block *block)
       switch (intrin->intrinsic) {
       case nir_intrinsic_barrier:
       case nir_intrinsic_memory_barrier: {
-         nir_variable_mode modes = ~(nir_var_local | nir_var_global |
-                                     nir_var_shader_in | nir_var_uniform);
-         clear_unused_for_modes(&unused_writes, modes);
+         clear_unused_for_modes(&bd->unused_writes, memory_barrier_modes);
+         if (!bd->kill_before_memory_barrier) {
+            bd->kill_before_memory_barrier = deref_map_clone(bd->kill, state->mem_ctx);
+            deref_map_sub_modes(bd->kill_before_memory_barrier, memory_barrier_modes);
+         }
          break;
       }
 
       case nir_intrinsic_emit_vertex:
       case nir_intrinsic_emit_vertex_with_counter: {
-         nir_variable_mode modes = nir_var_shader_out;
-         clear_unused_for_modes(&unused_writes, modes);
+         clear_unused_for_modes(&bd->unused_writes, emit_vertex_modes);
+         if (!bd->kill_before_emit_vertex) {
+            bd->kill_before_emit_vertex = deref_map_clone(bd->kill, state->mem_ctx);
+            deref_map_sub_modes(bd->kill_before_emit_vertex, emit_vertex_modes);
+         }
          break;
       }
 
@@ -156,7 +458,8 @@ remove_dead_write_vars_local(struct state *state, nir_block *block)
          nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
          nir_deref_path *src_path = get_path(state, src);
 
-         clear_unused_for_src(&unused_writes, src_path);
+         clear_unused_for_src(&bd->unused_writes, src_path);
+         tag_use_before_write(bd->gen, bd->kill, src_path);
          break;
       }
 
@@ -165,7 +468,8 @@ remove_dead_write_vars_local(struct state *state, nir_block *block)
          nir_deref_path *dst_path = get_path(state, dst);
 
          uintptr_t mask = nir_intrinsic_write_mask(intrin);
-         progress |= update_unused_writes_with_dst(&unused_writes, intrin, dst_path, mask);
+         progress |= update_unused_writes_with_dst(&bd->unused_writes, intrin, dst_path, mask);
+         tag_write_before_use(bd->gen, bd->kill, dst_path, mask);
          break;
       }
 
@@ -178,14 +482,20 @@ remove_dead_write_vars_local(struct state *state, nir_block *block)
 
          /* Self-copy is removed. */
          if (src == dst || (nir_compare_deref_paths(src_path, dst_path) & nir_derefs_equal_bit)) {
+            if (debug) {
+               print_instr("REMOVE: ", instr,
+                           "\n  (self-copy)\n");
+            }
             nir_instr_remove(instr);
             progress = true;
             break;
          }
 
          uintptr_t mask = ~(1 << NIR_MAX_VEC_COMPONENTS);
-         clear_unused_for_src(&unused_writes, src_path);
-         progress |= update_unused_writes_with_dst(&unused_writes, intrin, dst_path, mask);
+         clear_unused_for_src(&bd->unused_writes, src_path);
+         progress |= update_unused_writes_with_dst(&bd->unused_writes, intrin, dst_path, mask);
+         tag_use_before_write(bd->gen, bd->kill, src_path);
+         tag_write_before_use(bd->gen, bd->kill, dst_path, mask);
          break;
       }
 
@@ -201,6 +511,60 @@ remove_dead_write_vars_local(struct state *state, nir_block *block)
    return progress;
 }
 
+enum {
+   DUMP_GEN           = 1 << 0,
+   DUMP_KILL          = 1 << 1,
+   DUMP_IN            = 1 << 2,
+   DUMP_OUT           = 1 << 3,
+   DUMP_UNUSED_WRITES = 1 << 4,
+};
+
+static void
+dump_block(struct state *state, unsigned block_index, unsigned flags)
+{
+   struct block_data *bd = &state->block_data[block_index];
+   printf("--- block%d\n", block_index);
+
+   if (bd->kill_before_memory_barrier)
+      printf("  has memory barrier\n");
+   if (bd->kill_before_emit_vertex)
+      printf("  has emit vertex\n");
+
+   if ((flags & DUMP_GEN) && !deref_map_empty(bd->gen)) {
+      printf("  gen\n");
+      deref_map_foreach(bd->gen, map_entry)
+         print_path("    ", map_entry->path, "\n");
+   }
+
+   if ((flags & DUMP_KILL) && !deref_map_empty(bd->kill)) {
+      printf("  kill\n");
+      deref_map_foreach(bd->kill, map_entry)
+         print_path("    ", map_entry->path, "\n");
+   }
+
+   if ((flags & DUMP_IN) && !deref_map_empty(bd->in)) {
+      printf("  in\n");
+      deref_map_foreach(bd->in, map_entry)
+         print_path("    ", map_entry->path, "\n");
+   }
+
+   if ((flags & DUMP_OUT) && !deref_map_empty(bd->out)) {
+      printf("  out\n");
+      deref_map_foreach(bd->out, map_entry)
+         print_path("    ", map_entry->path, "\n");
+   }
+
+   if ((flags & DUMP_UNUSED_WRITES) && bd->unused_writes.size != 0) {
+      printf("  unused writes\n");
+      util_dynarray_foreach(&bd->unused_writes, struct write_entry, entry) {
+         if (!entry->intrin)
+            continue;
+         print_instr("    ", &entry->intrin->instr, "");
+         print_path(" -- ", entry->path, "\n");
+      }
+   }
+}
+
 static bool
 remove_dead_write_vars_impl(void *mem_ctx, nir_function_impl *func)
 {
@@ -211,10 +575,161 @@ remove_dead_write_vars_impl(void *mem_ctx, nir_function_impl *func)
 
       .path_lin_ctx = linear_alloc_parent(mem_ctx, 0),
       .paths = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal),
+
+      /* Allocate one extra element for the end_block. */
+      .block_data = rzalloc_array(mem_ctx, struct block_data, func->num_blocks + 1),
    };
 
-   nir_foreach_block(block, func)
-      progress |= remove_dead_write_vars_local(&state, block);
+   /* Step 1.  Perform local optimization and initialize GEN, KILL and
+    * UNUSED_WRITES sets.
+    */
+   bool all_gens_empty = true;
+   nir_foreach_block(block, func) {
+      struct block_data *bd = &state.block_data[block->index];
+      bd->gen  = deref_map_create(mem_ctx);
+      bd->kill = deref_map_create(mem_ctx);
+      bd->out  = deref_map_create(mem_ctx);
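+      /* The IN set is created later, after GEN gets extra uses. */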
+
+      progress |= remove_dead_write_vars_local(&state, block, bd);
+
+      if (!deref_map_empty(bd->gen))
+         all_gens_empty = false;
+   }
+
+   if (func->num_blocks < 2 || all_gens_empty)
+      return progress;
+
+   /* Identify the derefs that are considered used by other intrinsics (or the
+    * end of the program), then include them as part of the GEN of the blocks
+    * that contain those intrinsics.
+    */
+   struct deref_map *used_by_memory_barrier = deref_map_create(mem_ctx);
+   struct deref_map *used_by_emit_vertex    = deref_map_create(mem_ctx);
+   struct deref_map *used_by_end_of_program = deref_map_create(mem_ctx);
+
+   for (int i = 0; i < func->num_blocks; i++) {
+      struct block_data *bd = &state.block_data[i];
+
+      util_dynarray_foreach(&bd->unused_writes, struct write_entry, entry) {
+         if (!entry->intrin)
+            continue;
+
+         nir_deref_path *dst_path = entry->path;
+         nir_variable *var = dst_path->path[0]->var;
+
+         if (var->data.mode & memory_barrier_modes)
+            deref_map_add(used_by_memory_barrier, dst_path, entry->mask);
+
+         if (var->data.mode & emit_vertex_modes)
+            deref_map_add(used_by_emit_vertex, dst_path, entry->mask);
+
+         if (var->data.mode & end_of_program_modes)
+            deref_map_add(used_by_end_of_program, dst_path, entry->mask);
+      }
+   }
+
+   /* Update GEN to include extra uses based on the presence of other
+    * intrinsics.  Make sure not to include uses for derefs we already
+    * overwrite (kill) before the intrinsic in the block.
+    */
+   for (int i = 0; i < func->num_blocks; i++) {
+      struct block_data *bd = &state.block_data[i];
+      if (bd->kill_before_memory_barrier) {
+         struct deref_map *temp = deref_map_clone(used_by_memory_barrier, mem_ctx);
+         deref_map_sub_map(temp, bd->kill_before_memory_barrier);
+         deref_map_add_map(bd->gen, temp);
+      }
+      if (bd->kill_before_emit_vertex) {
+         struct deref_map *temp = deref_map_clone(used_by_emit_vertex, mem_ctx);
+         deref_map_sub_map(temp, bd->kill_before_emit_vertex);
+         deref_map_add_map(bd->gen, temp);
+      }
+
+      /* Initialize the IN set with GEN, since we don't have OUT information yet. */
+      bd->in = deref_map_clone(bd->gen, mem_ctx);
+   }
+
+   /* Since end_block is used as the successor of the last blocks executed,
+    * we can conveniently fill its IN set to propagate the usage.
+    */
+   struct block_data *end_bd = &state.block_data[func->end_block->index];
+   end_bd->in = used_by_end_of_program;
+
+   if (debug) {
+      printf("\n=== Before data-flow analysis\n");
+      for (int i = 0; i < func->num_blocks; i++)
+         dump_block(&state, i, DUMP_IN | DUMP_KILL | DUMP_UNUSED_WRITES);
+      dump_block(&state, func->end_block->index, DUMP_IN);
+   }
+
+   /* Step 2.  Iterate backwards computing the OUT and IN sets.
+    *
+    *     OUT = union IN of all successors
+    *     IN  = GEN union (OUT minus KILL)
+    *
+    * The sets will always grow, so we can reuse IN instead of repeatedly
+    * recalculating it.  This helps identify whether the set changed or not.
+    */
+   bool flow_progress;
+   int iterations = 0;
+   do {
+      flow_progress = false;
+
+      if (debug)
+         printf("\n=== Blocks with IN/OUT changed in iteration %d\n", ++iterations);
+
+      nir_foreach_block_reverse(block, func) {
+         if (block == func->end_block)
+            continue;
+
+         struct block_data *bd = &state.block_data[block->index];
+
+         bool out_changed = false;
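+         /* A NIR block has at most two successors. */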
+         for (int i = 0; i < 2; i++) {
+            nir_block *successor = block->successors[i];
+            if (!successor)
+               continue;
+            struct block_data *successor_bd = &state.block_data[successor->index];
+            out_changed |= deref_map_add_map(bd->out, successor_bd->in);
+         }
+
+         if (!out_changed)
+            continue;
+
+         struct deref_map *temp = deref_map_clone(bd->out, mem_ctx);
+         deref_map_sub_map(temp, bd->kill);
+
+         bool in_changed = deref_map_add_map(bd->in, temp);
+         flow_progress |= in_changed;
+
+         if (debug && in_changed)
+            dump_block(&state, block->index, DUMP_IN | DUMP_OUT);
+      }
+   } while (flow_progress);
+
+   if (debug)
+      printf("\n=== After data flow analysis (%d iterations)\n", iterations);
+
+   /* Step 3.  Remove unused writes based on the data-flow analysis. */
+   nir_foreach_block(block, func) {
+      struct block_data *bd = &state.block_data[block->index];
+
+      if (debug)
+         dump_block(&state, block->index, DUMP_OUT | DUMP_UNUSED_WRITES);
+
+      util_dynarray_foreach(&bd->unused_writes, struct write_entry, entry) {
+         if (!entry->intrin)
+            continue;
+         if (!deref_map_may_alias(bd->out, entry->path, entry->mask)) {
+            if (debug) {
+               print_instr("REMOVE: ", &entry->intrin->instr,
+                           "\n  (unused after data flow analysis)\n");
+            }
+            nir_instr_remove(&entry->intrin->instr);
+            progress = true;
+         }
+      }
+   }
 
    return progress;
 }
@@ -225,6 +740,9 @@ nir_opt_dead_write_vars(nir_shader *shader)
    void *mem_ctx = ralloc_context(NULL);
    bool progress = false;
 
+   if (debug)
+      nir_print_shader(shader, stdout);
+
    nir_foreach_function(function, shader) {
       if (!function->impl)
          continue;
-- 
2.18.0


