[Mesa-dev] [PATCH v2 06/11] nir: Add an array splitting pass

Jason Ekstrand jason at jlekstrand.net
Sun Jul 29 05:44:37 UTC 2018


This pass looks for array variables where at least one level of the
array is never indirected and splits it into multiple smaller variables.

This pass doesn't really do much now because nir_lower_vars_to_ssa can
already see through arrays of arrays and can detect indirects on just
one level or even see that arr[i][0][5] does not alias arr[i][1][j].
This pass exists to help other passes more easily see through arrays of
arrays.  If a back-end does implement arrays using scratch or indirects
on registers, having more smaller arrays is likely to have better memory
efficiency.

v2 (Jason Ekstrand):
 - Better comments and naming (some from Caio)
 - Rework to use one hash map instead of two
---
 src/compiler/nir/nir.h            |   1 +
 src/compiler/nir/nir_split_vars.c | 583 ++++++++++++++++++++++++++++++
 2 files changed, 584 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4af7166f25b..c6ed5bb5358 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2607,6 +2607,7 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp);
 
 int nir_gs_count_vertices(const nir_shader *shader);
 
+bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
 bool nir_split_var_copies(nir_shader *shader);
 bool nir_split_per_member_structs(nir_shader *shader);
 bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);
diff --git a/src/compiler/nir/nir_split_vars.c b/src/compiler/nir/nir_split_vars.c
index 1f59ac2f5e7..f55f5c5eb7f 100644
--- a/src/compiler/nir/nir_split_vars.c
+++ b/src/compiler/nir/nir_split_vars.c
@@ -24,6 +24,7 @@
 #include "nir.h"
 #include "nir_builder.h"
 #include "nir_deref.h"
+#include "nir_vla.h"
 
 struct split_var_state {
    void *mem_ctx;
@@ -57,6 +58,23 @@ wrap_type_in_array(const struct glsl_type *type,
    return glsl_array_type(elem_type, glsl_get_length(array_type));
 }
 
+static int
+num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
+{
+   int num_levels = 0;
+   while (true) {
+      if (glsl_type_is_array_or_matrix(type)) {
+         num_levels++;
+         type = glsl_get_array_element(type);
+      } else if (glsl_type_is_vector_or_scalar(type)) {
+         return num_levels;
+      } else {
+         /* Not an array of vectors */
+         return -1;
+      }
+   }
+}
+
 static void
 init_field_for_type(struct field *field, struct field *parent,
                     const struct glsl_type *type,
@@ -269,3 +287,568 @@ nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
 
    return progress;
 }
+
+struct array_level_info {
+   unsigned array_len;
+   bool split;
+};
+
+struct array_split {
+   /* Only set if this is the tail end of the splitting */
+   nir_variable *var;
+
+   unsigned num_splits;
+   struct array_split *splits;
+};
+
+struct array_var_info {
+   nir_variable *base_var;
+
+   const struct glsl_type *split_var_type;
+
+   bool split_var;
+   struct array_split root_split;
+
+   unsigned num_levels;
+   struct array_level_info levels[0];
+};
+
+static bool
+init_var_list_array_infos(struct exec_list *vars,
+                          struct hash_table *var_info_map,
+                          void *mem_ctx)
+{
+   bool has_array = false;
+
+   nir_foreach_variable(var, vars) {
+      int num_levels = num_array_levels_in_array_of_vector_type(var->type);
+      if (num_levels <= 0)
+         continue;
+
+      struct array_var_info *info =
+         rzalloc_size(mem_ctx, sizeof(*info) +
+                               num_levels * sizeof(info->levels[0]));
+
+      info->base_var = var;
+      info->num_levels = num_levels;
+
+      const struct glsl_type *type = var->type;
+      for (int i = 0; i < num_levels; i++) {
+         info->levels[i].array_len = glsl_get_length(type);
+         type = glsl_get_array_element(type);
+
+         /* All levels start out initially as split */
+         info->levels[i].split = true;
+      }
+
+      _mesa_hash_table_insert(var_info_map, var, info);
+      has_array = true;
+   }
+
+   return has_array;
+}
+
+static struct array_var_info *
+get_array_var_info(nir_variable *var,
+                   struct hash_table *var_info_map)
+{
+   struct hash_entry *entry =
+      _mesa_hash_table_search(var_info_map, var);
+   return entry ? entry->data : NULL;
+}
+
+static struct array_var_info *
+get_array_deref_info(nir_deref_instr *deref,
+                     struct hash_table *var_info_map,
+                     nir_variable_mode modes)
+{
+   if (!(deref->mode & modes))
+      return NULL;
+
+   return get_array_var_info(nir_deref_instr_get_variable(deref),
+                             var_info_map);
+}
+
+static void
+mark_array_deref_used(nir_deref_instr *deref,
+                      struct hash_table *var_info_map,
+                      nir_variable_mode modes,
+                      void *mem_ctx)
+{
+   struct array_var_info *info =
+      get_array_deref_info(deref, var_info_map, modes);
+   if (!info)
+      return;
+
+   nir_deref_path path;
+   nir_deref_path_init(&path, deref, mem_ctx);
+
+   /* Walk the path and look for indirects.  If we have an array deref with an
+    * indirect, mark the given level as not being split.
+    */
+   for (unsigned i = 0; i < info->num_levels; i++) {
+      nir_deref_instr *p = path.path[i + 1];
+      if (p->deref_type == nir_deref_type_array &&
+          nir_src_as_const_value(p->arr.index) == NULL)
+         info->levels[i].split = false;
+   }
+}
+
+static void
+mark_array_usage_impl(nir_function_impl *impl,
+                      struct hash_table *var_info_map,
+                      nir_variable_mode modes,
+                      void *mem_ctx)
+{
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_copy_deref:
+            mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
+                                  var_info_map, modes, mem_ctx);
+            /* Fall Through */
+
+         case nir_intrinsic_load_deref:
+         case nir_intrinsic_store_deref:
+            mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
+                                  var_info_map, modes, mem_ctx);
+            break;
+
+         default:
+            break;
+         }
+      }
+   }
+}
+
+static void
+create_split_array_vars(struct array_var_info *var_info,
+                        unsigned level,
+                        struct array_split *split,
+                        const char *name,
+                        nir_shader *shader,
+                        nir_function_impl *impl,
+                        void *mem_ctx)
+{
+   while (level < var_info->num_levels && !var_info->levels[level].split) {
+      name = ralloc_asprintf(mem_ctx, "%s[*]", name);
+      level++;
+   }
+
+   if (level == var_info->num_levels) {
+      /* We add parens to the variable name so it looks like "(foo[2][*])" so
+       * that further derefs will look like "(foo[2][*])[ssa_6]"
+       */
+      name = ralloc_asprintf(mem_ctx, "(%s)", name);
+
+      nir_variable_mode mode = var_info->base_var->data.mode;
+      if (mode == nir_var_local) {
+         split->var = nir_local_variable_create(impl,
+                                                var_info->split_var_type, name);
+      } else {
+         split->var = nir_variable_create(shader, mode,
+                                          var_info->split_var_type, name);
+      }
+   } else {
+      assert(var_info->levels[level].split);
+      split->num_splits = var_info->levels[level].array_len;
+      split->splits = rzalloc_array(mem_ctx, struct array_split,
+                                    split->num_splits);
+      for (unsigned i = 0; i < split->num_splits; i++) {
+         create_split_array_vars(var_info, level + 1, &split->splits[i],
+                                 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
+                                 shader, impl, mem_ctx);
+      }
+   }
+}
+
+static bool
+split_var_list_arrays(nir_shader *shader,
+                      nir_function_impl *impl,
+                      struct exec_list *vars,
+                      struct hash_table *var_info_map,
+                      void *mem_ctx)
+{
+   struct exec_list split_vars;
+   exec_list_make_empty(&split_vars);
+
+   nir_foreach_variable(var, &split_vars) {
+      struct array_var_info *info = get_array_var_info(var, var_info_map);
+      if (!info)
+         continue;
+
+      bool has_split = false;
+      const struct glsl_type *split_type =
+         glsl_without_array_or_matrix(var->type);
+      for (int i = info->num_levels - 1; i >= 0; i--) {
+         if (info->levels[i].split)
+            continue;
+
+         has_split = true;
+
+         /* If the original type was a matrix type, we'd like to keep that so
+          * we don't convert matrices into arrays.
+          */
+         if (i == info->num_levels - 1 &&
+             glsl_type_is_matrix(glsl_without_array(var->type))) {
+            split_type = glsl_matrix_type(glsl_get_base_type(split_type),
+                                          glsl_get_components(split_type),
+                                          info->levels[i].array_len);
+         } else {
+            split_type = glsl_array_type(split_type, info->levels[i].array_len);
+         }
+      }
+
+      if (has_split) {
+         info->split_var_type = split_type;
+         /* To avoid list confusion (we'll be adding things as we split
+          * variables), pull all of the variables we plan to split off of the
+          * main variable list.
+          */
+         exec_node_remove(&var->node);
+         exec_list_push_tail(&split_vars, &var->node);
+      } else {
+         assert(split_type == var->type);
+         /* If we're not modifying this variable, delete the info so we skip
+          * it faster in later passes.
+          */
+         _mesa_hash_table_remove_key(var_info_map, var);
+      }
+   }
+
+   nir_foreach_variable(var, &split_vars) {
+      struct array_var_info *info = get_array_var_info(var, var_info_map);
+      create_split_array_vars(info, 0, &info->root_split, var->name,
+                              shader, impl, mem_ctx);
+   }
+
+   return !exec_list_is_empty(&split_vars);
+}
+
+static bool
+deref_has_split_wildcard(nir_deref_path *path,
+                         struct array_var_info *info)
+{
+   if (info == NULL)
+      return false;
+
+   assert(path->path[0]->var == info->base_var);
+   for (unsigned i = 0; i < info->num_levels; i++) {
+      if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
+          info->levels[i].split)
+         return true;
+   }
+
+   return false;
+}
+
+static bool
+array_path_is_out_of_bounds(nir_deref_path *path,
+                            struct array_var_info *info)
+{
+   if (info == NULL)
+      return false;
+
+   assert(path->path[0]->var == info->base_var);
+   for (unsigned i = 0; i < info->num_levels; i++) {
+      nir_deref_instr *p = path->path[i + 1];
+      if (p->deref_type == nir_deref_type_array_wildcard)
+         continue;
+
+      nir_const_value *const_index = nir_src_as_const_value(p->arr.index);
+      if (const_index && const_index->u32[0] >= info->levels[i].array_len)
+         return true;
+   }
+
+   return false;
+}
+
+static void
+emit_split_copies(nir_builder *b,
+                  struct array_var_info *dst_info, nir_deref_path *dst_path,
+                  unsigned dst_level, nir_deref_instr *dst,
+                  struct array_var_info *src_info, nir_deref_path *src_path,
+                  unsigned src_level, nir_deref_instr *src)
+{
+   nir_deref_instr *dst_p, *src_p;
+
+   while ((dst_p = dst_path->path[dst_level + 1])) {
+      if (dst_p->deref_type == nir_deref_type_array_wildcard)
+         break;
+
+      dst = nir_build_deref_follower(b, dst, dst_p);
+      dst_level++;
+   }
+
+   while ((src_p = src_path->path[src_level + 1])) {
+      if (src_p->deref_type == nir_deref_type_array_wildcard)
+         break;
+
+      src = nir_build_deref_follower(b, src, src_p);
+      src_level++;
+   }
+
+   if (src_p == NULL || dst_p == NULL) {
+      assert(src_p == NULL && dst_p == NULL);
+      nir_copy_deref(b, dst, src);
+   } else {
+      assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
+             src_p->deref_type == nir_deref_type_array_wildcard);
+
+      if ((dst_info && dst_info->levels[dst_level].split) ||
+          (src_info && src_info->levels[src_level].split)) {
+         /* There are no indirects at this level on one of the source or the
+          * destination so we are lowering it.
+          */
+         assert(glsl_get_length(dst_path->path[dst_level]->type) ==
+                glsl_get_length(src_path->path[src_level]->type));
+         unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
+         for (unsigned i = 0; i < len; i++) {
+            nir_ssa_def *idx = nir_imm_int(b, i);
+            emit_split_copies(b, dst_info, dst_path, dst_level + 1,
+                              nir_build_deref_array(b, dst, idx),
+                              src_info, src_path, src_level + 1,
+                              nir_build_deref_array(b, src, idx));
+         }
+      } else {
+         /* Neither side is being split so we just keep going */
+         emit_split_copies(b, dst_info, dst_path, dst_level + 1,
+                           nir_build_deref_array_wildcard(b, dst),
+                           src_info, src_path, src_level + 1,
+                           nir_build_deref_array_wildcard(b, src));
+      }
+   }
+}
+
+static void
+split_array_copies_impl(nir_function_impl *impl,
+                        struct hash_table *var_info_map,
+                        nir_variable_mode modes,
+                        void *mem_ctx)
+{
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
+         if (copy->intrinsic != nir_intrinsic_copy_deref)
+            continue;
+
+         nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
+         nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
+
+         struct array_var_info *dst_info =
+            get_array_deref_info(dst_deref, var_info_map, modes);
+         struct array_var_info *src_info =
+            get_array_deref_info(src_deref, var_info_map, modes);
+
+         if (!src_info && !dst_info)
+            continue;
+
+         nir_deref_path dst_path, src_path;
+         nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
+         nir_deref_path_init(&src_path, src_deref, mem_ctx);
+
+         if (!deref_has_split_wildcard(&dst_path, dst_info) &&
+             !deref_has_split_wildcard(&src_path, src_info))
+            continue;
+
+         b.cursor = nir_instr_remove(&copy->instr);
+
+         emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
+                               src_info, &src_path, 0, src_path.path[0]);
+      }
+   }
+}
+
+static void
+split_array_access_impl(nir_function_impl *impl,
+                        struct hash_table *var_info_map,
+                        nir_variable_mode modes,
+                        void *mem_ctx)
+{
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type == nir_instr_type_deref) {
+            /* Clean up any dead derefs we find lying around.  They may refer
+             * to variables we're planning to split.
+             */
+            nir_deref_instr *deref = nir_instr_as_deref(instr);
+            if (deref->mode & modes)
+               nir_deref_instr_remove_if_unused(deref);
+            continue;
+         }
+
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         if (intrin->intrinsic != nir_intrinsic_load_deref &&
+             intrin->intrinsic != nir_intrinsic_store_deref &&
+             intrin->intrinsic != nir_intrinsic_copy_deref)
+            continue;
+
+         const unsigned num_derefs =
+            intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
+
+         for (unsigned d = 0; d < num_derefs; d++) {
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
+
+            struct array_var_info *info =
+               get_array_deref_info(deref, var_info_map, modes);
+            if (!info)
+               continue;
+
+            nir_deref_path path;
+            nir_deref_path_init(&path, deref, mem_ctx);
+
+            b.cursor = nir_before_instr(&intrin->instr);
+
+            if (array_path_is_out_of_bounds(&path, info)) {
+               /* If one of the derefs is out-of-bounds, we just delete the
+                * instruction.  If a destination is out of bounds, then it may
+                * have been in-bounds prior to shrinking so we don't want to
+                * accidentally stomp something.  However, we've already proven
+                * that it will never be read so it's safe to delete.  If a
+                * source is out of bounds then it is loading random garbage.
+                * For loads, we replace their uses with an undef instruction
+                * and for copies we just delete the copy since it was writing
+                * undefined garbage anyway and we may as well leave the random
+                * garbage in the destination alone.
+                */
+               if (intrin->intrinsic == nir_intrinsic_load_deref) {
+                  nir_ssa_def *u =
+                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
+                                       intrin->dest.ssa.bit_size);
+                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                           nir_src_for_ssa(u));
+               }
+               nir_instr_remove(&intrin->instr);
+               for (unsigned i = 0; i < num_derefs; i++)
+                  nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
+               break;
+            }
+
+            struct array_split *split = &info->root_split;
+            for (unsigned i = 0; i < info->num_levels; i++) {
+               if (info->levels[i].split) {
+                  nir_deref_instr *p = path.path[i + 1];
+                  unsigned index = nir_src_as_const_value(p->arr.index)->u32[0];
+                  assert(index < info->levels[i].array_len);
+                  split = &split->splits[index];
+               }
+            }
+            assert(!split->splits && split->var);
+
+            nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
+            for (unsigned i = 0; i < info->num_levels; i++) {
+               if (!info->levels[i].split) {
+                  new_deref = nir_build_deref_array(&b, new_deref,
+                     nir_ssa_for_src(&b, path.path[i + 1]->arr.index, 1));
+               }
+            }
+            assert(new_deref->type == deref->type);
+
+            /* Rewrite the deref source to point to the split one */
+            nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
+                                  nir_src_for_ssa(&new_deref->dest.ssa));
+            nir_deref_instr_remove_if_unused(deref);
+         }
+      }
+   }
+}
+
+/** A pass for splitting arrays of vectors into multiple variables
+ *
+ * This pass looks at arrays (possibly multiple levels) of vectors (not
+ * structures or other types) and tries to split them into piles of variables,
+ * one for each array element.  The heuristic used is simple: If a given array
+ * level is never used with an indirect, that array level will get split.
+ *
+ * This pass probably could handles structures easily enough but making a pass
+ * that could see through an array of structures of arrays would be difficult
+ * so it's best to just run nir_split_struct_vars first.
+ */
+bool
+nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
+{
+   void *mem_ctx = ralloc_context(NULL);
+   struct hash_table *var_info_map =
+      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   assert((modes & (nir_var_global | nir_var_local)) == modes);
+
+   bool has_global_array = false;
+   if (modes & nir_var_global) {
+      has_global_array = init_var_list_array_infos(&shader->globals,
+                                                   var_info_map, mem_ctx);
+   }
+
+   bool has_any_array = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      bool has_local_array = false;
+      if (modes & nir_var_local) {
+         has_local_array = init_var_list_array_infos(&function->impl->locals,
+                                                     var_info_map, mem_ctx);
+      }
+
+      if (has_global_array || has_local_array) {
+         has_any_array = true;
+         mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
+      }
+   }
+
+   /* If we failed to find any arrays of arrays, bail early. */
+   if (!has_any_array) {
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   bool has_global_splits = false;
+   if (modes & nir_var_global) {
+      has_global_splits = split_var_list_arrays(shader, NULL,
+                                                &shader->globals,
+                                                var_info_map, mem_ctx);
+   }
+
+   bool progress = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      bool has_local_splits = false;
+      if (modes & nir_var_local) {
+         has_local_splits = split_var_list_arrays(shader, function->impl,
+                                                  &function->impl->locals,
+                                                  var_info_map, mem_ctx);
+      }
+
+      if (has_global_splits || has_local_splits) {
+         split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
+         split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+         progress = true;
+      }
+   }
+
+   ralloc_free(mem_ctx);
+
+   return progress;
+}
-- 
2.17.1



More information about the mesa-dev mailing list