<div dir="ltr"><br><br><div class="gmail_quote"><div dir="ltr">On Fri, Jul 27, 2018 at 4:17 PM Timothy Arceri <<a href="mailto:tarceri@itsqueeze.com">tarceri@itsqueeze.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">On 27/07/18 02:00, Jason Ekstrand wrote:<br>
> This pass looks for array variables where at least one level of the<br>
> array is never indirected and splits it into multiple smaller variables.<br>
> ---<br>
>   src/compiler/nir/nir.h            |   1 +<br>
>   src/compiler/nir/nir_split_vars.c | 525 ++++++++++++++++++++++++++++++<br>
>   2 files changed, 526 insertions(+)<br>
> <br>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h<br>
> index 4af7166f25b..c6ed5bb5358 100644<br>
> --- a/src/compiler/nir/nir.h<br>
> +++ b/src/compiler/nir/nir.h<br>
> @@ -2607,6 +2607,7 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp);<br>
>   <br>
>   int nir_gs_count_vertices(const nir_shader *shader);<br>
>   <br>
> +bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);<br>
>   bool nir_split_var_copies(nir_shader *shader);<br>
>   bool nir_split_per_member_structs(nir_shader *shader);<br>
>   bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);<br>
> diff --git a/src/compiler/nir/nir_split_vars.c b/src/compiler/nir/nir_split_vars.c<br>
> index 1f59ac2f5e7..394ed2be622 100644<br>
> --- a/src/compiler/nir/nir_split_vars.c<br>
> +++ b/src/compiler/nir/nir_split_vars.c<br>
> @@ -269,3 +269,528 @@ nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)<br>
>   <br>
>      return progress;<br>
>   }<br>
> +<br>
> +static int<br>
> +num_arrays_in_type(const struct glsl_type *type)<br>
> +{<br>
> +   int num_arrays = 0;<br>
> +   while (true) {<br>
> +      if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) {<br>
> +         num_arrays++;<br>
> +         type = glsl_get_array_element(type);<br>
> +      } else if (glsl_type_is_vector_or_scalar(type)) {<br>
> +         return num_arrays;<br>
> +      } else {<br>
> +         /* Unknown type */<br>
> +         return -1;<br>
> +      }<br>
> +   }<br>
> +}<br>
> +<br>
> +static bool<br>
> +init_var_list_array_indirects(struct exec_list *vars,<br>
> +                              struct hash_table *var_indirect_map,<br>
> +                              void *mem_ctx)<br>
> +{<br>
> +   bool has_aoa = false;<br>
> +<br>
> +   nir_foreach_variable(var, vars) {<br>
> +      int num_arrays = num_arrays_in_type(var->type);<br>
> +      if (num_arrays > 0) {<br>
> +         BITSET_WORD *indirects = rzalloc_array(mem_ctx, BITSET_WORD,<br>
> +                                                BITSET_WORDS(num_arrays));<br>
> +         _mesa_hash_table_insert(var_indirect_map, var, indirects);<br>
> +         has_aoa = true;<br>
> +      }<br>
> +   }<br>
> +<br>
> +   return has_aoa;<br>
> +}<br>
> +<br>
> +static void<br>
> +mark_indirects_impl(nir_function_impl *impl,<br>
> +                    struct hash_table *var_indirect_map,<br>
> +                    nir_variable_mode modes,<br>
> +                    void *mem_ctx)<br>
> +{<br>
> +   nir_foreach_block(block, impl) {<br>
> +      nir_foreach_instr(instr, block) {<br>
> +         if (instr->type != nir_instr_type_deref)<br>
> +            continue;<br>
> +<br>
> +         nir_deref_instr *deref = nir_instr_as_deref(instr);<br>
> +         if (!(deref->mode & modes))<br>
> +            continue;<br>
> +<br>
> +         if (!glsl_type_is_vector_or_scalar(deref->type))<br>
> +            continue;<br>
> +<br>
> +         nir_variable *base_var = nir_deref_instr_get_variable(deref);<br>
> +         struct hash_entry *entry =<br>
> +            _mesa_hash_table_search(var_indirect_map, base_var);<br>
> +         if (!entry)<br>
> +            continue;<br>
> +<br>
> +         BITSET_WORD *indirects = entry->data;<br>
> +<br>
> +         nir_deref_path path;<br>
> +         nir_deref_path_init(&path, deref, mem_ctx);<br>
> +<br>
> +         for (unsigned i = 1; path.path[i]; i++) {<br>
> +            /* We already know this is an AoA variable */<br>
> +            assert(path.path[i]->deref_type == nir_deref_type_array_wildcard ||<br>
> +                   path.path[i]->deref_type == nir_deref_type_array);<br>
> +<br>
> +            if (path.path[i]->deref_type == nir_deref_type_array &&<br>
> +                nir_src_as_const_value(path.path[i]->arr.index) == NULL)<br>
> +               BITSET_SET(indirects, i - 1);<br>
> +         }<br>
> +      }<br>
> +   }<br>
> +}<br>
> +<br>
> +struct elem {<br>
> +   struct elem *parent;<br>
> +<br>
> +   const struct glsl_type *type;<br>
> +<br>
> +   nir_variable *var;<br>
> +   struct elem *ind_child;<br>
> +   struct elem *children;<br>
> +};<br>
> +<br>
> +static void<br>
> +create_split_array_vars(struct elem *elem, struct elem *parent,<br>
> +                        BITSET_WORD *indirects, unsigned depth,<br>
> +                        const struct glsl_type *type,<br>
> +                        const char *name,<br>
> +                        struct split_var_state *state)<br>
> +{<br>
> +   *elem = (struct elem) {<br>
> +      .parent = parent,<br>
> +      .type = type,<br>
> +   };<br>
> +<br>
> +   if (glsl_type_is_vector_or_scalar(type)) {<br>
> +      const struct glsl_type *var_type = type;<br>
> +      for (struct elem *e = elem->parent; e; e = e->parent) {<br>
> +         if (e->ind_child)<br>
> +            var_type = glsl_array_type(var_type, glsl_get_length(e->type));<br>
> +      }<br>
> +<br>
> +      /* We add parens to the variable name so it looks like "(foo[2][*])" so<br>
> +       * that further derefs will look like "(foo[2][*])[ssa_6]"<br>
> +       */<br>
> +      name = ralloc_asprintf(state->mem_ctx, "(%s)", name);<br>
> +<br>
> +      nir_variable_mode mode = state->base_var->data.mode;<br>
> +      if (mode == nir_var_local) {<br>
> +         elem->var = nir_local_variable_create(state->impl, var_type, name);<br>
> +      } else {<br>
> +         elem->var = nir_variable_create(state->shader, mode, var_type, name);<br>
> +      }<br>
> +   } else if (BITSET_TEST(indirects, depth)) {<br>
> +      elem->ind_child = ralloc(state->mem_ctx, struct elem);<br>
> +      create_split_array_vars(elem->ind_child, elem,<br>
> +                              indirects, depth + 1,<br>
> +                              glsl_get_array_element(type),<br>
> +                              ralloc_asprintf(state->mem_ctx, "%s[*]", name),<br>
> +                              state);<br>
> +   } else {<br>
> +      unsigned array_len = glsl_get_length(type);<br>
> +      elem->children = ralloc_array(state->mem_ctx, struct elem, array_len);<br>
> +      for (unsigned i = 0; i < array_len; i++) {<br>
> +         create_split_array_vars(&elem->children[i], elem,<br>
> +                                 indirects, depth + 1,<br>
> +                                 glsl_get_array_element(type),<br>
> +                                 ralloc_asprintf(state->mem_ctx,<br>
> +                                                 "%s[%d]", name, i),<br>
> +                                 state);<br>
> +      }<br>
> +   }<br>
> +}<br>
> +<br>
> +static bool<br>
> +split_var_list_arrays(nir_shader *shader,<br>
> +                      nir_function_impl *impl,<br>
> +                      struct exec_list *vars,<br>
> +                      struct hash_table *var_indirect_map,<br>
> +                      struct hash_table *var_elem_map,<br>
> +                      void *mem_ctx)<br>
> +{<br>
> +   struct split_var_state state = {<br>
> +      .mem_ctx = mem_ctx,<br>
> +      .shader = shader,<br>
> +      .impl = impl,<br>
> +   };<br>
> +<br>
> +   struct exec_list split_vars;<br>
> +   exec_list_make_empty(&split_vars);<br>
> +<br>
> +   /* To avoid list confusion (we'll be adding things as we split variables),<br>
> +    * pull all of the variables we plan to split off of the list<br>
> +    */<br>
> +   nir_foreach_variable_safe(var, vars) {<br>
> +      struct hash_entry *entry =<br>
> +         _mesa_hash_table_search(var_indirect_map, var);<br>
> +      if (!entry)<br>
> +         continue;<br>
> +<br>
> +      BITSET_WORD *indirects = entry->data;<br>
> +<br>
> +      int num_arrays = num_arrays_in_type(var->type);<br>
> +      bool has_non_indirect = false;<br>
> +      for (int i = 0; i < num_arrays; i++) {<br>
> +         if (!BITSET_TEST(indirects, i)) {<br>
> +            has_non_indirect = true;<br>
> +            break;<br>
> +         }<br>
> +      }<br>
> +<br>
> +      if (has_non_indirect) {<br>
> +         /* We have something to split */<br>
> +         exec_node_remove(&var->node);<br>
> +         exec_list_push_tail(&split_vars, &var->node);<br>
> +      } else {<br>
> +         /* It's easier in split_array_copies_impl if var_indirect_map<br>
> +          * contains only the things we plan to split.<br>
> +          */<br>
> +         _mesa_hash_table_remove(var_indirect_map, entry);<br>
> +      }<br>
> +   }<br>
> +<br>
> +   nir_foreach_variable(var, &split_vars) {<br>
> +      struct hash_entry *entry =<br>
> +         _mesa_hash_table_search(var_indirect_map, var);<br>
> +      assert(entry);<br>
> +<br>
> +      BITSET_WORD *indirects = entry->data;<br>
> +<br>
> +      state.base_var = var;<br>
> +<br>
> +      struct elem *root_elem = ralloc(mem_ctx, struct elem);<br>
> +      create_split_array_vars(root_elem, NULL, indirects, 0, var->type,<br>
> +                              ralloc_asprintf(mem_ctx, "(%s)", var->name),<br>
> +                              &state);<br>
> +      _mesa_hash_table_insert(var_elem_map, var, root_elem);<br>
> +   }<br>
> +<br>
> +   return !exec_list_is_empty(&split_vars);<br>
> +}<br>
> +<br>
> +static bool<br>
> +deref_has_split_wildcard(nir_deref_path *path, BITSET_WORD *indirects)<br>
> +{<br>
> +   if (indirects == NULL)<br>
> +      return false;<br>
> +<br>
> +   for (unsigned i = 0; path->path[i]; i++) {<br>
> +      if (path->path[i]->deref_type != nir_deref_type_array_wildcard)<br>
> +         continue;<br>
> +<br>
> +      assert(i > 0);<br>
> +      if (!BITSET_TEST(indirects, i - 1))<br>
> +         return true;<br>
> +   }<br>
> +<br>
> +   return false;<br>
> +}<br>
> +<br>
> +static void<br>
> +emit_split_copies(nir_builder *b,<br>
> +                  nir_deref_instr *dst, nir_deref_path *dst_path,<br>
> +                  unsigned dst_depth, BITSET_WORD *dst_indirects,<br>
> +                  nir_deref_instr *src, nir_deref_path *src_path,<br>
> +                  unsigned src_depth, BITSET_WORD *src_indirects)<br>
> +{<br>
> +   nir_deref_instr *dst_p, *src_p;<br>
> +<br>
> +   while ((dst_p = dst_path->path[dst_depth])) {<br>
> +      if (dst_p->deref_type == nir_deref_type_array_wildcard)<br>
> +         break;<br>
> +<br>
> +      dst = nir_build_deref_follower(b, dst, dst_p);<br>
> +      dst_depth++;<br>
> +   }<br>
> +<br>
> +   while ((src_p = src_path->path[src_depth])) {<br>
> +      if (src_p->deref_type == nir_deref_type_array_wildcard)<br>
> +         break;<br>
> +<br>
> +      src = nir_build_deref_follower(b, src, src_p);<br>
> +      src_depth++;<br>
> +   }<br>
> +<br>
> +   if (src_p == NULL || dst_p == NULL) {<br>
> +      assert(src_p == NULL && dst_p == NULL);<br>
> +      nir_copy_deref(b, dst, src);<br>
> +   } else {<br>
> +      assert(dst_p->deref_type == nir_deref_type_array_wildcard &&<br>
> +             src_p->deref_type == nir_deref_type_array_wildcard);<br>
> +<br>
> +      if ((dst_indirects && !BITSET_TEST(dst_indirects, dst_depth - 1)) ||<br>
> +          (src_indirects && !BITSET_TEST(src_indirects, src_depth - 1))) {<br>
> +         /* There are no indirects at this level on one of the source or the<br>
> +          * destination so we are lowering it.<br>
> +          */<br>
> +         assert(glsl_get_length(dst_path->path[dst_depth - 1]->type) ==<br>
> +                glsl_get_length(src_path->path[src_depth - 1]->type));<br>
> +         unsigned len = glsl_get_length(dst_path->path[dst_depth - 1]->type);<br>
> +         for (unsigned i = 0; i < len; i++) {<br>
> +            nir_ssa_def *idx = nir_imm_int(b, i);<br>
> +            emit_split_copies(b, nir_build_deref_array(b, dst, idx),<br>
> +                              dst_path, dst_depth + 1, dst_indirects,<br>
> +                              nir_build_deref_array(b, src, idx),<br>
> +                              src_path, src_depth + 1, src_indirects);<br>
> +         }<br>
> +      } else {<br>
> +         /* Both sides either aren't being lowered at all or have indirects on<br>
> +          * this level so we just keep going<br>
> +          */<br>
> +         emit_split_copies(b, nir_build_deref_follower(b, dst, dst_p),<br>
> +                           dst_path, dst_depth + 1, dst_indirects,<br>
> +                           nir_build_deref_follower(b, src, src_p),<br>
> +                           src_path, src_depth + 1, src_indirects);<br>
> +      }<br>
> +   }<br>
> +}<br>
> +<br>
> +static void<br>
> +split_array_copies_impl(nir_function_impl *impl,<br>
> +                        struct hash_table *var_indirect_map,<br>
> +                        nir_variable_mode modes,<br>
> +                        void *mem_ctx)<br>
> +{<br>
> +   nir_builder b;<br>
> +   nir_builder_init(&b, impl);<br>
> +<br>
> +   nir_foreach_block(block, impl) {<br>
> +      nir_foreach_instr_safe(instr, block) {<br>
> +         if (instr->type != nir_instr_type_intrinsic)<br>
> +            continue;<br>
> +<br>
> +         nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);<br>
> +         if (copy->intrinsic != nir_intrinsic_copy_deref)<br>
> +            continue;<br>
> +<br>
> +         nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);<br>
> +         nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);<br>
> +<br>
> +         BITSET_WORD *dst_indirects = NULL;<br>
> +         if (dst_deref->mode & modes) {<br>
> +            nir_variable *dst_var = nir_deref_instr_get_variable(dst_deref);<br>
> +            struct hash_entry *entry =<br>
> +               _mesa_hash_table_search(var_indirect_map, dst_var);<br>
> +            if (entry)<br>
> +               dst_indirects = entry->data;<br>
> +         }<br>
> +<br>
> +         BITSET_WORD *src_indirects = NULL;<br>
> +         if (src_deref->mode & modes) {<br>
> +            nir_variable *src_var = nir_deref_instr_get_variable(src_deref);<br>
> +            struct hash_entry *entry =<br>
> +               _mesa_hash_table_search(var_indirect_map, src_var);<br>
> +            if (entry)<br>
> +               src_indirects = entry->data;<br>
> +         }<br>
> +<br>
> +         if (!src_indirects && !dst_indirects)<br>
> +            continue;<br>
> +<br>
> +         nir_deref_path dst_path, src_path;<br>
> +         nir_deref_path_init(&dst_path, dst_deref, mem_ctx);<br>
> +         nir_deref_path_init(&src_path, src_deref, mem_ctx);<br>
> +<br>
> +         if (!deref_has_split_wildcard(&dst_path, dst_indirects) &&<br>
> +             !deref_has_split_wildcard(&src_path, src_indirects))<br>
> +            continue;<br>
> +<br>
> +         b.cursor = nir_before_instr(&copy->instr);<br>
> +<br>
> +         emit_split_copies(&b, dst_path.path[0], &dst_path, 1, dst_indirects,<br>
> +                           src_path.path[0], &src_path, 1, src_indirects);<br>
> +<br>
> +         nir_instr_remove(&copy->instr);<br>
> +         nir_deref_instr_remove_if_unused(dst_deref);<br>
> +         nir_deref_instr_remove_if_unused(src_deref);<br>
> +      }<br>
> +   }<br>
> +}<br>
> +<br>
> +static void<br>
> +split_array_derefs_impl(nir_function_impl *impl,<br>
> +                        struct hash_table *var_elem_map,<br>
> +                        nir_variable_mode modes,<br>
> +                        void *mem_ctx)<br>
> +{<br>
> +   nir_builder b;<br>
> +   nir_builder_init(&b, impl);<br>
> +<br>
> +   nir_foreach_block(block, impl) {<br>
> +      nir_foreach_instr_safe(instr, block) {<br>
> +         if (instr->type != nir_instr_type_deref)<br>
> +            continue;<br>
> +<br>
> +         nir_deref_instr *deref = nir_instr_as_deref(instr);<br>
> +         if (!(deref->mode & modes))<br>
> +            continue;<br>
> +<br>
> +         /* Clean up any dead derefs we find lying around.  They may refer to<br>
> +          * variables we're planning to split.<br>
> +          */<br>
> +         if (nir_deref_instr_remove_if_unused(deref))<br>
> +            continue;<br>
> +<br>
> +         if (!glsl_type_is_vector_or_scalar(deref->type))<br>
> +            continue;<br>
> +<br>
> +         nir_variable *base_var = nir_deref_instr_get_variable(deref);<br>
> +         struct hash_entry *entry =<br>
> +            _mesa_hash_table_search(var_elem_map, base_var);<br>
> +         if (!entry)<br>
> +            continue;<br>
> +<br>
> +         struct elem *root_elem = entry->data;<br>
> +<br>
> +         nir_deref_path path;<br>
> +         nir_deref_path_init(&path, deref, mem_ctx);<br>
> +<br>
> +         struct elem *tail_elem = root_elem;<br>
> +         for (unsigned i = 1; path.path[i]; i++) {<br>
> +            nir_deref_instr *p = path.path[i];<br>
> +            assert(path.path[i - 1]->type == tail_elem->type);<br>
> +            assert(p->deref_type == nir_deref_type_array ||<br>
> +                   p->deref_type == nir_deref_type_array_wildcard);<br>
> +<br>
> +            if (tail_elem->children) {<br>
> +               assert(p->deref_type == nir_deref_type_array);<br>
> +               unsigned idx = nir_src_as_const_value(p->arr.index)->u32[0];<br>
> +               if (idx >= glsl_get_length(tail_elem->type))<br>
> +                  idx = 0; /* Overflow; we can give them anything */<br>
> +               tail_elem = &tail_elem->children[idx];<br>
> +            } else {<br>
> +               tail_elem = tail_elem->ind_child;<br>
> +            }<br>
> +         }<br>
> +         nir_variable *split_var = tail_elem->var;<br>
> +<br>
> +         b.cursor = nir_before_instr(&deref->instr);<br>
> +<br>
> +         nir_deref_instr *new_deref = nir_build_deref_var(&b, split_var);<br>
> +<br>
> +         tail_elem = root_elem;<br>
> +         for (unsigned i = 1; path.path[i]; i++) {<br>
> +            nir_deref_instr *p = path.path[i];<br>
> +            assert(path.path[i - 1]->type == tail_elem->type);<br>
> +            assert(p->deref_type == nir_deref_type_array ||<br>
> +                   p->deref_type == nir_deref_type_array_wildcard);<br>
> +<br>
> +            if (tail_elem->children) {<br>
> +               /* This level is split, just advance to the next element */<br>
> +               assert(p->deref_type == nir_deref_type_array);<br>
> +               unsigned idx = nir_src_as_const_value(p->arr.index)->u32[0];<br>
> +               tail_elem = &tail_elem->children[idx];<br>
> +            } else {<br>
> +               /* This level isn't split, build a deref */<br>
> +               new_deref = nir_build_deref_follower(&b, new_deref, p);<br>
> +               tail_elem = tail_elem->ind_child;<br>
> +            }<br>
> +         }<br>
> +<br>
> +         assert(new_deref->type == deref->type);<br>
> +         nir_ssa_def_rewrite_uses(&deref->dest.ssa,<br>
> +                                  nir_src_for_ssa(&new_deref->dest.ssa));<br>
> +         nir_deref_instr_remove_if_unused(deref);<br>
> +      }<br>
> +   }<br>
> +}<br>
> +<br>
> +/** A pass for splitting arrays into multiple variables<br>
> + *<br>
> + * This pass looks at arrays (possibly multiple levels) and tries to split<br>
> + * them into piles of variables, one for each array element.  The heuristic<br>
> + * used is simple: If a given array level is never used with an indirect, that<br>
> + * array level will get split.<br>
> + */<br>
> +bool<br>
> +nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)<br>
> +{<br>
> +   void *mem_ctx = ralloc_context(NULL);<br>
> +   struct hash_table *var_indirect_map =<br>
> +      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,<br>
> +                              _mesa_key_pointer_equal);<br>
> +<br>
> +   assert((modes & (nir_var_global | nir_var_local)) == modes);<br>
> +<br>
> +   bool has_global_aoa = false;<br>
> +   if (modes & nir_var_global) {<br>
> +      has_global_aoa = init_var_list_array_indirects(&shader->globals,<br>
> +                                                     var_indirect_map, mem_ctx);<br>
> +   }<br>
> +<br>
> +   bool has_any_aoa = false;<br>
> +   nir_foreach_function(function, shader) {<br>
> +      if (!function->impl)<br>
> +         continue;<br>
> +<br>
> +      bool has_local_aoa;<br>
<br>
Maybe already caught in your version 2 but this should be initialised to <br>
false.<br></blockquote><div><br></div><div>Yup.  Fixed in v2. <br></div></div></div>