[Mesa-dev] [PATCH 091/133] nir: Add a pass to lower local variable accesses to SSA values

Jason Ekstrand jason at jlekstrand.net
Wed Dec 17 19:50:22 PST 2014


On Wed, Dec 17, 2014 at 7:13 PM, Connor Abbott <cwabbott0 at gmail.com> wrote:
>
> On Tue, Dec 16, 2014 at 1:11 AM, Jason Ekstrand <jason at jlekstrand.net>
> wrote:
> > This pass analizes all of the load/store operations and, when a variable
> is
> > never aliased (potentially used by an indirect operation), it is lowered
> > directly to an SSA value.  This pass translates to SSA directly and does
> > not require any fixup by the original to-SSA pass.
> > ---
> >  src/glsl/Makefile.sources          |    1 +
> >  src/glsl/nir/nir.h                 |    2 +
> >  src/glsl/nir/nir_lower_variables.c | 1071
> ++++++++++++++++++++++++++++++++++++
> >  3 files changed, 1074 insertions(+)
> >  create mode 100644 src/glsl/nir/nir_lower_variables.c
> >
> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> > index 84245bc..1d3b049 100644
> > --- a/src/glsl/Makefile.sources
> > +++ b/src/glsl/Makefile.sources
> > @@ -24,6 +24,7 @@ NIR_FILES = \
> >         $(GLSL_SRCDIR)/nir/nir_lower_atomics.c \
> >         $(GLSL_SRCDIR)/nir/nir_lower_samplers.cpp \
> >         $(GLSL_SRCDIR)/nir/nir_lower_system_values.c \
> > +       $(GLSL_SRCDIR)/nir/nir_lower_variables.c \
> >         $(GLSL_SRCDIR)/nir/nir_lower_variables_scalar.c \
> >         $(GLSL_SRCDIR)/nir/nir_lower_vec_to_movs.c \
> >         $(GLSL_SRCDIR)/nir/nir_metadata.c \
> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> > index 86cda07..b3abfb9 100644
> > --- a/src/glsl/nir/nir.h
> > +++ b/src/glsl/nir/nir.h
> > @@ -1358,6 +1358,8 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp);
> >
> >  void nir_split_var_copies(nir_shader *shader);
> >
> > +void nir_lower_variables(nir_shader *shader);
> > +
> >  void nir_lower_variables_scalar(nir_shader *shader, bool lower_globals,
> >                                  bool lower_io, bool add_names,
> >                                  bool native_integers);
> > diff --git a/src/glsl/nir/nir_lower_variables.c
> b/src/glsl/nir/nir_lower_variables.c
> > new file mode 100644
> > index 0000000..052b021
> > --- /dev/null
> > +++ b/src/glsl/nir/nir_lower_variables.c
> > @@ -0,0 +1,1071 @@
> > +/*
> > + * Copyright © 2014 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> > + * IN THE SOFTWARE.
> > + *
> > + * Authors:
> > + *    Jason Ekstrand (jason at jlekstrand.net)
> > + *
> > + */
> > +
> > +#include "nir.h"
> > +
> > +struct deref_node {
> > +   struct deref_node *parent;
> > +   const struct glsl_type *type;
> > +
> > +   bool lower_to_ssa;
> > +
> > +   struct set *loads;
> > +   struct set *stores;
> > +   struct set *copies;
> > +
> > +   nir_ssa_def **def_stack;
> > +   nir_ssa_def **def_stack_tail;
> > +
> > +   struct deref_node *wildcard;
> > +   struct deref_node *indirect;
> > +   struct deref_node *children[0];
> > +};
>
> I think this should be a typedef'd struct, that's the way everything
> else is in NIR, including all the other pass-specific structures (all
> bikesheds aside).
>

Sure, and I think I certainly will if it gets moved to a header file.  In
general, I haven't been typdef'ing pass-specific stuff.  Maybe you were,
but I haven't.


>
> > +
> > +struct lower_variables_state {
> > +   void *mem_ctx;
> > +   void *dead_ctx;
> > +   nir_function_impl *impl;
> > +
> > +   /* A hash table mapping variables to deref_node data */
> > +   struct hash_table *deref_var_nodes;
> > +   /* A hash table mapping dereference leaves to deref_node data */
> > +   struct hash_table *deref_leaves;
> > +
> > +   /* A hash table mapping phi nodes to deref_state data */
> > +   struct hash_table *phi_table;
> > +};
> > +
> > +/* The following two functions implement a hash and equality check for
> > + * variable dreferences.  When the hash or equality function encounters
> an
> > + * array, all indirects are treated as equal and are never equal to a
> > + * direct dereference or a wildcard.
> > + */
> > +static uint32_t
> > +hash_deref(const void *void_deref)
> > +{
> > +   const nir_deref *deref = void_deref;
> > +
> > +   uint32_t hash;
> > +   if (deref->child) {
> > +      hash = hash_deref(deref->child);
> > +   } else {
> > +      hash = 2166136261ul;
> > +   }
> > +
> > +   switch (deref->deref_type) {
> > +   case nir_deref_type_var:
> > +      hash ^= _mesa_hash_pointer(nir_deref_as_var(deref)->var);
> > +      break;
> > +   case nir_deref_type_array: {
> > +      nir_deref_array *array = nir_deref_as_array(deref);
> > +      hash += 268435183 * array->deref_array_type;
> > +      if (array->deref_array_type == nir_deref_array_type_direct)
> > +         hash ^= array->base_offset; /* Some prime */
> > +      break;
> > +   }
> > +   case nir_deref_type_struct:
> > +      hash ^= nir_deref_as_struct(deref)->index;
> > +      break;
> > +   }
> > +
> > +   return hash * 0x01000193;
> > +}
>
> I seem to recall you saying these magic numbers aren't really
> necessary. If they are, can we document why you chose them and what
> they're doing?
>

They're some primes I grabbed off the internet.  Hashing is magic.  I don't
claim that this hash is good.  Just that it's a hash.


>
> > +
> > +static bool
> > +derefs_equal(const void *void_a, const void *void_b)
> > +{
> > +   const nir_deref *a = void_a;
> > +   const nir_deref *b = void_b;
> > +
> > +   if (a->deref_type != b->deref_type)
> > +      return false;
> > +
> > +   switch (a->deref_type) {
> > +   case nir_deref_type_var:
> > +      if (nir_deref_as_var(a)->var != nir_deref_as_var(b)->var)
> > +         return false;
> > +      break;
> > +   case nir_deref_type_array: {
> > +      nir_deref_array *a_arr = nir_deref_as_array(a);
> > +      nir_deref_array *b_arr = nir_deref_as_array(b);
> > +
> > +      if (a_arr->deref_array_type != b_arr->deref_array_type)
> > +         return false;
> > +
> > +      if (a_arr->deref_array_type == nir_deref_array_type_direct &&
> > +          a_arr->base_offset != b_arr->base_offset)
> > +         return false;
> > +      break;
> > +   }
> > +   case nir_deref_type_struct:
> > +      if (nir_deref_as_struct(a)->index !=
> nir_deref_as_struct(b)->index)
> > +         return false;
> > +      break;
> > +   default:
> > +      unreachable("Invalid dreference type");
> > +   }
> > +
> > +   assert((a->child == NULL) == (b->child == NULL));
> > +   if (a->child)
> > +      return derefs_equal(a->child, b->child);
> > +   else
> > +      return true;
> > +}
> > +
> > +static int
> > +type_get_length(const struct glsl_type *type)
> > +{
> > +   switch (glsl_get_base_type(type)) {
> > +   case GLSL_TYPE_STRUCT:
> > +   case GLSL_TYPE_ARRAY:
> > +      return glsl_get_length(type);
> > +   case GLSL_TYPE_FLOAT:
> > +   case GLSL_TYPE_INT:
> > +   case GLSL_TYPE_UINT:
> > +   case GLSL_TYPE_BOOL:
> > +      if (glsl_type_is_matrix(type))
> > +         return glsl_get_matrix_columns(type);
> > +      else
> > +         return glsl_get_vector_elements(type);
> > +   default:
> > +      unreachable("Invalid deref base type");
> > +   }
> > +}
> > +
> > +static struct deref_node *
> > +deref_node_create(struct deref_node *parent,
> > +                  const struct glsl_type *type, void *mem_ctx)
> > +{
> > +   size_t size = sizeof(struct deref_node) +
> > +                 type_get_length(type) * sizeof(struct deref_node *);
> > +
> > +   struct deref_node *node = rzalloc_size(mem_ctx, size);
> > +   node->type = type;
> > +   node->parent = parent;
> > +
> > +   return node;
> > +}
> > +
> > +static struct deref_node *
> > +get_deref_node(nir_deref_var *deref, bool add_to_leaves,
> > +               struct lower_variables_state *state)
> > +{
> > +   bool is_leaf = true;
> > +   struct deref_node *parent = NULL;
> > +   nir_deref *tail = &deref->deref;
> > +   while (tail) {
> > +      struct deref_node *node;
> > +
> > +      switch (tail->deref_type) {
> > +      case nir_deref_type_var: {
> > +         assert(tail == &deref->deref);
> > +         assert(parent == NULL);
> > +
> > +         uint32_t var_hash = _mesa_hash_pointer(deref->var);
> > +         struct hash_entry *entry =
> > +            _mesa_hash_table_search(state->deref_var_nodes,
> > +                                    var_hash, deref->var);
> > +         if (entry) {
> > +            node = entry->data;
> > +         } else {
> > +            node = deref_node_create(NULL, tail->type, state->dead_ctx);
> > +            _mesa_hash_table_insert(state->deref_var_nodes,
> > +                                    var_hash, deref->var, node);
> > +         }
> > +         break;
> > +      }
> > +
> > +      case nir_deref_type_struct: {
> > +         assert(parent != NULL);
> > +
> > +         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
> > +         assert(deref_struct->index < type_get_length(parent->type));
> > +         if (parent->children[deref_struct->index]) {
> > +            node = parent->children[deref_struct->index];
> > +         } else {
> > +            node = deref_node_create(parent, tail->type,
> state->dead_ctx);
> > +            parent->children[deref_struct->index] = node;
> > +         }
> > +         break;
> > +      }
> > +
> > +      case nir_deref_type_array: {
> > +         assert(parent != NULL);
> > +
> > +         nir_deref_array *arr = nir_deref_as_array(tail);
> > +         switch (arr->deref_array_type) {
> > +         case nir_deref_array_type_direct:
> > +            if (arr->base_offset >= type_get_length(parent->type)) {
> > +               /* This is possible if a loop unrolls and generates an
> > +                * out-of-bounds offset.  We need to handle this at least
> > +                * somewhat gracefully.
> > +                */
> > +               return NULL;
> > +            } else if (parent->children[arr->base_offset]) {
> > +               node = parent->children[arr->base_offset];
> > +            } else {
> > +               node = deref_node_create(parent, tail->type,
> state->dead_ctx);
> > +               parent->children[arr->base_offset] = node;
> > +            }
> > +            break;
> > +         case nir_deref_array_type_indirect:
> > +            if (parent->indirect) {
> > +               node = parent->indirect;
> > +            } else {
> > +               node = deref_node_create(parent, tail->type,
> state->dead_ctx);
> > +               parent->indirect = node;
> > +            }
> > +            is_leaf = false;
>
> Maybe I'm not understanding something, but I don't think how you're
> setting is_leaf makes sense. For example, if I have
>
> vec4 foo[10];
>
> and I have a dereference like foo[i], then that sounds like it should
> be a leaf dereference (since it's not dereferencing a composite type
> like a struct or array). But here you'd set is_leaf to false in that
> case. I also see nothing that would set is_leaf to false if I just
> dereference foo by itself. On second thought, by that definition all
> of our derefs should be leaf dereferences, so maybe is_leaf is
> supposed to mean something else?
>
> > +            break;
> > +         case nir_deref_array_type_wildcard:
> > +            if (parent->wildcard) {
> > +               node = parent->wildcard;
> > +            } else {
> > +               node = deref_node_create(parent, tail->type,
> state->dead_ctx);
> > +               parent->wildcard = node;
> > +            }
> > +            is_leaf = false;
> > +            break;
> > +         default:
> > +            unreachable("Invalid array deref type");
> > +         }
> > +         break;
> > +      }
> > +      default:
> > +         unreachable("Invalid deref type");
> > +      }
> > +
> > +      parent = node;
> > +      tail = tail->child;
> > +   }
> > +
> > +   assert(parent);
> > +
> > +   if (is_leaf && add_to_leaves)
> > +      _mesa_hash_table_insert(state->deref_leaves,
> > +                              hash_deref(deref), deref, parent);
> > +
> > +   return parent;
> > +}
> > +
> > +static void
> > +register_load_instr(nir_intrinsic_instr *load_instr, bool create_node,
> > +                    struct lower_variables_state *state)
> > +{
> > +   struct deref_node *node = get_deref_node(load_instr->variables[0],
> > +                                            create_node, state);
> > +   if (node == NULL)
> > +      return;
> > +
> > +   if (node->loads == NULL)
> > +      node->loads = _mesa_set_create(state->dead_ctx,
> > +                                     _mesa_key_pointer_equal);
> > +
> > +   _mesa_set_add(node->loads, _mesa_hash_pointer(load_instr),
> load_instr);
> > +}
> > +
> > +static void
> > +register_store_instr(nir_intrinsic_instr *store_instr, bool create_node,
> > +                     struct lower_variables_state *state)
> > +{
> > +   struct deref_node *node = get_deref_node(store_instr->variables[0],
> > +                                            create_node, state);
> > +   if (node == NULL)
> > +      return;
> > +
> > +   if (node->stores == NULL)
> > +      node->stores = _mesa_set_create(state->dead_ctx,
> > +                                     _mesa_key_pointer_equal);
> > +
> > +   _mesa_set_add(node->stores, _mesa_hash_pointer(store_instr),
> store_instr);
> > +}
> > +
> > +static void
> > +register_copy_instr(nir_intrinsic_instr *copy_instr, bool create_node,
> > +                    struct lower_variables_state *state)
> > +{
> > +   for (unsigned idx = 0; idx < 2; idx++) {
> > +      struct deref_node *node =
> get_deref_node(copy_instr->variables[idx],
> > +                                               create_node, state);
> > +      if (node == NULL)
> > +         continue;
> > +
> > +      if (node->copies == NULL)
> > +         node->copies = _mesa_set_create(state->dead_ctx,
> > +                                         _mesa_key_pointer_equal);
> > +
> > +      _mesa_set_add(node->copies, _mesa_hash_pointer(copy_instr),
> copy_instr);
> > +   }
> > +}
> > +
> > +static bool
> > +foreach_deref_node_worker(struct deref_node *node, nir_deref *deref,
> > +                          bool (* cb)(struct deref_node *node,
> > +                                      struct lower_variables_state
> *state),
> > +                          struct lower_variables_state *state)
> > +{
> > +   if (deref->child == NULL) {
> > +      return cb(node, state);
> > +   } else {
> > +      switch (deref->child->deref_type) {
> > +      case nir_deref_type_array: {
> > +         nir_deref_array *arr = nir_deref_as_array(deref->child);
> > +         assert(arr->deref_array_type == nir_deref_array_type_direct);
> > +         if (node->children[arr->base_offset] &&
> > +
>  !foreach_deref_node_worker(node->children[arr->base_offset],
> > +                                        deref->child, cb, state))
> > +            return false;
> > +
> > +         if (node->wildcard &&
> > +             !foreach_deref_node_worker(node->wildcard,
> > +                                        deref->child, cb, state))
> > +            return false;
> > +
> > +         return true;
> > +      }
> > +
> > +      case nir_deref_type_struct: {
> > +         nir_deref_struct *str = nir_deref_as_struct(deref->child);
> > +         return foreach_deref_node_worker(node->children[str->index],
> > +                                          deref->child, cb, state);
> > +      }
> > +
> > +      default:
> > +         unreachable("Invalid deref child type");
> > +      }
> > +   }
> > +}
> > +
> > +static bool
> > +foreach_deref_node_match(nir_deref_var *deref,
> > +                         bool (* cb)(struct deref_node *node,
> > +                                     struct lower_variables_state
> *state),
> > +                         struct lower_variables_state *state)
> > +{
> > +   nir_deref_var var_deref = *deref;
> > +   var_deref.deref.child = NULL;
> > +   struct deref_node *node = get_deref_node(&var_deref, false, state);
> > +
> > +   if (node == NULL)
> > +      return false;
> > +
> > +   return foreach_deref_node_worker(node, &deref->deref, cb, state);
> > +}
>
> Hmm, it seems we never use this function... and even if we did,
> get_deref_node() seems to provide basically the same functionality.
> Remove them?
>

We do use it.


>
> > +
> > +/* This question can only be asked about leaves.  Searching down the
> tree
> > + * is much harder than searching up.
> > + */
> > +static bool
> > +deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref,
> > +                          struct lower_variables_state *state)
> > +{
> > +   if (deref->child == NULL) {
> > +      return false;
> > +   } else {
> > +      switch (deref->child->deref_type) {
> > +      case nir_deref_type_array: {
> > +         nir_deref_array *arr = nir_deref_as_array(deref->child);
> > +         if (arr->deref_array_type == nir_deref_array_type_indirect)
> > +            return true;
> > +
> > +         assert(arr->deref_array_type == nir_deref_array_type_direct);
>
> Why is this assert here? You're handling the wildcard case a few lines
> down.
>

The assert is here because you can only ask the "is this aliased" question
(at the moment) about an entirely direct reference. The stuff below is for
if there is a wildcard reference of this somewhere.


>
> > +
> > +         if (node->children[arr->base_offset] &&
> > +             deref_may_be_aliased_node(node->children[arr->base_offset],
> > +                                       deref->child, state))
> > +            return true;
> > +
> > +         if (node->wildcard &&
> > +             deref_may_be_aliased_node(node->wildcard, deref->child,
> state))
> > +            return true;
> > +
> > +         return false;
> > +      }
> > +
> > +      case nir_deref_type_struct: {
> > +         nir_deref_struct *str = nir_deref_as_struct(deref->child);
> > +         if (node->children[str->index]) {
> > +             return
> deref_may_be_aliased_node(node->children[str->index],
> > +                                              deref->child, state);
> > +         } else {
> > +            return false;
> > +         }
> > +      }
> > +
> > +      default:
> > +         unreachable("Invalid nir_deref child type");
> > +      }
> > +   }
> > +}
> > +
> > +static bool
> > +deref_may_be_aliased(nir_deref_var *deref,
> > +                     struct lower_variables_state *state)
> > +{
> > +   nir_deref_var var_deref = *deref;
> > +   var_deref.deref.child = NULL;
> > +   struct deref_node *node = get_deref_node(&var_deref, false, state);
>
> Instead of doing all these shenanigans, can we just look up the hash
> table itself? You're only using a very small part of get_deref_node()
> here.
>

Nope.  Every time we hit an array reference, a wildcard may be floating
around somewhere and, if it is, we have to take two paths: One for the
direct reference and one for the wildcard.


>
> > +
> > +   /* An invalid dereference can't be aliased. */
> > +   if (node == NULL)
> > +      return false;
> > +
> > +   return deref_may_be_aliased_node(node, &deref->deref, state);
> > +}
> > +
> > +static bool
> > +fill_deref_tables_block(nir_block *block, void *void_state)
> > +{
> > +   struct lower_variables_state *state = void_state;
> > +
> > +   nir_foreach_instr_safe(block, instr) {
> > +      if (instr->type != nir_instr_type_intrinsic)
> > +         continue;
> > +
> > +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> > +
> > +      switch (intrin->intrinsic) {
> > +      case nir_intrinsic_load_var_vec1:
> > +      case nir_intrinsic_load_var_vec2:
> > +      case nir_intrinsic_load_var_vec3:
> > +      case nir_intrinsic_load_var_vec4:
> > +         register_load_instr(intrin, true, state);
> > +         break;
> > +
> > +      case nir_intrinsic_store_var_vec1:
> > +      case nir_intrinsic_store_var_vec2:
> > +      case nir_intrinsic_store_var_vec3:
> > +      case nir_intrinsic_store_var_vec4:
> > +         register_store_instr(intrin, true, state);
> > +         break;
> > +
> > +      case nir_intrinsic_copy_var:
> > +         register_copy_instr(intrin, true, state);
> > +         break;
> > +
> > +      default:
> > +         continue;
> > +      }
> > +   }
> > +
> > +   return true;
> > +}
> > +
> > +static nir_deref *
> > +deref_next_wildcard_parent(nir_deref *deref)
> > +{
> > +   for (nir_deref *tail = deref; tail->child; tail = tail->child) {
> > +      if (tail->child->deref_type != nir_deref_type_array)
> > +         continue;
> > +
> > +      nir_deref_array *arr = nir_deref_as_array(tail->child);
> > +
> > +      if (arr->deref_array_type == nir_deref_array_type_wildcard)
> > +         return tail;
> > +   }
> > +
> > +   return NULL;
> > +}
> > +
> > +static nir_deref *
> > +get_deref_tail(nir_deref *deref)
> > +{
> > +   while (deref->child)
> > +      deref = deref->child;
> > +
> > +   return deref;
> > +}
>
> I'm getting some deja vu, it's like I've seen this function before...
> ;) really, need to move it to nir.c.
>

Yeah.  nir_deref needs some helpers.  That can be done.


>
> > +
> > +static void
> > +emit_copy_load_store(nir_intrinsic_instr *copy_instr,
> > +                     nir_deref_var *dest_head, nir_deref_var *src_head,
> > +                     nir_deref *dest_tail, nir_deref *src_tail,
> > +                     struct lower_variables_state *state)
> > +{
> > +   nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail);
> > +   nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail);
> > +
> > +   if (src_arr_parent || dest_arr_parent) {
> > +      assert(dest_arr_parent && dest_arr_parent);
> > +
> > +      nir_deref_array *src_arr =
> nir_deref_as_array(src_arr_parent->child);
> > +      nir_deref_array *dest_arr =
> nir_deref_as_array(dest_arr_parent->child);
> > +
> > +      unsigned length = type_get_length(src_arr_parent->type);
> > +      assert(length == type_get_length(dest_arr_parent->type));
> > +      assert(length > 0);
> > +
> > +      src_arr->deref_array_type = nir_deref_array_type_direct;
> > +      dest_arr->deref_array_type = nir_deref_array_type_direct;
> > +      for (unsigned i = 0; i < length; i++) {
> > +         src_arr->base_offset = i;
> > +         dest_arr->base_offset = i;
> > +         emit_copy_load_store(copy_instr, dest_head, src_head,
> > +                              &dest_arr->deref, &src_arr->deref, state);
> > +      }
> > +      src_arr->deref_array_type = nir_deref_array_type_wildcard;
> > +      dest_arr->deref_array_type = nir_deref_array_type_wildcard;
> > +   } else {
> > +      /* Base case. Actually do the copy */
> > +      src_tail = get_deref_tail(src_tail);
> > +      dest_tail = get_deref_tail(dest_tail);
> > +
> > +      assert(src_tail->type == dest_tail->type);
> > +
> > +      unsigned num_components =
> glsl_get_vector_elements(src_tail->type);
> > +
> > +      nir_deref *src_deref = nir_copy_deref(state->mem_ctx,
> &src_head->deref);
> > +      nir_deref *dest_deref = nir_copy_deref(state->mem_ctx,
> &dest_head->deref);
> > +
> > +      nir_intrinsic_op load_op;
> > +      switch (num_components) {
> > +         case 1: load_op = nir_intrinsic_load_var_vec1; break;
> > +         case 2: load_op = nir_intrinsic_load_var_vec2; break;
> > +         case 3: load_op = nir_intrinsic_load_var_vec3; break;
> > +         case 4: load_op = nir_intrinsic_load_var_vec4; break;
> > +         default: unreachable("Invalid number of components"); break;
> > +      }
> > +
> > +      nir_intrinsic_instr *load =
> nir_intrinsic_instr_create(state->mem_ctx,
> > +                                                             load_op);
> > +      load->variables[0] = nir_deref_as_var(src_deref);
> > +      load->dest.is_ssa = true;
> > +      nir_ssa_def_init(&load->instr, &load->dest.ssa, num_components,
> NULL);
> > +
> > +      nir_instr_insert_before(&copy_instr->instr, &load->instr);
> > +      register_load_instr(load, false, state);
> > +
> > +      nir_intrinsic_op store_op;
> > +      switch (num_components) {
> > +         case 1: store_op = nir_intrinsic_store_var_vec1; break;
> > +         case 2: store_op = nir_intrinsic_store_var_vec2; break;
> > +         case 3: store_op = nir_intrinsic_store_var_vec3; break;
> > +         case 4: store_op = nir_intrinsic_store_var_vec4; break;
> > +         default: unreachable("Invalid number of components"); break;
> > +      }
> > +
> > +      nir_intrinsic_instr *store =
> nir_intrinsic_instr_create(state->mem_ctx,
> > +                                                              store_op);
> > +      store->variables[0] = nir_deref_as_var(dest_deref);
> > +      store->src[0].is_ssa = true;
> > +      store->src[0].ssa = &load->dest.ssa;
> > +
> > +      if (copy_instr->has_predicate) {
> > +         store->has_predicate = true;
> > +         store->predicate = nir_src_copy(copy_instr->predicate,
> state->mem_ctx);
> > +      }
> > +
> > +      nir_instr_insert_before(&copy_instr->instr, &store->instr);
> > +      register_store_instr(store, false, state);
> > +   }
> > +}
> > +
> > +static bool
> > +lower_copies_to_load_store(struct deref_node *node,
> > +                           struct lower_variables_state *state)
>
> This never returns false, so just make it return void.
>

No, we do use the return value.  It's passed into foreach_deref_match down
there


>
> > +{
> > +   if (!node->copies)
> > +      return true;
> > +
> > +   struct set_entry *copy_entry;
> > +   set_foreach(node->copies, copy_entry) {
> > +      nir_intrinsic_instr *copy = (void *)copy_entry->key;
> > +
> > +      emit_copy_load_store(copy, copy->variables[0], copy->variables[1],
> > +                           &copy->variables[0]->deref,
> > +                           &copy->variables[1]->deref,
> > +                           state);
> > +
> > +      for (unsigned i = 0; i < 2; ++i) {
> > +         struct deref_node *arg_node =
> get_deref_node(copy->variables[i],
> > +                                                      false, state);
> > +         if (arg_node == NULL)
> > +            continue;
> > +
> > +         struct set_entry *arg_entry =
> _mesa_set_search(arg_node->copies,
> > +
> copy_entry->hash,
> > +                                                        copy);
> > +         assert(arg_entry);
> > +         _mesa_set_remove(node->copies, arg_entry);
> > +      }
> > +
> > +      nir_instr_remove(&copy->instr);
> > +   }
> > +
> > +   return true;
> > +}
> > +
> > +static nir_load_const_instr *
> > +get_const_initializer_load(const nir_deref_var *deref,
> > +                           struct lower_variables_state *state)
> > +{
> > +   nir_constant *constant = deref->var->constant_initializer;
> > +   const nir_deref *tail = &deref->deref;
> > +   unsigned matrix_offset = 0;
> > +   while (tail->child) {
> > +      switch (tail->child->deref_type) {
> > +      case nir_deref_type_array: {
> > +         nir_deref_array *arr = nir_deref_as_array(tail->child);
> > +         assert(arr->deref_array_type == nir_deref_array_type_direct);
> > +         if (glsl_type_is_matrix(tail->type)) {
> > +            assert(arr->deref.child == NULL);
> > +            matrix_offset = arr->base_offset;
> > +         } else {
> > +            constant = constant->elements[arr->base_offset];
> > +         }
> > +         break;
> > +      }
> > +
> > +      case nir_deref_type_struct: {
> > +         constant =
> constant->elements[nir_deref_as_struct(tail->child)->index];
> > +         break;
> > +      }
> > +
> > +      default:
> > +         unreachable("Invalid deref child type");
> > +      }
> > +
> > +      tail = tail->child;
> > +   }
> > +
> > +   nir_load_const_instr *load =
> nir_load_const_instr_create(state->mem_ctx);
> > +   load->array_elems = 0;
> > +   load->num_components = glsl_get_vector_elements(tail->type);
> > +
> > +   matrix_offset *= load->num_components;
> > +   for (unsigned i = 0; i < load->num_components; i++) {
> > +      switch (glsl_get_base_type(tail->type)) {
> > +      case GLSL_TYPE_FLOAT:
> > +      case GLSL_TYPE_INT:
> > +      case GLSL_TYPE_UINT:
> > +         load->value.u[i] = constant->value.u[matrix_offset + i];
> > +         break;
> > +      case GLSL_TYPE_BOOL:
> > +         load->value.u[i] = constant->value.u[matrix_offset + i] ?
> > +                             NIR_TRUE : NIR_FALSE;
> > +         break;
> > +      default:
> > +         unreachable("Invalid immediate type");
> > +      }
> > +   }
> > +
> > +   return load;
> > +}
> > +
> > +static void
> > +def_stack_push(struct deref_node *node, nir_ssa_def *def,
> > +               struct lower_variables_state *state)
> > +{
> > +   if (node->def_stack == NULL) {
> > +      node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *,
> > +                                     state->impl->num_blocks);
> > +      node->def_stack_tail = node->def_stack - 1;
> > +   }
> > +
> > +   if (node->def_stack_tail >= node->def_stack) {
> > +      nir_ssa_def *top_def = *node->def_stack_tail;
> > +
> > +      if (def->parent_instr->block == top_def->parent_instr->block) {
> > +         /* They're in the same block, just replace the top */
> > +         *node->def_stack_tail = def;
> > +         return;
> > +      }
> > +   }
> > +
> > +   *(++node->def_stack_tail) = def;
> > +}
> > +
> > +static nir_ssa_def *
> > +get_ssa_def_for_block(struct deref_node *node, nir_block *block,
> > +                      struct lower_variables_state *state)
> > +{
> > +   if (node->def_stack) {
> > +      while (node->def_stack_tail >= node->def_stack) {
> > +         nir_ssa_def *def = *node->def_stack_tail;
> > +
> > +         for (nir_block *dom = block; dom != NULL; dom = dom->imm_dom) {
> > +            if (def->parent_instr->block == dom)
> > +               return def;
> > +         }
> > +
> > +         node->def_stack_tail--;
> > +      }
> > +   }
> > +
> > +   /* If we got here then we don't have a definition that dominates the
> > +    * given block.  This means that we need to add an undef and use
> that.
> > +    */
> > +   nir_ssa_undef_instr *undef =
> nir_ssa_undef_instr_create(state->mem_ctx);
> > +   nir_ssa_def_init(&undef->instr, &undef->def,
> > +                    glsl_get_vector_elements(node->type), NULL);
> > +   nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr);
> > +   def_stack_push(node, &undef->def, state);
> > +   return &undef->def;
> > +}
>
> I've said this before, but I really think we should be doing exactly
> what the paper says. It might be just an optimization, but it gives
> paranoid people like me much greater confidence that what we're doing
> is actually correct. Also, we should be referencing the paper in a
> comment somewhere...
>
> Here are the changes I think you'll need to make:
>
> -Add a hash table to the state that maps from SSA definition to deref
> node, and make def_stack_push() update it (but only when we don't
> replace the top of the stack to prevent popping more than once per
> stack)
> -Add the for loop in deref_to_ssa_block() that goes backwards, looking
> at each SSA definition and if it corresponds to a deref node, pops
> that node's stack
> -Make get_ssa_def_for_block() not pop the stack
>
> I don't think it'll be that difficult, and you may even end up with
> less code after doing the third step.
>

Given that we know that, at any given point, the top of the stack belongs
to either the current block or one of its parents in the dominance tree,
why can't we just do "if (stack_top->block == block) pop()" after walking
the children?


>
> > +
> > +static void
> > +add_phi_sources(nir_block *block, nir_block *pred,
> > +                struct lower_variables_state *state)
> > +{
> > +   nir_foreach_instr(block, instr) {
> > +      if (instr->type != nir_instr_type_phi)
> > +         break;
> > +
> > +      nir_phi_instr *phi = nir_instr_as_phi(instr);
> > +
> > +      struct hash_entry *entry =
> > +            _mesa_hash_table_search(state->phi_table,
> > +                                    _mesa_hash_pointer(phi), phi);
> > +      if (!entry)
> > +         continue;
> > +
> > +      struct deref_node *node = entry->data;
> > +
> > +      nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src);
> > +      src->pred = pred;
> > +      src->src.is_ssa = true;
> > +      src->src.ssa = get_ssa_def_for_block(node, pred, state);
> > +
> > +      _mesa_set_add(src->src.ssa->uses, _mesa_hash_pointer(instr),
> instr);
> > +
> > +      exec_list_push_tail(&phi->srcs, &src->node);
> > +   }
> > +}
> > +
> > +static bool
> > +lower_deref_to_ssa_block(nir_block *block, void *void_state)
> > +{
> > +   struct lower_variables_state *state = void_state;
> > +
> > +   nir_foreach_instr_safe(block, instr) {
> > +      if (instr->type == nir_instr_type_phi) {
> > +         nir_phi_instr *phi = nir_instr_as_phi(instr);
> > +
> > +         struct hash_entry *entry =
> > +            _mesa_hash_table_search(state->phi_table,
> > +                                    _mesa_hash_pointer(phi), phi);
> > +
> > +         /* This can happen if we already have phi nodes in the program
> > +          * that were not created in this pass.
> > +          */
> > +         if (!entry)
> > +            continue;
> > +
> > +         struct deref_node *node = entry->data;
> > +
> > +         def_stack_push(node, &phi->dest.ssa, state);
> > +      } else if (instr->type == nir_instr_type_intrinsic) {
> > +         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> > +
> > +         switch (intrin->intrinsic) {
> > +         case nir_intrinsic_load_var_vec1:
> > +         case nir_intrinsic_load_var_vec2:
> > +         case nir_intrinsic_load_var_vec3:
> > +         case nir_intrinsic_load_var_vec4: {
> > +            struct deref_node *node =
> get_deref_node(intrin->variables[0],
> > +                                                     false, state);
> > +            unsigned num_chans =
> > +               nir_intrinsic_infos[intrin->intrinsic].dest_components;
> > +
> > +            if (node == NULL) {
> > +               /* If we hit this path then we are referencing an invalid
> > +                * value.  Most likely, we unrolled something and are
> > +                * reading past the end of some array.  In any case, this
> > +                * should result in an undefined value.
> > +                */
> > +               nir_ssa_undef_instr *undef =
> > +                  nir_ssa_undef_instr_create(state->mem_ctx);
> > +               nir_ssa_def_init(&undef->instr, &undef->def, num_chans,
> NULL);
> > +
> > +               nir_instr_insert_before(&intrin->instr, &undef->instr);
> > +               nir_instr_remove(&intrin->instr);
> > +
> > +               nir_src new_src = {
> > +                  .is_ssa = true,
> > +                  .ssa = &undef->def,
> > +               };
> > +
> > +               nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_src,
> > +                                        state->mem_ctx);
> > +               continue;
> > +            }
> > +
> > +            if (!node->lower_to_ssa)
> > +               continue;
> > +
> > +            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
> > +                                                      nir_op_imov);
> > +            mov->src[0].src.is_ssa = true;
> > +            mov->src[0].src.ssa = get_ssa_def_for_block(node, block,
> state);
> > +            for (unsigned i = num_chans; i < 4; i++)
> > +               mov->src[0].swizzle[i] = 0;
> > +
> > +            assert(intrin->dest.is_ssa);
> > +
> > +            mov->dest.write_mask = (1 << num_chans) - 1;
> > +            mov->dest.dest.is_ssa = true;
> > +            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
> num_chans, NULL);
> > +
> > +            nir_instr_insert_before(&intrin->instr, &mov->instr);
> > +            nir_instr_remove(&intrin->instr);
> > +
> > +            nir_src new_src = {
> > +               .is_ssa = true,
> > +               .ssa = &mov->dest.dest.ssa,
> > +            };
> > +
> > +            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_src,
> > +                                     state->mem_ctx);
> > +            break;
> > +         }
> > +
> > +         case nir_intrinsic_store_var_vec1:
> > +         case nir_intrinsic_store_var_vec2:
> > +         case nir_intrinsic_store_var_vec3:
> > +         case nir_intrinsic_store_var_vec4: {
> > +            struct deref_node *node =
> get_deref_node(intrin->variables[0],
> > +                                                     false, state);
> > +
> > +            if (node == NULL) {
> > +               /* Probably an out-of-bounds array store.  That should
> be a
> > +                * no-op. */
> > +               nir_instr_remove(&intrin->instr);
> > +               continue;
> > +            }
> > +
> > +            if (!node->lower_to_ssa)
> > +               continue;
> > +
> > +            unsigned num_chans = glsl_get_vector_elements(node->type);
> > +
> > +            assert(intrin->src[0].is_ssa);
> > +
> > +            nir_alu_instr *mov;
> > +            if (intrin->has_predicate) {
> > +               mov = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
> > +               mov->src[0].src = nir_src_copy(intrin->predicate,
> > +                                              state->mem_ctx);
> > +               memset(mov->src[0].swizzle, 0, sizeof
> mov->src[0].swizzle);
> > +
> > +               mov->src[1].src.is_ssa = true;
> > +               mov->src[1].src.ssa = intrin->src[0].ssa;
> > +               for (unsigned i = num_chans; i < 4; i++)
> > +                  mov->src[1].swizzle[i] = 0;
> > +
> > +               mov->src[2].src.is_ssa = true;
> > +               mov->src[2].src.ssa = get_ssa_def_for_block(node, block,
> state);
> > +               for (unsigned i = num_chans; i < 4; i++)
> > +                  mov->src[2].swizzle[i] = 0;
> > +
> > +            } else {
> > +               mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
> > +
> > +               mov->src[0].src.is_ssa = true;
> > +               mov->src[0].src.ssa = intrin->src[0].ssa;
> > +               for (unsigned i = num_chans; i < 4; i++)
> > +                  mov->src[0].swizzle[i] = 0;
> > +            }
> > +
> > +            mov->dest.write_mask = (1 << num_chans) - 1;
> > +            mov->dest.dest.is_ssa = true;
> > +            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
> num_chans, NULL);
> > +
> > +            nir_instr_insert_before(&intrin->instr, &mov->instr);
> > +            nir_instr_remove(&intrin->instr);
> > +
> > +            def_stack_push(node, &mov->dest.dest.ssa, state);
> > +            break;
> > +         }
> > +
> > +         default:
> > +            break;
> > +         }
> > +      }
> > +   }
> > +
> > +   if (block->successors[0])
> > +      add_phi_sources(block->successors[0], block, state);
> > +   if (block->successors[1])
> > +      add_phi_sources(block->successors[1], block, state);
> > +
> > +   return true;
> > +}
> > +
> > +static void
> > +insert_phi_nodes(struct lower_variables_state *state)
> > +{
> > +   unsigned work[state->impl->num_blocks];
> > +   unsigned has_already[state->impl->num_blocks];
> > +   nir_block *W[state->impl->num_blocks];
> > +
> > +   memset(work, 0, sizeof work);
> > +   memset(has_already, 0, sizeof has_already);
> > +
> > +   unsigned w_start, w_end;
> > +   unsigned iter_count = 0;
> > +
> > +   struct hash_entry *deref_entry;
> > +   hash_table_foreach(state->deref_leaves, deref_entry) {
> > +      struct deref_node *node = deref_entry->data;
> > +
> > +      if (node->stores == NULL)
> > +         continue;
> > +
> > +      if (!node->lower_to_ssa)
> > +         continue;
> > +
> > +      w_start = w_end = 0;
> > +      iter_count++;
> > +
> > +      struct set_entry *store_entry;
> > +      set_foreach(node->stores, store_entry) {
> > +         nir_intrinsic_instr *store = (nir_intrinsic_instr
> *)store_entry->key;
> > +         if (work[store->instr.block->index] < iter_count)
> > +            W[w_end++] = store->instr.block;
> > +         work[store->instr.block->index] = iter_count;
> > +      }
> > +
> > +      while (w_start != w_end) {
> > +         nir_block *cur = W[w_start++];
> > +         struct set_entry *dom_entry;
> > +         set_foreach(cur->dom_frontier, dom_entry) {
> > +            nir_block *next = (nir_block *) dom_entry->key;
> > +
> > +            /*
> > +             * If there's more than one return statement, then the end
> block
> > +             * can be a join point for some definitions. However, there
> are
> > +             * no instructions in the end block, so nothing would use
> those
> > +             * phi nodes. Of course, we couldn't place those phi nodes
> > +             * anyways due to the restriction of having no instructions
> in the
> > +             * end block...
> > +             */
> > +            if (next == state->impl->end_block)
> > +               continue;
> > +
> > +            if (has_already[next->index] < iter_count) {
> > +               nir_phi_instr *phi =
> nir_phi_instr_create(state->mem_ctx);
> > +               phi->dest.is_ssa = true;
> > +               nir_ssa_def_init(&phi->instr, &phi->dest.ssa,
> > +                                glsl_get_vector_elements(node->type),
> NULL);
> > +               nir_instr_insert_before_block(next, &phi->instr);
> > +
> > +               _mesa_hash_table_insert(state->phi_table,
> > +                                       _mesa_hash_pointer(phi), phi,
> node);
> > +
> > +               has_already[next->index] = iter_count;
> > +               if (work[next->index] < iter_count) {
> > +                  work[next->index] = iter_count;
> > +                  W[w_end++] = next;
> > +               }
> > +            }
> > +         }
> > +      }
> > +   }
> > +}
> > +
> > +static bool
> > +nir_lower_variables_impl(nir_function_impl *impl)
> > +{
> > +   struct lower_variables_state state;
> > +
> > +   state.mem_ctx = ralloc_parent(impl);
> > +   state.dead_ctx = ralloc_context(state.mem_ctx);
> > +   state.impl = impl;
> > +
> > +   state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx,
> > +
>  _mesa_key_pointer_equal);
> > +   state.deref_leaves = _mesa_hash_table_create(state.dead_ctx,
> > +                                                derefs_equal);
> > +   state.phi_table = _mesa_hash_table_create(state.dead_ctx,
> > +                                             _mesa_key_pointer_equal);
> > +
> > +   nir_foreach_block(impl, fill_deref_tables_block, &state);
> > +
> > +   struct set *outputs = _mesa_set_create(state.dead_ctx,
> > +                                          _mesa_key_pointer_equal);
> > +
> > +   bool progress = false;
> > +
> > +   nir_metadata_require(impl, nir_metadata_block_index);
> > +
> > +   struct hash_entry *entry;
> > +   hash_table_foreach(state.deref_leaves, entry) {
> > +      nir_deref_var *deref = (void *)entry->key;
> > +      struct deref_node *node = entry->data;
> > +
> > +      if (deref->var->data.mode != nir_var_local) {
> > +         _mesa_hash_table_remove(state.deref_leaves, entry);
> > +         continue;
> > +      }
> > +
> > +      if (deref_may_be_aliased(deref, &state)) {
> > +         _mesa_hash_table_remove(state.deref_leaves, entry);
> > +         continue;
> > +      }
> > +
> > +      node->lower_to_ssa = true;
> > +      progress = true;
> > +
> > +      if (deref->var->constant_initializer) {
> > +         nir_load_const_instr *load = get_const_initializer_load(deref,
> &state);
> > +         load->dest.is_ssa = true;
> > +         nir_ssa_def_init(&load->instr, &load->dest.ssa,
> > +                          glsl_get_vector_elements(node->type), NULL);
> > +         nir_instr_insert_before_cf_list(&impl->body, &load->instr);
> > +         def_stack_push(node, &load->dest.ssa, &state);
> > +      }
> > +
> > +      if (deref->var->data.mode == nir_var_shader_out)
> > +         _mesa_set_add(outputs, _mesa_hash_pointer(node), node);
> > +
> > +      foreach_deref_node_match(deref, lower_copies_to_load_store,
> &state);
> > +   }
> > +
> > +   if (!progress)
> > +      return false;
> > +
> > +   nir_metadata_require(impl, nir_metadata_dominance);
> > +
> > +   insert_phi_nodes(&state);
> > +   nir_foreach_block(impl, lower_deref_to_ssa_block, &state);
> > +
> > +   nir_metadata_dirty(impl, nir_metadata_block_index |
> > +                            nir_metadata_dominance);
> > +
> > +   ralloc_free(state.dead_ctx);
> > +
> > +   return progress;
> > +}
> > +
> > +void
> > +nir_lower_variables(nir_shader *shader)
> > +{
> > +   nir_foreach_overload(shader, overload) {
> > +      if (overload->impl)
> > +         nir_lower_variables_impl(overload->impl);
> > +   }
> > +}
> > --
> > 2.2.0
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20141217/1e14637b/attachment-0001.html>


More information about the mesa-dev mailing list