[Mesa-dev] [PATCH v2 08/11] nir: Add a array-of-vector variable shrinking pass

Fri Aug 24 01:51:38 UTC 2018

On Thu, Aug 23, 2018 at 8:16 PM Caio Marcelo de Oliveira Filho <
caio.oliveira at intel.com> wrote:

> On Sat, Jul 28, 2018 at 10:44:39PM -0700, Jason Ekstrand wrote:
> > This pass looks for variables with vector or array-of-vector types and
> > narrows the type to only the components used.
> > ---
> >  src/compiler/nir/nir.h            |   1 +
> >  src/compiler/nir/nir_split_vars.c | 694 ++++++++++++++++++++++++++++++
> >  2 files changed, 695 insertions(+)
>
> This patch and the one that enables the pass are
>
> Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira at intel.com>
>

Thanks!

> I have some suggestions below, pick the ones you like.
>

I took all but one.  Thanks for your careful review.

> (...)
>
> > +static void
> > +mark_deref_used(nir_deref_instr *deref,
> > +                nir_component_mask_t comps_read,
> > +                nir_component_mask_t comps_written,
> > +                nir_deref_instr *copy_deref,
> > +                struct hash_table *var_usage_map,
> > +                nir_variable_mode modes,
> > +                void *mem_ctx)
> > +{
> > +   if (!(deref->mode & modes))
> > +      return;
> > +
> > +   nir_variable *var = nir_deref_instr_get_variable(deref);
> > +
> > +   struct vec_var_usage *usage =
> > +      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
> > +   if (!usage)
> > +      return;
> > +
> > +   const unsigned num_var_components =
> > +      glsl_get_components(glsl_without_array_or_matrix(var->type));
>
> Consider at vec_var_usage creation time, setting a usage->comps (or
> similar) to "(1u << num_var_components) - 1".  Some cheap savings here
> and in other passes, also would result in less noise lines here and
> there.
>

I considered it at one point but decided against it.  After going through
the exercise at your suggestion, there were a lot more occurrences than I
thought.  Thanks!

> > +
> > +   usage->comps_read |= comps_read & ((1u << num_var_components) - 1);
> > +   usage->comps_written |= comps_written & ((1u << num_var_components)
> - 1);
> > +
> > +   struct vec_var_usage *copy_usage = NULL;
> > +   if (copy_deref) {
> > +      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
> > +                                       true, mem_ctx);
> > +      if (copy_usage) {
> > +         if (usage->vars_copied == NULL) {
> > +            usage->vars_copied = _mesa_set_create(mem_ctx,
> _mesa_hash_pointer,
> > +
> _mesa_key_pointer_equal);
> > +         }
> > +         _mesa_set_add(usage->vars_copied, copy_usage);
> > +      } else {
> > +         usage->has_external_copy = true;
> > +      }
> > +   }
> > +
> > +   nir_deref_path path;
> > +   nir_deref_path_init(&path, deref, mem_ctx);
> > +
> > +   nir_deref_path copy_path;
> > +   if (copy_usage)
> > +      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
> > +
> > +   unsigned copy_i = 0;
> > +   for (unsigned i = 0; i < usage->num_levels; i++) {
> > +      struct array_level_usage *level = &usage->levels[i];
> > +      nir_deref_instr *deref = path.path[i + 1];
> > +      assert(deref->deref_type == nir_deref_type_array ||
> > +             deref->deref_type == nir_deref_type_array_wildcard);
> > +
> > +      unsigned max_used;
> > +      if (deref->deref_type == nir_deref_type_array) {
> > +         nir_const_value *const_index =
> > +            nir_src_as_const_value(deref->arr.index);
> > +         max_used = const_index ? const_index->u32[0] : UINT_MAX;
> > +      } else {
> > +         /* For wildcards, we read or wrote the whole thing. */
> > +         assert(deref->deref_type == nir_deref_type_array_wildcard);
> > +         max_used = level->array_len - 1;
> > +
> > +         if (copy_usage) {
> > +            /* Match each wildcard level with the level on copy_usage */
> > +            for (; copy_path.path[copy_i + 1]; copy_i++) {
> > +               if (copy_path.path[copy_i + 1]->deref_type ==
> > +                   nir_deref_type_array_wildcard)
> > +                  break;
> > +            }
> > +            struct array_level_usage *copy_level =
> > +               &copy_usage->levels[copy_i++];
> > +
> > +            if (level->levels_copied == NULL) {
> > +               level->levels_copied =
> > +                  _mesa_set_create(mem_ctx, _mesa_hash_pointer,
> > +                                   _mesa_key_pointer_equal);
> > +            }
> > +            _mesa_set_add(level->levels_copied, copy_level);
>
> Since once level->has_external_copy is set we don't really use the
> levels_copied, maybe wrap the bootstrap/set block with if
> (!level->has_external_copy)?
>

_mesa_set_add is cheap and I think I'd rather keep track of all the
internal copies for now in case this pass changes to need them.  I could
see this triggering some time but then it's very dependent on the external
copy being hit before the internal copy.  I think I'd rather leave this one
as-is if you don't mind.

> > +         } else {
> > +            /* We have a wildcard and we don't know where this copy
> came from,
> > +             * flag it and we'll know to not shorten this array.
> > +             */
>
> Maybe instead of "we don't know" say that it comes from a variable
> from other mode.
>

I said "comes from a variable we aren't tracking"

> > +            level->has_external_copy = true;
> > +         }
> > +      }
> > +
> > +      if (comps_written)
> > +         level->max_written = MAX2(level->max_written, max_used);
> > +      if (comps_read)
> > +         level->max_read = MAX2(level->max_read, max_used);
> > +   }
> > +}
>
> (...)
>
> > +static void
> > +find_used_components_impl(nir_function_impl *impl,
> > +                          struct hash_table *var_usage_map,
> > +                          nir_variable_mode modes,
> > +                          void *mem_ctx)
> > +{
> > +   nir_foreach_block(block, impl) {
> > +      nir_foreach_instr_safe(instr, block) {
>
> Unless I'm missing something, we don't need "safe" variant here
>

Correct.

> (...)
>
> > +static bool
> > +shrink_vec_var_list(struct exec_list *vars,
> > +                    struct hash_table *var_usage_map)
> > +{
> > +   /* Initialize the components kept field of each variable.  This is
> the
> > +    * AND of the components written and components read.  If a
> component is
> > +    * written but never read, it's dead.  If it is read but never
> written,
> > +    * then all values read are undefined garbage and we may as well not
> read
> > +    * them.
> > +    *
> > +    * The same logic applies to the array length.  We make the array
> length
> > +    * the minimum needed required length between read and write and
> plan to
> > +    * discard any OOB access.  The one exception here is indirect writes
> > +    * because we don't know where they will land and we can't shrink an
> array
> > +    * with indirect writes because previously in-bounds writes may
> become
> > +    * out-of-bounds and have undefined behavior.
> > +    *
> > +    * Also, if we have a copy that to/from something we can't shrink,
> we need
> > +    * to leave components and array_len of any wildcards alone.
> > +    */
> > +   nir_foreach_variable(var, vars) {
> > +      struct vec_var_usage *usage =
> > +         get_vec_var_usage(var, var_usage_map, false, NULL);
> > +      if (!usage)
> > +         continue;
>
> After reading I thought here and in the fix-point loop would be a
> better call to iterate directly var_usage_map.  Due to the way we
> reuse var_usage_map, we would have to skip based on modes, which kind
> of spoil things a bit, but maybe still a win.  Or use different sets
> and join them for the final step... Probably won't make much
> difference.
>
>
> > +
> > +      const unsigned var_num_components =
> > +         glsl_get_components(glsl_without_array_or_matrix(var->type));
> > +
> > +      assert(usage->comps_kept == 0);
> > +      if (usage->has_external_copy)
> > +         usage->comps_kept = (1u << var_num_components) - 1;
> > +      else
> > +         usage->comps_kept = usage->comps_read & usage->comps_written;
> > +
> > +      for (unsigned i = 0; i < usage->num_levels; i++) {
> > +         struct array_level_usage *level = &usage->levels[i];
> > +         assert(level->array_len > 0);
> > +
> > +         if (level->max_written == UINT_MAX || level->has_external_copy)
> > +            continue; /* Can't shrink */
> > +
> > +         unsigned max_used = MIN2(level->max_read, level->max_written);
> > +         level->array_len = MIN2(max_used, level->array_len - 1) + 1;
> > +      }
> > +   }
> > +
> > +   /* In order for variable copies to work, we have to have the same
> data type
> > +    * on the source and the destination.  In order to satisfy this, we
> run a
> > +    * little fixed-point algorithm to transitively ensure that we get
> enough
> > +    * components and array elements for this to hold for all copies.
> > +    */
> > +   bool fp_progress;
> > +   do {
> > +      fp_progress = false;
> > +      nir_foreach_variable(var, vars) {
> > +         struct vec_var_usage *var_usage =
> > +            get_vec_var_usage(var, var_usage_map, false, NULL);
> > +         if (!var_usage || !var_usage->vars_copied)
> > +            continue;
> > +
> > +         struct set_entry *copy_entry;
> > +         set_foreach(var_usage->vars_copied, copy_entry) {
> > +            const struct vec_var_usage *copy_usage = copy_entry->key;
> > +            if (copy_usage->comps_kept & ~var_usage->comps_kept) {
> > +               var_usage->comps_kept |= copy_usage->comps_kept;
>
> Could we set both sides here, i.e. do
> "copy_usage->comps_kept = var_usage->comps_kept"?  The reasoning
> is that it might save an iteration, e.g. A <-> C and B <-> C,
> but iteration happens in order: A, B then C.  Information from A
> will take two iterations to propagate to B.
>

Sure.  Easy enough to do.  At first I thought it wouldn't save us anything
so I didn't do it but I wasn't thinking about the third variable.  You're
right, it may be more efficient.  I've also done the same thing with the
array length below.

> > +               fp_progress = true;
> > +            }
> > +         }
> > +
> > +         for (unsigned i = 0; i < var_usage->num_levels; i++) {
> > +            struct array_level_usage *var_level = &var_usage->levels[i];
> > +            if (!var_level->levels_copied)
> > +               continue;
> > +
> > +            set_foreach(var_level->levels_copied, copy_entry) {
> > +               const struct array_level_usage *copy_level =
> copy_entry->key;
> > +               if (var_level->array_len < copy_level->array_len) {
> > +                  var_level->array_len = copy_level->array_len;
> > +                  fp_progress = true;
> > +               }
> > +            }
> > +         }
> > +      }
> > +   } while (fp_progress);
> > +
> > +   bool vars_shrunk = false;
> > +   nir_foreach_variable_safe(var, vars) {
> > +      struct vec_var_usage *usage =
> > +         get_vec_var_usage(var, var_usage_map, false, NULL);
> > +      if (!usage)
> > +         continue;
> > +
> > +      bool shrunk = false;
> > +      const struct glsl_type *vec_type = var->type;
> > +      for (unsigned i = 0; i < usage->num_levels; i++) {
> > +         /* If we've reduced the array to zero elements at some level,
> just
> > +          * set comps_kept to 0 and delete the variable.
> > +          */
> > +         if (usage->levels[i].array_len == 0)
> > +            usage->comps_kept = 0;
>
> Add a break here.  The rest of the loop execution would try to find a
> shrink situation, but regardless the result it will be ignored since
> we bail when comps_kept == 0.
>

Done.

> > +
> > +         assert(usage->levels[i].array_len <=
> glsl_get_length(vec_type));
> > +         if (usage->levels[i].array_len < glsl_get_length(vec_type))
> > +            shrunk = true;
> > +         vec_type = glsl_get_array_element(vec_type);
> > +      }
> > +      assert(glsl_type_is_vector_or_scalar(vec_type));
> > +
> > +      assert(usage->comps_kept < (1u << glsl_get_components(vec_type)));
> > +      if (usage->comps_kept != (1u << glsl_get_components(vec_type)) -
> 1)
> > +         shrunk = true;
> > +
> > +      if (usage->comps_kept == 0) {
> > +         /* This variable is dead, remove it */
> > +         vars_shrunk = true;
> > +         exec_node_remove(&var->node);
> > +         continue;
> > +      }
> > +
> > +      if (!shrunk) {
> > +         /* This variable doesn't need to be shrunk.  Remove it from the
> > +          * hash table so later passes will ignore it.
>
> s/later passes/later steps/ assuming you are referring to the access
> fixing step.

Done.

> > +          */
> > +         _mesa_hash_table_remove_key(var_usage_map, var);
> > +         continue;
> > +      }
>
> (...)
>
> > +static void
> > +shrink_vec_var_access_impl(nir_function_impl *impl,
> > +                           struct hash_table *var_usage_map,
> > +                           nir_variable_mode modes)
> > +{
> > +   nir_builder b;
> > +   nir_builder_init(&b, impl);
> > +
> > +   nir_foreach_block(block, impl) {
> > +      nir_foreach_instr_safe(instr, block) {
> > +         switch (instr->type) {
> > +         case nir_instr_type_deref: {
> > +            nir_deref_instr *deref = nir_instr_as_deref(instr);
> > +            if (!(deref->mode & modes))
> > +               break;
> > +
> > +            /* Clean up any dead derefs we find lying around.  They may
> refer
> > +             * to variables we've deleted.
> > +             */
> > +            if (nir_deref_instr_remove_if_unused(deref))
> > +               break;
> > +
> > +            /* We don't need to check if this is one of the derefs we're
> > +             * shrinking because this is a no-op if it isn't.  The
> worst that
> > +             * could happen is that we accidentally fix an invalid
> deref.
> > +             */
>
> Consider prefixing this comment with something like: "Update the new
> variable type in the deref."
>

Done.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180823/9a69c790/attachment-0001.html>