[Mesa-dev] [PATCH v2 08/11] nir: Add a array-of-vector variable shrinking pass

Caio Marcelo de Oliveira Filho caio.oliveira at intel.com
Tue Aug 21 22:55:55 UTC 2018


Hi,

On Sat, Jul 28, 2018 at 10:44:39PM -0700, Jason Ekstrand wrote:
> This pass looks for variables with vector or array-of-vector types and
> narrows the type to only the components used.
> ---
>  src/compiler/nir/nir.h            |   1 +
>  src/compiler/nir/nir_split_vars.c | 694 ++++++++++++++++++++++++++++++
>  2 files changed, 695 insertions(+)

My general impression was: could we do it as two separate simpler
passes, one for array lengths and other for vector types?  I felt the
intermix of book-keeping for both cases made things harder than usual
to read, but maybe is only because I'm not familiar enough.


> +static void
> +find_used_components_impl(nir_function_impl *impl,
> +                          struct hash_table *var_usage_map,
> +                          nir_variable_mode modes,
> +                          void *mem_ctx)
> +{
> +   nir_foreach_block(block, impl) {
> +      nir_foreach_instr_safe(instr, block) {
> +         if (instr->type != nir_instr_type_intrinsic)
> +            continue;
> +
> +         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> +         switch (intrin->intrinsic) {
> +         case nir_intrinsic_load_deref:
> +            mark_deref_used(nir_src_as_deref(intrin->src[0]),
> +                            nir_ssa_def_components_read(&intrin->dest.ssa), 0,
> +                            NULL, var_usage_map, modes, mem_ctx);
> +            break;
> +
> +         case nir_intrinsic_store_deref:
> +            mark_deref_used(nir_src_as_deref(intrin->src[0]),
> +                            0, get_non_self_referential_store_comps(intrin),
> +                            NULL, var_usage_map, modes, mem_ctx);
> +            break;
> +
> +         case nir_intrinsic_copy_deref: {
> +            /* Just mark everything used for copies. */
> +            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
> +            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);

Should self-copies be ignored?


> +            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
> +            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
> +            break;
> +         }

(...)

> +static bool
> +shrink_vec_var_list(struct exec_list *vars,
> +                    struct hash_table *var_usage_map)
> +{
> +   /* Initialize the components kept field of each variable.  This is the
> +    * AND of the components written and components read.  If a component is
> +    * written but never read, it's dead.  If it is read but never written,
> +    * then all values read are undefined garbage and we may as well not read
> +    * them.
> +    *
> +    * The same logic applies to the array length.  We make the array length
> +    * the minimum needed required length between read and write and plan to
> +    * discard any OOB access.  The one exception here is indirect writes
> +    * because we don't know where they will land and we can't shrink an array
> +    * with indirect writes because previously in-bounds writes may become
> +    * out-of-bounds and have undefined behavior.
> +    *
> +    * Also, if we have a copy that to/from something we can't shrink, we need
> +    * to leave components and array_len of any wildcards alone.
> +    */
> +   nir_foreach_variable(var, vars) {
> +      struct vec_var_usage *usage =
> +         get_vec_var_usage(var, var_usage_map, false, NULL);
> +      if (!usage)
> +         continue;
> +
> +      const unsigned var_num_components =
> +         glsl_get_components(glsl_without_array_or_matrix(var->type));
> +
> +      assert(usage->comps_kept == 0);
> +      if (usage->has_external_copy)
> +         usage->comps_kept = (1u << var_num_components) - 1;
> +      else
> +         usage->comps_kept = usage->comps_read & usage->comps_written;
> +
> +      for (unsigned i = 0; i < usage->num_levels; i++) {
> +         struct array_level_usage *level = &usage->levels[i];
> +         assert(level->array_len > 0);
> +
> +         if (level->max_written == UINT_MAX || level->has_external_copy)
> +            continue; /* Can't shrink */
> +
> +         unsigned max_used = MIN2(level->max_read, level->max_written);

Should this be MAX2?  What I'm thinking is if we read 3 and write 5,
we used up to 5, and the length should be at least 6.


> +         level->array_len = MIN2(max_used, level->array_len - 1) + 1;
> +      }
> +   }

(...)

> +static void
> +shrink_vec_var_access_impl(nir_function_impl *impl,
> +                           struct hash_table *var_usage_map,
> +                           nir_variable_mode modes)
> +{
> +   nir_builder b;
> +   nir_builder_init(&b, impl);
> +
> +   nir_foreach_block(block, impl) {
> +      nir_foreach_instr_safe(instr, block) {
> +         switch (instr->type) {
> +         case nir_instr_type_deref: {
> +            nir_deref_instr *deref = nir_instr_as_deref(instr);
> +            if (!(deref->mode & modes))
> +               break;
> +
> +            /* Clean up any dead derefs we find lying around.  They may refer
> +             * to variables we've deleted.
> +             */
> +            if (nir_deref_instr_remove_if_unused(deref))
> +               break;
> +
> +            /* We don't need to check if this is one of the derefs we're
> +             * shrinking because this is a no-op if it isn't.  The worst that
> +             * could happen is that we accidentally fix an invalid deref.
> +             */

On one hand I think this is fine for the reasons you mention (and the
fact it avoids us looking up the deref's var into our structures).  On
the other hand, if we get some pass generating invalid derefs, this
will hide the bug.


> +            if (deref->deref_type == nir_deref_type_var) {
> +               deref->type = deref->var->type;
> +            } else if (deref->deref_type == nir_deref_type_array ||
> +                       deref->deref_type == nir_deref_type_array_wildcard) {
> +               nir_deref_instr *parent = nir_deref_instr_parent(deref);
> +               assert(glsl_type_is_array(parent->type) ||
> +                      glsl_type_is_matrix(parent->type));
> +               deref->type = glsl_get_array_element(parent->type);
> +            }
> +            break;
> +         }

(...)

> +bool
> +nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
> +{
> +   assert((modes & (nir_var_global | nir_var_local)) == modes);
> +
> +   void *mem_ctx = ralloc_context(NULL);
> +
> +   struct hash_table *var_usage_map =
> +      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
> +                              _mesa_key_pointer_equal);
> +
> +   bool has_vars_to_shrink = false;
> +   nir_foreach_function(function, shader) {
> +      if (!function->impl)
> +         continue;
> +
> +      /* Don't even bother crawling the IR if we don't have any variables.
> +       * Given that this pass deletes any unused variables, it's likely that
> +       * we will be in this scenario eventually.
> +       */

The fact that we don't have unused variables doesn't mean we won't
have used variables.  Maybe I'm missing some context to fully
understand this comment block.


> +      if (!exec_list_is_empty(&shader->globals) ||
> +          !exec_list_is_empty(&function->impl->locals)) {

Condition each of those with the modes being enabled? I.e. if we do
have globals, but we are only working with locals in the pass, we
should skip it.


> +         has_vars_to_shrink = true;
> +         find_used_components_impl(function->impl, var_usage_map,
> +                                   modes, mem_ctx);
> +      }
> +   }

(...)


Thanks,
Caio


More information about the mesa-dev mailing list