[Mesa-dev] [PATCH v2 2/3] nir: add nir_opt_vectorize_io()

Jason Ekstrand jason at jlekstrand.net
Sun Oct 28 22:25:50 UTC 2018


On October 27, 2018 22:08:01 Timothy Arceri <tarceri at itsqueeze.com> wrote:

> Once linking opts are done this pass recombines varying components.
>
>
> This patch is loosely based on Connor's vectorize alu pass.
>
>
> V2: skip fragment shaders
> ---
> src/compiler/Makefile.sources           |   1 +
> src/compiler/nir/meson.build            |   1 +
> src/compiler/nir/nir.h                  |   2 +
> src/compiler/nir/nir_opt_vectorize_io.c | 526 ++++++++++++++++++++++++
> 4 files changed, 530 insertions(+)
> create mode 100644 src/compiler/nir/nir_opt_vectorize_io.c
>
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index b65bb9b80b9..4048eebeb09 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -289,6 +289,7 @@ NIR_FILES = \
>  nir/nir_opt_shrink_load.c \
>  nir/nir_opt_trivial_continues.c \
>  nir/nir_opt_undef.c \
> + nir/nir_opt_vectorize_io.c \
>  nir/nir_phi_builder.c \
>  nir/nir_phi_builder.h \
>  nir/nir_print.c \
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index d8f65640004..7b0007d3b60 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -173,6 +173,7 @@ files_libnir = files(
>   'nir_opt_shrink_load.c',
>   'nir_opt_trivial_continues.c',
>   'nir_opt_undef.c',
> +  'nir_opt_vectorize_io.c',
>   'nir_phi_builder.c',
>   'nir_phi_builder.h',
>   'nir_print.c',
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index a0ae9a4430e..d54f97c1de6 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3105,6 +3105,8 @@ bool nir_opt_trivial_continues(nir_shader *shader);
>
> bool nir_opt_undef(nir_shader *shader);
>
> +bool nir_opt_vectorize_io(nir_shader *shader);
> +
> bool nir_opt_conditional_discard(nir_shader *shader);
>
> void nir_sweep(nir_shader *shader);
> diff --git a/src/compiler/nir/nir_opt_vectorize_io.c 
> b/src/compiler/nir/nir_opt_vectorize_io.c
> new file mode 100644
> index 00000000000..6695827290a
> --- /dev/null
> +++ b/src/compiler/nir/nir_opt_vectorize_io.c
> @@ -0,0 +1,526 @@
> +/*
> + * Copyright © 2018 Timothy Arceri
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +#include "nir_deref.h"
> +#include "util/u_dynarray.h"
> +#include "util/u_math.h"
> +
> +/** @file nir_opt_vectorize_io.c
> + *
> + * Replaces scalar nir_load_input/nir_store_output operations with
> + * vectorized instructions.
> + */
> +
> +static bool
> +is_io_load(nir_intrinsic_instr *intr)
> +{
> +   switch (intr->intrinsic) {
> +   case nir_intrinsic_interp_deref_at_centroid:
> +   case nir_intrinsic_interp_deref_at_sample:
> +   case nir_intrinsic_interp_deref_at_offset:
> +   case nir_intrinsic_load_deref:
> +      return true;
> +   default:
> +      return false;
> +   }
> +}
> +
> +static nir_deref_instr *
> +clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
> +                  const nir_deref_instr *src_head)
> +{
> +   const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
> +
> +   if (!parent)
> +      return dst_tail;
> +
> +   assert(src_head->deref_type == nir_deref_type_array);
> +
> +   dst_tail = clone_deref_array(b, dst_tail, parent);
> +
> +   return nir_build_deref_array(b, dst_tail,
> +                                nir_ssa_for_src(b, src_head->arr.index, 1));
> +}
> +
> +static bool
> +instr_can_rewrite(nir_instr *instr)
> +{
> +   if (instr->type != nir_instr_type_intrinsic)
> +      return false;
> +
> +   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
> +
> +   if (intr->num_components != 1)
> +      return false;
> +
> +   if (!is_io_load(intr) &&
> +       intr->intrinsic != nir_intrinsic_store_deref)
> +      return false;
> +
> +   nir_variable *var =
> +      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
> +
> +   /* TODO: add doubles support ? */
> +   if (glsl_type_is_64bit(glsl_without_array(var->type)))
> +      return false;
> +
> +   /* Only touch user defined varyings as these are the only ones we split */
> +   if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
> +      return false;
> +
> +   /* Skip complex types we don't split in the first place */
> +   if (glsl_type_is_matrix(glsl_without_array(var->type)) ||
> +       glsl_type_is_struct(glsl_without_array(var->type)))
> +      return false;
> +
> +   return true;
> +}
> +
> +static bool
> +io_access_same_var(const nir_instr *instr1, const nir_instr *instr2)
> +{
> +   assert(instr1->type == nir_instr_type_intrinsic &&
> +          instr2->type == nir_instr_type_intrinsic);
> +
> +   nir_intrinsic_instr *intr1 = nir_instr_as_intrinsic(instr1);
> +   nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(instr2);
> +
> +   nir_variable *var1 =
> +      nir_deref_instr_get_variable(nir_src_as_deref(intr1->src[0]));
> +   nir_variable *var2 =
> +      nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
> +
> +   /* We don't handle combining vars of different type e.g. different array
> +    * lengths so just skip if the type doesn't match.
> +    */
> +   if (var1->type != var2->type)
> +      return false;
> +
> +   if (is_io_load(intr1) != is_io_load(intr2))
> +      return false;
> +
> +   if (var1->data.location != var2->data.location)
> +      return false;
> +
> +   return true;
> +}
> +
> +static struct util_dynarray *
> +vec_instr_stack_create(void *mem_ctx)
> +{
> +   struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray);
> +   util_dynarray_init(stack, mem_ctx);
> +   return stack;
> +}
> +
> +static void
> +vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr)
> +{
> +   util_dynarray_append(stack, nir_instr *, instr);
> +}
> +
> +static void
> +create_new_load(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
> +                unsigned comp, unsigned num_comps)
> +{
> +   b->cursor = nir_before_instr(&intr->instr);
> +
> +   assert(intr->dest.is_ssa);
> +
> +   nir_intrinsic_instr *new_intr =
> +      nir_intrinsic_instr_create(b->shader, intr->intrinsic);
> +   nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, num_comps,
> +                     intr->dest.ssa.bit_size, NULL);
> +   new_intr->num_components = num_comps;
> +
> +   nir_deref_instr *deref = nir_build_deref_var(b, var);
> +   deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
> +
> +   new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
> +
> +   if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
> +       intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
> +      nir_src_copy(&new_intr->src[1], &intr->src[1], &new_intr->instr);
> +
> +   nir_builder_instr_insert(b, &new_intr->instr);
> +
> +   unsigned channel = comp - var->data.location_frac;
> +   nir_ssa_def *load = nir_channel(b, &new_intr->dest.ssa, channel);
> +   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
> +
> +   /* Remove the old load intrinsic */
> +   nir_instr_remove(&intr->instr);
> +}
> +
> +static void
> +create_new_store(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
> +                 nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps)
> +{
> +   b->cursor = nir_before_instr(&intr->instr);
> +
> +   nir_intrinsic_instr *new_intr =
> +      nir_intrinsic_instr_create(b->shader, intr->intrinsic);
> +   new_intr->num_components = num_comps;
> +
> +   nir_intrinsic_set_write_mask(new_intr, (1 << num_comps) - 1);
> +
> +   nir_deref_instr *deref = nir_build_deref_var(b, var);
> +   deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
> +
> +   new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
> +
> +   nir_ssa_def *stores[4];
> +   for (unsigned i = 0; i < num_comps; i++) {
> +      stores[i] = srcs[first_comp + i];
> +   }
> +
> +   new_intr->src[1] = nir_src_for_ssa(nir_vec(b, stores, num_comps));
> +
> +   nir_builder_instr_insert(b, &new_intr->instr);
> +
> +   /* Remove the old store intrinsic */
> +   nir_instr_remove(&intr->instr);
> +}
> +
> +static bool
> +vec_instr_stack_pop(nir_builder *b, struct util_dynarray *stack,
> +                    nir_instr *instr,
> +                    nir_variable *input_vars[MAX_VARYINGS_INCL_PATCH][4],
> +                    nir_variable *output_vars[MAX_VARYINGS_INCL_PATCH][4])
> +{
> +   nir_instr *last = util_dynarray_pop(stack, nir_instr *);
> +
> +   assert(last == instr);
> +   assert(last->type == nir_instr_type_intrinsic);
> +
> +   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last);
> +   nir_variable *var =
> +      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
> +   unsigned loc = var->data.location - VARYING_SLOT_VAR0;
> +
> +   nir_variable *new_var;
> +   if (var->data.mode == nir_var_shader_in)
> +      new_var = input_vars[loc][var->data.location_frac];
> +   else
> +      new_var = output_vars[loc][var->data.location_frac];
> +
> +   unsigned num_comps =
> +      glsl_get_vector_elements(glsl_without_array(new_var->type));
> +
> +   /* Don't bother walking the stack if this component can't be vectorised. */
> +   if (num_comps == 1)
> +      return false;
> +
> +   if (new_var == var)
> +      return false;
> +
> +   if (is_io_load(intr)) {
> +      create_new_load(b, intr, new_var, var->data.location_frac, num_comps);
> +      return true;
> +   }
> +
> +   b->cursor = nir_before_instr(last);
> +   nir_ssa_undef_instr *instr_undef =
> +      nir_ssa_undef_instr_create(b->shader, 1, 32);
> +   nir_builder_instr_insert(b, &instr_undef->instr);
> +
> +   nir_ssa_def *srcs[4];
> +   for (int i = 0; i < 4; i++) {
> +      srcs[i] = &instr_undef->def;
> +   }
> +   srcs[var->data.location_frac] = intr->src[1].ssa;
> +
> +   unsigned first_comp = var->data.location_frac;
> +   util_dynarray_foreach_reverse(stack, nir_instr *, stack_instr) {
> +      nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(*stack_instr);
> +      nir_variable *var2 =
> +         nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
> +      unsigned loc2 = var2->data.location - VARYING_SLOT_VAR0;
> +
> +      if (output_vars[loc][var->data.location_frac] !=
> +          output_vars[loc2][var2->data.location_frac])
> +         continue;
> +
> +      assert(glsl_get_vector_elements(glsl_without_array(var2->type)) == 1);
> +
> +      if (srcs[var2->data.location_frac] == &instr_undef->def) {
> +         assert(intr2->src[1].is_ssa);
> +         assert(intr2->src[1].ssa);
> +
> +         srcs[var2->data.location_frac] = intr2->src[1].ssa;
> +
> +         if (first_comp > var2->data.location_frac)
> +            first_comp = var2->data.location_frac;
> +      }
> +   }
> +
> +   create_new_store(b, intr, new_var, srcs, first_comp, num_comps);
> +
> +   return true;
> +}
> +
> +static bool
> +cmp_func(const void *data1, const void *data2)
> +{
> +   const struct util_dynarray *arr1 = data1;
> +   const struct util_dynarray *arr2 = data2;
> +
> +   const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1);
> +   const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2);
> +
> +   return io_access_same_var(instr1, instr2);
> +}
> +
> +#define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
> +
> +static uint32_t
> +hash_instr(const nir_instr *instr)
> +{
> +   assert(instr->type == nir_instr_type_intrinsic);
> +
> +   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
> +   nir_variable *var =
> +      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
> +
> +   uint32_t hash = _mesa_fnv32_1a_offset_bias;
> +   bool is_load = is_io_load(intr);
> +
> +   hash = HASH(hash, var->type);
> +   hash = HASH(hash, is_load);
> +   return HASH(hash, var->data.location);
> +}
> +
> +static uint32_t
> +hash_stack(const void *data)
> +{
> +   const struct util_dynarray *stack = data;
> +   const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack);
> +   return hash_instr(first);
> +}
> +
> +static struct set *
> +vec_instr_set_create(void)
> +{
> +   return _mesa_set_create(NULL, hash_stack, cmp_func);
> +}
> +
> +static void
> +vec_instr_set_destroy(struct set *instr_set)
> +{
> +   _mesa_set_destroy(instr_set, NULL);
> +}
> +
> +static void
> +vec_instr_set_add(struct set *instr_set, nir_instr *instr)
> +{
> +   if (!instr_can_rewrite(instr))
> +      return;
> +
> +   struct util_dynarray *new_stack = vec_instr_stack_create(instr_set);
> +   vec_instr_stack_push(new_stack, instr);
> +
> +   struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
> +
> +   if (entry) {
> +      ralloc_free(new_stack);
> +      struct util_dynarray *stack = (struct util_dynarray *) entry->key;
> +      vec_instr_stack_push(stack, instr);
> +      return;
> +   }
> +
> +   _mesa_set_add(instr_set, new_stack);
> +   return;
> +}
> +
> +static bool
> +vec_instr_set_remove(nir_builder *b, struct set *instr_set, nir_instr *instr,
> +                     nir_variable *input_vars[MAX_VARYINGS_INCL_PATCH][4],
> +                     nir_variable *output_vars[MAX_VARYINGS_INCL_PATCH][4])
> +{
> +   if (!instr_can_rewrite(instr))
> +      return false;
> +
> +   /*
> +    * It's pretty unfortunate that we have to do this, but it's a side effect
> +    * of the hash set interfaces. The hash set assumes that we're only
> +    * interested in storing one equivalent element at a time, and if we try to
> +    * insert a duplicate element it will remove the original. We could hack up
> +    * the comparison function to "know" which input is an instruction we
> +    * passed in and which is an array that's part of the entry, but that
> +    * wouldn't work because we need to pass an array to _mesa_set_add() in
> +    * vec_instr_set_add() above, and _mesa_set_add() will call our comparison
> +    * function as well.
> +    */
> +   struct util_dynarray *temp = vec_instr_stack_create(instr_set);
> +   vec_instr_stack_push(temp, instr);
> +   struct set_entry *entry = _mesa_set_search(instr_set, temp);
> +   ralloc_free(temp);
> +
> +   if (entry) {
> +      struct util_dynarray *stack = (struct util_dynarray *) entry->key;
> +      bool progress = vec_instr_stack_pop(b, stack, instr, input_vars,
> +                                          output_vars);
> +
> +      if (!util_dynarray_num_elements(stack, nir_instr *))
> +         _mesa_set_remove(instr_set, entry);
> +
> +      return progress;
> +   }
> +
> +   return false;
> +}
> +
> +static bool
> +vectorize_block(nir_builder *b, nir_block *block, struct set *instr_set,
> +                nir_variable *input_vars[MAX_VARYINGS_INCL_PATCH][4],
> +                nir_variable *output_vars[MAX_VARYINGS_INCL_PATCH][4])
> +{
> +   bool progress = false;
> +
> +   nir_foreach_instr_safe(instr, block) {
> +      vec_instr_set_add(instr_set, instr);
> +   }
> +
> +   for (unsigned i = 0; i < block->num_dom_children; i++) {
> +      nir_block *child = block->dom_children[i];
> +      progress |= vectorize_block(b, child, instr_set, input_vars,
> +                                  output_vars);
> +   }
> +
> +   nir_foreach_instr_reverse_safe(instr, block) {
> +      progress |= vec_instr_set_remove(b, instr_set, instr, input_vars,
> +                                       output_vars);
> +   }
> +
> +   return progress;
> +}
> +
> +static void
> +create_new_io_var(nir_shader *shader,
> +                  nir_variable *vars[MAX_VARYINGS_INCL_PATCH][4],
> +                  unsigned location, unsigned comps)
> +{
> +   unsigned num_comps = util_bitcount(comps);
> +   unsigned first_comp = u_bit_scan(&comps);
> +
> +   nir_variable *var = nir_variable_clone(vars[location][first_comp], shader);
> +   var->data.location_frac = first_comp;
> +   var->type = glsl_replace_vector_type(var->type, num_comps);
> +
> +   nir_shader_add_variable(shader, var);
> +
> +   vars[location][first_comp] = var;
> +
> +   while (comps) {
> +      const int comp = u_bit_scan(&comps);
> +      vars[location][comp] = var;
> +   }
> +}
> +
> +static void
> +create_new_io_vars(nir_shader *shader, struct exec_list *io_list,
> +                   nir_variable *vars[MAX_VARYINGS_INCL_PATCH][4])
> +{
> +   if (exec_list_is_empty(io_list))
> +      return;
> +
> +   nir_foreach_variable(var, io_list) {
> +      if (var->data.location >= VARYING_SLOT_VAR0) {
> +         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
> +         vars[loc][var->data.location_frac] = var;
> +      }
> +   }
> +
> +   /* We don't handle combining vars of different type e.g. different array
> +    * lengths.
> +    */
> +   for (unsigned i = 0; i < MAX_VARYINGS_INCL_PATCH; i++) {
> +      unsigned comps = 0;
> +      for (unsigned j = 0; j < 3; j++) {
> +         if (vars[i][j] && vars[i][j+1] && vars[i][j]->type == 
> vars[i][j+1]->type) {
> +            if (j == 2) {
> +               /* last component so create new variable */
> +               comps |= 3 << vars[i][j]->data.location_frac;
> +               create_new_io_var(shader, vars, i, comps);
> +            } else {
> +               /* Set comps */
> +               comps |= 3 << vars[i][j]->data.location_frac;
> +            }
> +         } else {
> +            if (comps) {
> +               /* Types didn't match but we have already seen matching types
> +                * at this location so create a new variable for those
> +                * components.
> +                */
> +               create_new_io_var(shader, vars, i, comps);
> +               comps = 0;
> +            }
> +         }
> +      }
> +   }
> +}
> +
> +static bool
> +nir_opt_vectorize_io_impl(nir_function_impl *impl)
> +{
> +   nir_builder b;
> +   nir_builder_init(&b, impl);
> +
> +   nir_metadata_require(impl, nir_metadata_dominance);
> +
> +   nir_shader *shader = impl->function->shader;
> +   nir_variable *input_vars[MAX_VARYINGS_INCL_PATCH][4] = {0};
> +   nir_variable *output_vars[MAX_VARYINGS_INCL_PATCH][4] = {0};
> +
> +   create_new_io_vars(shader, &shader->inputs, input_vars);
> +   create_new_io_vars(shader, &shader->outputs, output_vars);
> +
> +   struct set *instr_set = vec_instr_set_create();
> +   bool progress = vectorize_block(&b, nir_start_block(impl), instr_set,
> +                                   input_vars, output_vars);
> +
> +   if (progress)
> +      nir_metadata_preserve(impl, nir_metadata_block_index |
> +                                  nir_metadata_dominance);
> +
> +   vec_instr_set_destroy(instr_set);
> +   return false;
> +}
> +
> +bool
> +nir_opt_vectorize_io(nir_shader *shader)
> +{
> +   bool progress = false;
> +
> +   if (shader->info.stage == MESA_SHADER_FRAGMENT)
> +      return false;

Assuming this pass works for fragment inputs, I think I'd rather have the 
caller skip the pass for fragment shaders than to have the pass silently 
skip for them. That's more explicit and leaves the option open if someone 
has vec4 fragment shaders (old Mali does, I think.)

--Jason

> +
> +   nir_foreach_function(function, shader) {
> +      if (function->impl)
> +         progress |= nir_opt_vectorize_io_impl(function->impl);
> +   }
> +
> +   return progress;
> +}
> --
> 2.17.2
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev





More information about the mesa-dev mailing list