[Mesa-dev] [PATCH v2 1/3] nir: Add a pass to lower vector phi nodes to scalar phi nodes

Connor Abbott cwabbott0 at gmail.com
Sat Jan 24 08:18:21 PST 2015


On Sat, Jan 24, 2015 at 1:00 AM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> ---
>  src/glsl/Makefile.sources               |   1 +
>  src/glsl/nir/nir.h                      |   2 +
>  src/glsl/nir/nir_lower_phis_to_scalar.c | 238 ++++++++++++++++++++++++++++++++
>  3 files changed, 241 insertions(+)
>  create mode 100644 src/glsl/nir/nir_lower_phis_to_scalar.c
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index 96c4ec5..02d0780 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -28,6 +28,7 @@ NIR_FILES = \
>         nir/nir_lower_global_vars_to_local.c \
>         nir/nir_lower_locals_to_regs.c \
>         nir/nir_lower_io.c \
> +       nir/nir_lower_phis_to_scalar.c \
>         nir/nir_lower_samplers.cpp \
>         nir/nir_lower_system_values.c \
>         nir/nir_lower_to_source_mods.c \
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 119ca01..cda14aa 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1523,6 +1523,8 @@ void nir_remove_dead_variables(nir_shader *shader);
>  void nir_lower_vec_to_movs(nir_shader *shader);
>  void nir_lower_alu_to_scalar(nir_shader *shader);
>
> +void nir_lower_phis_to_scalar(nir_shader *shader);
> +
>  void nir_lower_samplers(nir_shader *shader,
>                          struct gl_shader_program *shader_program,
>                          struct gl_program *prog);
> diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c
> new file mode 100644
> index 0000000..9f901d6
> --- /dev/null
> +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c
> @@ -0,0 +1,238 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Jason Ekstrand (jason at jlekstrand.net)
> + *
> + */
> +
> +#include "nir.h"
> +
> +/*
> + * Implements common subexpression elimination
> + */

Whoops... looks like a leftover copy-paste from the CSE pass.

> +
> +struct lower_phis_to_scalar_state {
> +   void *mem_ctx;
> +   void *dead_ctx;
> +
> +   /* Hash table marking which phi nodes are scalarizable.  The keys are
> +    * pointers to phi instructions and the data is either NULL for "not
> +    * scalarizable" or non-NULL for "scalarizable".
> +    */
> +   struct hash_table *phi_table;
> +};
> +
> +/* Determines if the given phi node should be lowered.  The only phi nodes
> + * we will scalarize at the moment are those where all of the sources are
> + * scalarizable.
> + */
> +static bool
> +should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state)
> +{
> +   /* Already scalar */
> +   if (phi->dest.ssa.num_components == 1)
> +      return false;
> +
> +   struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi);
> +   if (entry)
> +      return entry->data != NULL;
> +
> +   nir_foreach_phi_src(phi, src) {
> +      /* Don't know what to do with non-ssa sources */
> +      if (!src->src.is_ssa)
> +         return false;
> +
> +      nir_instr *src_instr = src->src.ssa->parent_instr;
> +      switch (src_instr->type) {
> +      case nir_instr_type_alu: {
> +         nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
> +
> +         /* ALU operations with output_size == 0 should be scalarized.  We
> +          * will also see a bunch of vecN operations from scalarizing ALU
> +          * operations and, since they can easily be copy-propagated, they
> +          * are ok too.
> +          */
> +         if (nir_op_infos[src_alu->op].output_size != 0 &&
> +             src_alu->op != nir_op_vec2 &&
> +             src_alu->op != nir_op_vec3 &&
> +             src_alu->op != nir_op_vec4)
> +            return false;
> +         break;
> +      }
> +
> +      case nir_instr_type_phi: {
> +         nir_phi_instr *src_phi = nir_instr_as_phi(src_instr);
> +
> +         /* Insert an entry and mark it as scalarizable for now. That way
> +          * we don't recurse forever and a cycle in the dependence graph
> +          * won't automatically make us fail to scalarize.
> +          */
> +         entry = _mesa_hash_table_insert(state->phi_table, src_phi, (void *)1);
> +         bool scalarizable = should_lower_phi(src_phi, state);
> +         entry->data = (void *)(uintptr_t)scalarizable;
> +
> +         if (!scalarizable)
> +            return false;
> +         break;
> +      }
> +
> +      default:
> +         /* We can't scalarize this type of instruction */
> +         return false;
> +      }
> +   }
> +
> +   return true;
> +}
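
A side note on the phi-phi case for anyone reading along: the cycle
being guarded against shows up any time there's control flow inside a
loop. A GLSL-ish sketch (names invented):

   vec2 x = a;
   while (loop_cond) {
      if (if_cond)
         x = b;
      /* the if-merge phi for x lands here; one of its sources is the
       * loop-header phi for x, which in turn takes the if-merge phi
       * as its back-edge source
       */
   }

A naive recursive walk would bounce between those two phis forever, so
pre-inserting the hash entry (optimistically marked scalarizable) is
what cuts the recursion off.
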
> +
> +static bool
> +lower_phis_to_scalar_block(nir_block *block, void *void_state)
> +{
> +   struct lower_phis_to_scalar_state *state = void_state;
> +
> +   /* Find the last phi node in the block */
> +   nir_phi_instr *last_phi = NULL;
> +   nir_foreach_instr(block, instr) {
> +      if (instr->type != nir_instr_type_phi)
> +         break;
> +
> +      last_phi = nir_instr_as_phi(instr);
> +   }
> +
> +   /* We have to handle the phi nodes in their own pass due to the way
> +    * we're modifying the linked list of instructions.
> +    */
> +   nir_foreach_instr_safe(block, instr) {
> +      if (instr->type != nir_instr_type_phi)
> +         break;
> +
> +      nir_phi_instr *phi = nir_instr_as_phi(instr);
> +
> +      if (!should_lower_phi(phi, state))
> +         continue;
> +
> +      /* Create a vecN operation to combine the results.  Most of these
> +       * will be redundant, but copy propagation should clean them up for
> +       * us.  No need to add the complexity here.
> +       */
> +      nir_op vec_op;
> +      switch (phi->dest.ssa.num_components) {
> +      case 2: vec_op = nir_op_vec2; break;
> +      case 3: vec_op = nir_op_vec3; break;
> +      case 4: vec_op = nir_op_vec4; break;
> +      default: unreachable("Invalid number of components");
> +      }
> +
> +      nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
> +      vec->dest.dest.is_ssa = true;
> +      nir_ssa_def_init(&vec->instr, &vec->dest.dest.ssa,
> +                       phi->dest.ssa.num_components, NULL);
> +      vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
> +
> +      for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) {
> +         nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx);
> +         new_phi->dest.is_ssa = true;
> +         nir_ssa_def_init(&new_phi->instr, &new_phi->dest.ssa, 1, NULL);
> +
> +         vec->src[i].src.is_ssa = true;
> +         vec->src[i].src.ssa = &new_phi->dest.ssa;
> +
> +         nir_foreach_phi_src(phi, src) {
> +            /* We need to insert a mov to grab the i'th component of src */
> +            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
> +                                                      nir_op_imov);
> +            mov->dest.dest.is_ssa = true;
> +            nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, 1, NULL);
> +            mov->dest.write_mask = 1;
> +            mov->src[0].src = nir_src_copy(src->src, state->mem_ctx);
> +            mov->src[0].swizzle[0] = i;
> +
> +            /* Insert at the end of the predecessor but before the jump */
> +            nir_instr *pred_last_instr = nir_block_last_instr(src->pred);
> +            if (pred_last_instr && pred_last_instr->type == nir_instr_type_jump)
> +               nir_instr_insert_before(pred_last_instr, &mov->instr);
> +            else
> +               nir_instr_insert_after_block(src->pred, &mov->instr);
> +
> +            nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src);
> +            new_src->pred = src->pred;
> +            new_src->src.is_ssa = true;
> +            new_src->src.ssa = &mov->dest.dest.ssa;
> +
> +            exec_list_push_tail(&new_phi->srcs, &new_src->node);
> +         }
> +
> +         nir_instr_insert_before(&phi->instr, &new_phi->instr);
> +      }
> +
> +      nir_instr_insert_after(&last_phi->instr, &vec->instr);
> +
> +      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
> +                               nir_src_for_ssa(&vec->dest.dest.ssa),
> +                               state->mem_ctx);
> +
> +      ralloc_steal(state->dead_ctx, phi);
> +      nir_instr_remove(&phi->instr);
> +
> +      /* We're using the safe iterator and inserting all the newly
> +       * scalarized phi nodes before their non-scalarized version so that's
> +       * ok.  However, we are also inserting vec operations after all of

*all of the phi nodes (or "after the last phi node")

> +       * the last phi node so once we get here, we can't trust even the
> +       * safe iterator to stop properly.  We have to break manually.
> +       */

I think there's something I'm not getting here. If we're on the last
phi instruction and we insert a vec instruction after it, isn't this
still ok? The safe iterator will just skip over the new vec
instruction, land on the next instruction (or the end of the block),
and we'll break out of the loop anyway.
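
My mental model of nir_foreach_instr_safe is the usual
cache-the-next-pointer pattern, i.e. roughly this (not the exact macro
expansion, and instr_get_next() is a made-up stand-in for the
exec_list traversal):

   nir_instr *instr = nir_block_first_instr(block);
   while (instr != NULL) {
      nir_instr *next = instr_get_next(instr); /* cached up front */

      /* the body may remove instr or insert new instructions after
       * it; neither touches the cached next pointer
       */

      instr = next;
   }

so an instruction inserted between instr and the cached next just gets
skipped.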

> +      if (instr == &last_phi->instr)
> +         break;
> +   }
> +
> +   return true;
> +}
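
Spelling out the net effect for a 2-component phi (pseudo-IR sketch,
SSA names invented):

   /* before */
   vec2 ssa_5 = phi block_1: ssa_2, block_2: ssa_4

   /* after: a mov at the end of each predecessor, one scalar phi per
    * component, and a vec to recombine them
    */
   block_1:  ssa_10 = imov ssa_2.x
             ssa_11 = imov ssa_2.y
   block_2:  ssa_12 = imov ssa_4.x
             ssa_13 = imov ssa_4.y
   ssa_6 = phi block_1: ssa_10, block_2: ssa_12
   ssa_7 = phi block_1: ssa_11, block_2: ssa_13
   vec2 ssa_8 = vec2 ssa_6, ssa_7

Copy propagation should then eat most of the movs and redundant vecs,
as the comments say.
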
> +
> +static void
> +lower_phis_to_scalar_impl(nir_function_impl *impl)
> +{
> +   struct lower_phis_to_scalar_state state;
> +
> +   state.mem_ctx = ralloc_parent(impl);
> +   state.dead_ctx = ralloc_context(NULL);
> +   state.phi_table = _mesa_hash_table_create(state.dead_ctx, _mesa_hash_pointer,
> +                                             _mesa_key_pointer_equal);
> +
> +   nir_foreach_block(impl, lower_phis_to_scalar_block, &state);
> +
> +   nir_metadata_preserve(impl, nir_metadata_block_index |
> +                               nir_metadata_dominance);
> +
> +   ralloc_free(state.dead_ctx);
> +}
> +
> +/** A pass that lowers vector phi nodes to scalar
> + *
> + * This pass loops through the blocks and looks for vector phi nodes it
> + * can lower to scalar phi nodes.  Not all phi nodes are lowered.  For
> + * instance, if one of the sources is a non-scalarizable vector, then we
> + * don't bother lowering because that would generate hard-to-coalesce movs.
> + */

Just wondering... what's the advantage of not scalarizing all the phi
nodes? This is supposed to be used for scalar targets, and they might
even want every phi node to be scalar, so that the only non-scalar
things left are load and store instructions (not that we care much
about that right now). Wouldn't splitting everything just save us
some of the work of splitting up mov instructions later on?
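
For context, the usage I have in mind is a scalar backend's
optimization loop, something like this sketch (the exact pass list and
return types are from memory, so treat it as illustrative only):

   bool progress;
   do {
      progress = false;
      nir_lower_alu_to_scalar(shader);
      nir_lower_phis_to_scalar(shader);
      progress |= nir_copy_prop(shader);
      progress |= nir_opt_dce(shader);
   } while (progress);

where anything the phi pass declines to scalarize just stays
vectorized and the cleanup passes deal with the rest.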

> +void
> +nir_lower_phis_to_scalar(nir_shader *shader)
> +{
> +   nir_foreach_overload(shader, overload) {
> +      if (overload->impl)
> +         lower_phis_to_scalar_impl(overload->impl);
> +   }
> +}
> --
> 2.2.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

