[Mesa-stable] [Mesa-dev] [PATCH] nir: Walk blocks in source code order in lower_vars_to_ssa.

Connor Abbott cwabbott0 at gmail.com
Thu Aug 25 06:25:30 UTC 2016


On Thu, Aug 25, 2016 at 12:04 AM, Matt Turner <mattst88 at gmail.com> wrote:
> Prior to this commit rename_variables_block() is recursively called,
> performing a depth-first traversal of the control flow graph. The
> function uses a non-trivial amount of stack space for local variables,
> which puts us in danger of smashing the stack, given a sufficiently deep
> dominance tree.
>
> XCOM: Enemy Within contains a shader with such a dominance tree (1574
> nir_blocks in total, depth of at least 143).
>
> Jason tells me that he believes that any walk over the nir_blocks that
> respects dominance is sufficient (a DFS might have been necessary prior
> to the introduction of nir_phi_builder).

nir_phi_builder.h has some example pseudocode, and there's a comment
before the part which corresponds to rename_variables() that says:

// Visit each block.  This needs to visit dominators first;
// nir_for_each_block() will be ok.

and you're right that the DFS was necessary before the switch to
nir_phi_builder, since we used a different algorithm which did stuff
both before and after recursing into the children of the block. So if
you want, you can make this paragraph sound a little more confident.

>
> In fact, the introduction of nir_phi_builder made the problem worse:
> rename_variables_block(), walks to the bottom of the dominance tree
> before calling nir_phi_builder_value_get_block_def() which walks back to
> the top of the dominance tree...
>
> In any case, this patch ensures we avoid that problem as well.
>
> Cc: mesa-stable at lists.freedesktop.org
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97225

Reviewed-by: Connor Abbott <cwabbott0 at gmail.com>

> ---
> Most easily reviewed with git show -w
>
>  src/compiler/nir/nir_lower_vars_to_ssa.c | 207 +++++++++++++++----------------
>  1 file changed, 103 insertions(+), 104 deletions(-)
>
> diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
> index 317647b..e2e13e2 100644
> --- a/src/compiler/nir/nir_lower_vars_to_ssa.c
> +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
> @@ -479,133 +479,132 @@ lower_copies_to_load_store(struct deref_node *node,
>   * SSA def on the stack per block.
>   */
>  static bool
> -rename_variables_block(nir_block *block, struct lower_variables_state *state)
> +rename_variables(struct lower_variables_state *state)
>  {
>     nir_builder b;
>     nir_builder_init(&b, state->impl);
>
> -   nir_foreach_instr_safe(instr, block) {
> -      if (instr->type != nir_instr_type_intrinsic)
> -         continue;
> -
> -      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> -
> -      switch (intrin->intrinsic) {
> -      case nir_intrinsic_load_var: {
> -         struct deref_node *node =
> -            get_deref_node(intrin->variables[0], state);
> -
> -         if (node == NULL) {
> -            /* If we hit this path then we are referencing an invalid
> -             * value.  Most likely, we unrolled something and are
> -             * reading past the end of some array.  In any case, this
> -             * should result in an undefined value.
> -             */
> -            nir_ssa_undef_instr *undef =
> -               nir_ssa_undef_instr_create(state->shader,
> -                                          intrin->num_components,
> -                                          intrin->dest.ssa.bit_size);
> -
> -            nir_instr_insert_before(&intrin->instr, &undef->instr);
> -            nir_instr_remove(&intrin->instr);
> -
> -            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> -                                     nir_src_for_ssa(&undef->def));
> +   nir_foreach_block(block, state->impl) {
> +      nir_foreach_instr_safe(instr, block) {
> +         if (instr->type != nir_instr_type_intrinsic)
>              continue;
> -         }
>
> -         if (!node->lower_to_ssa)
> -            continue;
> +         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> +
> +         switch (intrin->intrinsic) {
> +         case nir_intrinsic_load_var: {
> +            struct deref_node *node =
> +               get_deref_node(intrin->variables[0], state);
> +
> +            if (node == NULL) {
> +               /* If we hit this path then we are referencing an invalid
> +                * value.  Most likely, we unrolled something and are
> +                * reading past the end of some array.  In any case, this
> +                * should result in an undefined value.
> +                */
> +               nir_ssa_undef_instr *undef =
> +                  nir_ssa_undef_instr_create(state->shader,
> +                                             intrin->num_components,
> +                                             intrin->dest.ssa.bit_size);
> +
> +               nir_instr_insert_before(&intrin->instr, &undef->instr);
> +               nir_instr_remove(&intrin->instr);
> +
> +               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> +                                        nir_src_for_ssa(&undef->def));
> +               continue;
> +            }
>
> -         nir_alu_instr *mov = nir_alu_instr_create(state->shader,
> -                                                   nir_op_imov);
> -         mov->src[0].src = nir_src_for_ssa(
> -            nir_phi_builder_value_get_block_def(node->pb_value, block));
> -         for (unsigned i = intrin->num_components; i < 4; i++)
> -            mov->src[0].swizzle[i] = 0;
> +            if (!node->lower_to_ssa)
> +               continue;
>
> -         assert(intrin->dest.is_ssa);
> +            nir_alu_instr *mov = nir_alu_instr_create(state->shader,
> +                                                      nir_op_imov);
> +            mov->src[0].src = nir_src_for_ssa(
> +               nir_phi_builder_value_get_block_def(node->pb_value, block));
> +            for (unsigned i = intrin->num_components; i < 4; i++)
> +               mov->src[0].swizzle[i] = 0;
>
> -         mov->dest.write_mask = (1 << intrin->num_components) - 1;
> -         nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
> -                           intrin->num_components,
> -                           intrin->dest.ssa.bit_size, NULL);
> +            assert(intrin->dest.is_ssa);
>
> -         nir_instr_insert_before(&intrin->instr, &mov->instr);
> -         nir_instr_remove(&intrin->instr);
> +            mov->dest.write_mask = (1 << intrin->num_components) - 1;
> +            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
> +                              intrin->num_components,
> +                              intrin->dest.ssa.bit_size, NULL);
>
> -         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> -                                  nir_src_for_ssa(&mov->dest.dest.ssa));
> -         break;
> -      }
> -
> -      case nir_intrinsic_store_var: {
> -         struct deref_node *node =
> -            get_deref_node(intrin->variables[0], state);
> -
> -         if (node == NULL) {
> -            /* Probably an out-of-bounds array store.  That should be a
> -             * no-op. */
> +            nir_instr_insert_before(&intrin->instr, &mov->instr);
>              nir_instr_remove(&intrin->instr);
> -            continue;
> -         }
>
> -         if (!node->lower_to_ssa)
> -            continue;
> -
> -         assert(intrin->num_components ==
> -                glsl_get_vector_elements(node->type));
> -
> -         assert(intrin->src[0].is_ssa);
> +            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> +                                     nir_src_for_ssa(&mov->dest.dest.ssa));
> +            break;
> +         }
>
> -         nir_ssa_def *new_def;
> -         b.cursor = nir_before_instr(&intrin->instr);
> +         case nir_intrinsic_store_var: {
> +            struct deref_node *node =
> +               get_deref_node(intrin->variables[0], state);
>
> -         unsigned wrmask = nir_intrinsic_write_mask(intrin);
> -         if (wrmask == (1 << intrin->num_components) - 1) {
> -            /* Whole variable store - just copy the source.  Note that
> -             * intrin->num_components and intrin->src[0].ssa->num_components
> -             * may differ.
> -             */
> -            unsigned swiz[4];
> -            for (unsigned i = 0; i < 4; i++)
> -               swiz[i] = i < intrin->num_components ? i : 0;
> +            if (node == NULL) {
> +               /* Probably an out-of-bounds array store.  That should be a
> +                * no-op. */
> +               nir_instr_remove(&intrin->instr);
> +               continue;
> +            }
>
> -            new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
> -                                  intrin->num_components, false);
> -         } else {
> -            nir_ssa_def *old_def =
> -               nir_phi_builder_value_get_block_def(node->pb_value, block);
> -            /* For writemasked store_var intrinsics, we combine the newly
> -             * written values with the existing contents of unwritten
> -             * channels, creating a new SSA value for the whole vector.
> -             */
> -            nir_ssa_def *srcs[4];
> -            for (unsigned i = 0; i < intrin->num_components; i++) {
> -               if (wrmask & (1 << i)) {
> -                  srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
> -               } else {
> -                  srcs[i] = nir_channel(&b, old_def, i);
> +            if (!node->lower_to_ssa)
> +               continue;
> +
> +            assert(intrin->num_components ==
> +                   glsl_get_vector_elements(node->type));
> +
> +            assert(intrin->src[0].is_ssa);
> +
> +            nir_ssa_def *new_def;
> +            b.cursor = nir_before_instr(&intrin->instr);
> +
> +            unsigned wrmask = nir_intrinsic_write_mask(intrin);
> +            if (wrmask == (1 << intrin->num_components) - 1) {
> +               /* Whole variable store - just copy the source.  Note that
> +                * intrin->num_components and intrin->src[0].ssa->num_components
> +                * may differ.
> +                */
> +               unsigned swiz[4];
> +               for (unsigned i = 0; i < 4; i++)
> +                  swiz[i] = i < intrin->num_components ? i : 0;
> +
> +               new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
> +                                     intrin->num_components, false);
> +            } else {
> +               nir_ssa_def *old_def =
> +                  nir_phi_builder_value_get_block_def(node->pb_value, block);
> +               /* For writemasked store_var intrinsics, we combine the newly
> +                * written values with the existing contents of unwritten
> +                * channels, creating a new SSA value for the whole vector.
> +                */
> +               nir_ssa_def *srcs[4];
> +               for (unsigned i = 0; i < intrin->num_components; i++) {
> +                  if (wrmask & (1 << i)) {
> +                     srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
> +                  } else {
> +                     srcs[i] = nir_channel(&b, old_def, i);
> +                  }
>                 }
> +               new_def = nir_vec(&b, srcs, intrin->num_components);
>              }
> -            new_def = nir_vec(&b, srcs, intrin->num_components);
> -         }
>
> -         assert(new_def->num_components == intrin->num_components);
> +            assert(new_def->num_components == intrin->num_components);
>
> -         nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
> -         nir_instr_remove(&intrin->instr);
> -         break;
> -      }
> +            nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
> +            nir_instr_remove(&intrin->instr);
> +            break;
> +         }
>
> -      default:
> -         break;
> +         default:
> +            break;
> +         }
>        }
>     }
>
> -   for (unsigned i = 0; i < block->num_dom_children; ++i)
> -      rename_variables_block(block->dom_children[i], state);
> -
>     return true;
>  }
>
> @@ -737,7 +736,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
>        }
>     }
>
> -   rename_variables_block(nir_start_block(impl), &state);
> +   rename_variables(&state);
>
>     nir_phi_builder_finish(state.phi_builder);
>
> --
> 2.7.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-stable mailing list