[Mesa-dev] [PATCH] nir: Walk blocks in source code order in lower_vars_to_ssa.
Connor Abbott
cwabbott0 at gmail.com
Thu Aug 25 06:25:30 UTC 2016
On Thu, Aug 25, 2016 at 12:04 AM, Matt Turner <mattst88 at gmail.com> wrote:
> Prior to this commit rename_variables_block() is recursively called,
> performing a depth-first traversal of the control flow graph. The
> function uses a non-trivial amount of stack space for local variables,
> which puts us in danger of smashing the stack, given a sufficiently deep
> dominance tree.
>
> XCOM: Enemy Within contains a shader with such a dominance tree (1574
> nir_blocks in total, depth of at least 143).
>
> Jason tells me that he believes that any walk over the nir_blocks that
> respects dominance is sufficient (a DFS might have been necessary prior
> to the introduction of nir_phi_builder).
nir_phi_builder.h has some example pseudocode, and there's a comment
before the part which corresponds to rename_variables() that says:
// Visit each block. This needs to visit dominators first;
// nir_for_each_block() will be ok.
and you're right that the DFS was necessary before the switch to
nir_phi_builder, since we used a different algorithm which did stuff
both before and after recursing into the children of the block. So if
you want, you can make this paragraph sound a little more confident.
>
> In fact, the introduction of nir_phi_builder made the problem worse:
> rename_variables_block(), walks to the bottom of the dominance tree
> before calling nir_phi_builder_value_get_block_def() which walks back to
> the top of the dominance tree...
>
> In any case, this patch ensures we avoid that problem as well.
>
> Cc: mesa-stable at lists.freedesktop.org
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97225
Reviewed-by: Connor Abbott <cwabbott0 at gmail.com>
> ---
> Most easily reviewed with git show -w
>
> src/compiler/nir/nir_lower_vars_to_ssa.c | 207 +++++++++++++++----------------
> 1 file changed, 103 insertions(+), 104 deletions(-)
>
> diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
> index 317647b..e2e13e2 100644
> --- a/src/compiler/nir/nir_lower_vars_to_ssa.c
> +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
> @@ -479,133 +479,132 @@ lower_copies_to_load_store(struct deref_node *node,
> * SSA def on the stack per block.
> */
> static bool
> -rename_variables_block(nir_block *block, struct lower_variables_state *state)
> +rename_variables(struct lower_variables_state *state)
> {
> nir_builder b;
> nir_builder_init(&b, state->impl);
>
> - nir_foreach_instr_safe(instr, block) {
> - if (instr->type != nir_instr_type_intrinsic)
> - continue;
> -
> - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> -
> - switch (intrin->intrinsic) {
> - case nir_intrinsic_load_var: {
> - struct deref_node *node =
> - get_deref_node(intrin->variables[0], state);
> -
> - if (node == NULL) {
> - /* If we hit this path then we are referencing an invalid
> - * value. Most likely, we unrolled something and are
> - * reading past the end of some array. In any case, this
> - * should result in an undefined value.
> - */
> - nir_ssa_undef_instr *undef =
> - nir_ssa_undef_instr_create(state->shader,
> - intrin->num_components,
> - intrin->dest.ssa.bit_size);
> -
> - nir_instr_insert_before(&intrin->instr, &undef->instr);
> - nir_instr_remove(&intrin->instr);
> -
> - nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> - nir_src_for_ssa(&undef->def));
> + nir_foreach_block(block, state->impl) {
> + nir_foreach_instr_safe(instr, block) {
> + if (instr->type != nir_instr_type_intrinsic)
> continue;
> - }
>
> - if (!node->lower_to_ssa)
> - continue;
> + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> +
> + switch (intrin->intrinsic) {
> + case nir_intrinsic_load_var: {
> + struct deref_node *node =
> + get_deref_node(intrin->variables[0], state);
> +
> + if (node == NULL) {
> + /* If we hit this path then we are referencing an invalid
> + * value. Most likely, we unrolled something and are
> + * reading past the end of some array. In any case, this
> + * should result in an undefined value.
> + */
> + nir_ssa_undef_instr *undef =
> + nir_ssa_undef_instr_create(state->shader,
> + intrin->num_components,
> + intrin->dest.ssa.bit_size);
> +
> + nir_instr_insert_before(&intrin->instr, &undef->instr);
> + nir_instr_remove(&intrin->instr);
> +
> + nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> + nir_src_for_ssa(&undef->def));
> + continue;
> + }
>
> - nir_alu_instr *mov = nir_alu_instr_create(state->shader,
> - nir_op_imov);
> - mov->src[0].src = nir_src_for_ssa(
> - nir_phi_builder_value_get_block_def(node->pb_value, block));
> - for (unsigned i = intrin->num_components; i < 4; i++)
> - mov->src[0].swizzle[i] = 0;
> + if (!node->lower_to_ssa)
> + continue;
>
> - assert(intrin->dest.is_ssa);
> + nir_alu_instr *mov = nir_alu_instr_create(state->shader,
> + nir_op_imov);
> + mov->src[0].src = nir_src_for_ssa(
> + nir_phi_builder_value_get_block_def(node->pb_value, block));
> + for (unsigned i = intrin->num_components; i < 4; i++)
> + mov->src[0].swizzle[i] = 0;
>
> - mov->dest.write_mask = (1 << intrin->num_components) - 1;
> - nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
> - intrin->num_components,
> - intrin->dest.ssa.bit_size, NULL);
> + assert(intrin->dest.is_ssa);
>
> - nir_instr_insert_before(&intrin->instr, &mov->instr);
> - nir_instr_remove(&intrin->instr);
> + mov->dest.write_mask = (1 << intrin->num_components) - 1;
> + nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
> + intrin->num_components,
> + intrin->dest.ssa.bit_size, NULL);
>
> - nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> - nir_src_for_ssa(&mov->dest.dest.ssa));
> - break;
> - }
> -
> - case nir_intrinsic_store_var: {
> - struct deref_node *node =
> - get_deref_node(intrin->variables[0], state);
> -
> - if (node == NULL) {
> - /* Probably an out-of-bounds array store. That should be a
> - * no-op. */
> + nir_instr_insert_before(&intrin->instr, &mov->instr);
> nir_instr_remove(&intrin->instr);
> - continue;
> - }
>
> - if (!node->lower_to_ssa)
> - continue;
> -
> - assert(intrin->num_components ==
> - glsl_get_vector_elements(node->type));
> -
> - assert(intrin->src[0].is_ssa);
> + nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> + nir_src_for_ssa(&mov->dest.dest.ssa));
> + break;
> + }
>
> - nir_ssa_def *new_def;
> - b.cursor = nir_before_instr(&intrin->instr);
> + case nir_intrinsic_store_var: {
> + struct deref_node *node =
> + get_deref_node(intrin->variables[0], state);
>
> - unsigned wrmask = nir_intrinsic_write_mask(intrin);
> - if (wrmask == (1 << intrin->num_components) - 1) {
> - /* Whole variable store - just copy the source. Note that
> - * intrin->num_components and intrin->src[0].ssa->num_components
> - * may differ.
> - */
> - unsigned swiz[4];
> - for (unsigned i = 0; i < 4; i++)
> - swiz[i] = i < intrin->num_components ? i : 0;
> + if (node == NULL) {
> + /* Probably an out-of-bounds array store. That should be a
> + * no-op. */
> + nir_instr_remove(&intrin->instr);
> + continue;
> + }
>
> - new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
> - intrin->num_components, false);
> - } else {
> - nir_ssa_def *old_def =
> - nir_phi_builder_value_get_block_def(node->pb_value, block);
> - /* For writemasked store_var intrinsics, we combine the newly
> - * written values with the existing contents of unwritten
> - * channels, creating a new SSA value for the whole vector.
> - */
> - nir_ssa_def *srcs[4];
> - for (unsigned i = 0; i < intrin->num_components; i++) {
> - if (wrmask & (1 << i)) {
> - srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
> - } else {
> - srcs[i] = nir_channel(&b, old_def, i);
> + if (!node->lower_to_ssa)
> + continue;
> +
> + assert(intrin->num_components ==
> + glsl_get_vector_elements(node->type));
> +
> + assert(intrin->src[0].is_ssa);
> +
> + nir_ssa_def *new_def;
> + b.cursor = nir_before_instr(&intrin->instr);
> +
> + unsigned wrmask = nir_intrinsic_write_mask(intrin);
> + if (wrmask == (1 << intrin->num_components) - 1) {
> + /* Whole variable store - just copy the source. Note that
> + * intrin->num_components and intrin->src[0].ssa->num_components
> + * may differ.
> + */
> + unsigned swiz[4];
> + for (unsigned i = 0; i < 4; i++)
> + swiz[i] = i < intrin->num_components ? i : 0;
> +
> + new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
> + intrin->num_components, false);
> + } else {
> + nir_ssa_def *old_def =
> + nir_phi_builder_value_get_block_def(node->pb_value, block);
> + /* For writemasked store_var intrinsics, we combine the newly
> + * written values with the existing contents of unwritten
> + * channels, creating a new SSA value for the whole vector.
> + */
> + nir_ssa_def *srcs[4];
> + for (unsigned i = 0; i < intrin->num_components; i++) {
> + if (wrmask & (1 << i)) {
> + srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
> + } else {
> + srcs[i] = nir_channel(&b, old_def, i);
> + }
> }
> + new_def = nir_vec(&b, srcs, intrin->num_components);
> }
> - new_def = nir_vec(&b, srcs, intrin->num_components);
> - }
>
> - assert(new_def->num_components == intrin->num_components);
> + assert(new_def->num_components == intrin->num_components);
>
> - nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
> - nir_instr_remove(&intrin->instr);
> - break;
> - }
> + nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
> + nir_instr_remove(&intrin->instr);
> + break;
> + }
>
> - default:
> - break;
> + default:
> + break;
> + }
> }
> }
>
> - for (unsigned i = 0; i < block->num_dom_children; ++i)
> - rename_variables_block(block->dom_children[i], state);
> -
> return true;
> }
>
> @@ -737,7 +736,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
> }
> }
>
> - rename_variables_block(nir_start_block(impl), &state);
> + rename_variables(&state);
>
> nir_phi_builder_finish(state.phi_builder);
>
> --
> 2.7.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list