[Mesa-dev] [PATCH 139/133] nir/from_ssa: Clean up parallel copy handling and document it better
Connor Abbott
cwabbott0 at gmail.com
Mon Jan 5 23:19:59 PST 2015
On Wed, Dec 17, 2014 at 8:04 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> Previously, we were doing a lazy creation of the parallel copy
> instructions. This is confusing, hard to get right, and involves some
> extra state tracking of the copies. This commit adds an extra walk over
> the basic blocks to add the block-end parallel copies up front. This
> should be much less confusing and, consequently, easier to get right. This
> commit also adds more comments about parallel copies to help explain what
> all is going on.
>
> As a consequence of these changes, we can now remove the at_end parameter
> from nir_parallel_copy_instr.
> ---
> src/glsl/nir/nir.c | 1 -
> src/glsl/nir/nir.h | 7 ---
> src/glsl/nir/nir_from_ssa.c | 150 +++++++++++++++++++++++++++-----------------
> 3 files changed, 92 insertions(+), 66 deletions(-)
>
> diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
> index d0cab5a..17c1efe 100644
> --- a/src/glsl/nir/nir.c
> +++ b/src/glsl/nir/nir.c
> @@ -471,7 +471,6 @@ nir_parallel_copy_instr_create(void *mem_ctx)
> nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr);
> instr_init(&instr->instr, nir_instr_type_parallel_copy);
>
> - instr->at_end = false;
> exec_list_make_empty(&instr->copies);
>
> return instr;
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index dbfa461..a23bc5f 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -967,13 +967,6 @@ typedef struct {
>
> typedef struct {
> nir_instr instr;
> -
> - /* Indicates that this is the parallel copy at the end of the block.
> - * When isolating phi nodes, we create 2 parallel copies in most blocks;
> - * this flag helps tell them apart.
> - */
> - bool at_end;
> -
> struct exec_list copies;
> } nir_parallel_copy_instr;
>
> diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
> index 1d17ac4..29fcc64 100644
> --- a/src/glsl/nir/nir_from_ssa.c
> +++ b/src/glsl/nir/nir_from_ssa.c
> @@ -229,48 +229,90 @@ merge_sets_interfere(merge_set *a, merge_set *b)
> return false;
> }
>
> -static nir_parallel_copy_instr *
> -block_get_parallel_copy_at_end(nir_block *block, void *mem_ctx)
> +static bool
> +add_parallel_copy_to_end_of_block(nir_block *block, void *void_state)
> {
> - nir_instr *last_instr = nir_block_last_instr(block);
> + struct from_ssa_state *state = void_state;
>
> - /* First we try and find a parallel copy if it already exists. If the
> - * last instruction is a jump, it will be right before the jump;
> - * otherwise, it will be the last instruction.
> - */
> - nir_instr *pcopy_instr;
> - if (last_instr != NULL && last_instr->type == nir_instr_type_jump)
> - pcopy_instr = nir_instr_prev(last_instr);
> - else
> - pcopy_instr = last_instr;
> + bool need_end_copy = false;
> + if (block->successors[0]) {
> + nir_instr *instr = nir_block_first_instr(block->successors[0]);
> + if (instr && instr->type == nir_instr_type_phi)
> + need_end_copy = true;
> + }
>
> - if (pcopy_instr != NULL &&
> - pcopy_instr->type == nir_instr_type_parallel_copy) {
> - /* A parallel copy already exists. */
> - nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(pcopy_instr);
> + if (block->successors[1]) {
> + nir_instr *instr = nir_block_first_instr(block->successors[1]);
> + if (instr && instr->type == nir_instr_type_phi)
> + need_end_copy = true;
> + }
>
> - /* This parallel copy may be the copy for the beginning of some
> - * block, so we need to check for that before we return it.
> + if (need_end_copy) {
> + /* If one of our successors has at least one phi node, we need to
> + * create a parallel copy at the end of the block but before the jump
> + * (if there is one).
> */
> - if (pcopy->at_end)
> - return pcopy;
> + nir_parallel_copy_instr *pcopy =
> + nir_parallel_copy_instr_create(state->dead_ctx);
> +
> + nir_instr *last_instr = nir_block_last_instr(block);
> + if (last_instr && last_instr->type == nir_instr_type_jump) {
> + nir_instr_insert_before(last_instr, &pcopy->instr);
> + } else {
> + nir_instr_insert_after_block(block, &pcopy->instr);
> + }
> }
>
> - /* At this point, we haven't found a suitable parallel copy, so we
> - * have to create one.
> - */
> - nir_parallel_copy_instr *pcopy = nir_parallel_copy_instr_create(mem_ctx);
> - pcopy->at_end = true;
> + return true;
> +}
>
> - if (last_instr && last_instr->type == nir_instr_type_jump) {
> - nir_instr_insert_before(last_instr, &pcopy->instr);
> - } else {
> - nir_instr_insert_after_block(block, &pcopy->instr);
> - }
> +static nir_parallel_copy_instr *
> +get_parallel_copy_at_end_of_block(nir_block *block)
> +{
> + nir_instr *last_instr = nir_block_last_instr(block);
> + if (last_instr == NULL)
> + return NULL;
>
> - return pcopy;
> + /* The last instruction may be a jump in which case the parallel copy is
> + * right before it.
> + */
> + if (last_instr->type == nir_instr_type_jump)
> + last_instr = nir_instr_prev(last_instr);
> +
> + if (last_instr->type == nir_instr_type_parallel_copy)
> + return nir_instr_as_parallel_copy(last_instr);
> + else
> + return NULL;
> }
>
> +/** Isolate phi nodes with parallel copies
> + *
> + * In order to solve the dependency problems with the sources and
> + * destinations of phi nodes, we first isolate them by adding parallel
> + * copies to the beginnings and ends of basic blocks. For every block with
> + * phi nodes, we add a parallel copy immediately following the last phi
> + * node that copies the destinations of all of the phi nodes to new SSA
> + * values. We also add a parallel copy to the end of every block that has
> + * a successor with phi nodes that, for each phi node in each successor,
> + * copies the corresponding sorce of the phi node and adjust the phi to
> + * used the destination of the parallel copy.
> + *
> + * In SSA form, each value has exactly one definition. What this does is
> + * ensure that each value used in a phi also has exactly one use. The
> + * destinations of phis are only used by the parallel copy immediately
> + * following the phi nodes and. Thanks to the parallel copy at the end of
> + * the predecessor block, the sources of phi nodes are are the only use of
> + * that value. This allows us to immediately assign all the sources and
> + * destinations of any given phi node to the same register without worrying
> + * about interference at all. We do coalescing to get rid of the parallel
> + * copies where possible.
> + *
> + * Before this pass can be run, we have to iterate over the blocks with
> + * add_parallel_cop_to_end_of_block to ensure that the parallel copies at
add_parallel_copy_to_end_of_block
> + * the ends of blocks exist. We can create the ones at the beginnings as
> + * we go, but the ones at the ends of blocks need to be created ahead of
> + * time because of potential back-edges in the CFG.
> + */
> static bool
> isolate_phi_nodes_block(nir_block *block, void *void_state)
> {
> @@ -305,7 +347,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
> assert(phi->dest.is_ssa);
> foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
> nir_parallel_copy_instr *pcopy =
> - block_get_parallel_copy_at_end(src->pred, state->dead_ctx);
> + get_parallel_copy_at_end_of_block(src->pred);
> + assert(pcopy);
>
> nir_parallel_copy_copy *copy = ralloc(state->dead_ctx,
> nir_parallel_copy_copy);
> @@ -420,29 +463,26 @@ agressive_coalesce_block(nir_block *block, void *void_state)
> {
> struct from_ssa_state *state = void_state;
>
> + nir_parallel_copy_instr *start_pcopy = NULL;
> nir_foreach_instr(block, instr) {
> /* Phi nodes only ever come at the start of a block */
> if (instr->type != nir_instr_type_phi) {
> if (instr->type != nir_instr_type_parallel_copy)
> break; /* The parallel copy must be right after the phis */
>
> - nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(instr);
> + start_pcopy = nir_instr_as_parallel_copy(instr);
>
> - agressive_coalesce_parallel_copy(pcopy, state);
> -
> - if (pcopy->at_end)
> - return true;
> + agressive_coalesce_parallel_copy(start_pcopy, state);
>
> break;
> }
> }
>
> - nir_instr *last_instr = nir_block_last_instr(block);
> - if (last_instr && last_instr->type == nir_instr_type_parallel_copy) {
> - nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(last_instr);
> - if (pcopy->at_end)
> - agressive_coalesce_parallel_copy(pcopy, state);
> - }
> + nir_parallel_copy_instr *end_pcopy =
> + get_parallel_copy_at_end_of_block(block);
> +
> + if (end_pcopy && end_pcopy != start_pcopy)
> + agressive_coalesce_parallel_copy(end_pcopy, state);
>
> return true;
> }
> @@ -642,7 +682,6 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
> if (!copy->src.is_ssa && copy->src.reg.reg == copy->dest.reg.reg)
> continue;
>
> - /* Set both indices equal to UINT_MAX to mark them as not indexed yet. */
> num_copies++;
> }
>
> @@ -806,21 +845,15 @@ resolve_parallel_copies_block(nir_block *block, void *void_state)
> resolve_parallel_copy(pcopy, state);
> }
>
> - nir_instr *last_instr = nir_block_last_instr(block);
> - if (last_instr == NULL)
> - return true; /* Now empty, nothing to do. */
> -
> - /* If the last instruction is a jump, the parallel copy will be before
> - * the jump.
> + /* It's possible that the above code already cleaned up the end parallel
> + * copy. However, doing so removed it form the instructions list so we
> + * won't find it here. Therefore, it's safe to go ahead and just look
> + * for one and clean it up if it exists.
> */
> - if (last_instr->type == nir_instr_type_jump)
> - last_instr = nir_instr_prev(last_instr);
> -
> - if (last_instr && last_instr->type == nir_instr_type_parallel_copy) {
> - nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(last_instr);
> - if (pcopy->at_end)
> - resolve_parallel_copy(pcopy, state);
> - }
> + nir_parallel_copy_instr *end_pcopy =
> + get_parallel_copy_at_end_of_block(block);
> + if (end_pcopy)
> + resolve_parallel_copy(end_pcopy, state);
>
> return true;
> }
> @@ -836,6 +869,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl)
> state.merge_node_table = _mesa_hash_table_create(NULL,
> _mesa_key_pointer_equal);
>
> + nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state);
> nir_foreach_block(impl, isolate_phi_nodes_block, &state);
>
> /* Mark metadata as dirty before we ask for liveness analysis */
> --
> 2.2.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list