[Mesa-dev] [PATCH 139/133] nir/from_ssa: Clean up parallel copy handling and document it better

Jason Ekstrand jason at jlekstrand.net
Wed Dec 17 17:04:32 PST 2014


Previously, we were doing a lazy creation of the parallel copy
instructions.  This is confusing, hard to get right, and involves some
extra state tracking of the copies.  This commit adds an extra walk over
the basic blocks to add the block-end parallel copies up front.  This
should be much less confusing and, consequently, easier to get right.  This
commit also adds more comments about parallel copies to help explain what
all is going on.

As a consequence of these changes, we can now remove the at_end parameter
from nir_parallel_copy_instr.
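
As a reminder of what "parallel" means here (and why resolve_parallel_copy
has to be careful when it lowers these instructions to ordinary moves): a
parallel copy reads all of its sources before writing any of its
destinations.  The standalone C sketch below is purely illustrative (it is
not NIR code and the variable names are made up); it shows the classic case
where sequentializing a parallel copy needs a temporary because a
destination is also a source.

#include <stdio.h>

/* Illustration only: the parallel copy (a, b) <- (b, a) conceptually
 * reads b and a first, then writes a and b.  Emitting it naively as
 * "a = b; b = a;" would clobber the old value of a, so a resolver has
 * to break the cycle with a temporary (or a swap).
 */
int main(void)
{
   int a = 1, b = 2;

   int tmp = a;   /* save the value that would otherwise be clobbered */
   a = b;
   b = tmp;

   printf("a = %d, b = %d\n", a, b);   /* prints: a = 2, b = 1 */
   return 0;
}
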
---
 src/glsl/nir/nir.c          |   1 -
 src/glsl/nir/nir.h          |   7 ---
 src/glsl/nir/nir_from_ssa.c | 150 +++++++++++++++++++++++++++-----------------
 3 files changed, 92 insertions(+), 66 deletions(-)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index d0cab5a..17c1efe 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -471,7 +471,6 @@ nir_parallel_copy_instr_create(void *mem_ctx)
    nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr);
    instr_init(&instr->instr, nir_instr_type_parallel_copy);
 
-   instr->at_end = false;
    exec_list_make_empty(&instr->copies);
 
    return instr;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index dbfa461..a23bc5f 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -967,13 +967,6 @@ typedef struct {
 
 typedef struct {
    nir_instr instr;
-
-   /* Indicates that this is the parallel copy at the end of the block.
-    * When isolating phi nodes, we create 2 parallel copies in most blocks;
-    * this flag helps tell them apart.
-    */
-   bool at_end;
-
    struct exec_list copies;
 } nir_parallel_copy_instr;
 
diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index 1d17ac4..29fcc64 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -229,48 +229,90 @@ merge_sets_interfere(merge_set *a, merge_set *b)
    return false;
 }
 
-static nir_parallel_copy_instr *
-block_get_parallel_copy_at_end(nir_block *block, void *mem_ctx)
+static bool
+add_parallel_copy_to_end_of_block(nir_block *block, void *void_state)
 {
-   nir_instr *last_instr = nir_block_last_instr(block);
+   struct from_ssa_state *state = void_state;
 
-   /* First we try and find a parallel copy if it already exists.  If the
-    * last instruction is a jump, it will be right before the jump;
-    * otherwise, it will be the last instruction.
-    */
-   nir_instr *pcopy_instr;
-   if (last_instr != NULL && last_instr->type == nir_instr_type_jump)
-      pcopy_instr = nir_instr_prev(last_instr);
-   else
-      pcopy_instr = last_instr;
+   bool need_end_copy = false;
+   if (block->successors[0]) {
+      nir_instr *instr = nir_block_first_instr(block->successors[0]);
+      if (instr && instr->type == nir_instr_type_phi)
+         need_end_copy = true;
+   }
 
-   if (pcopy_instr != NULL &&
-       pcopy_instr->type == nir_instr_type_parallel_copy) {
-      /* A parallel copy already exists. */
-      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(pcopy_instr);
+   if (block->successors[1]) {
+      nir_instr *instr = nir_block_first_instr(block->successors[1]);
+      if (instr && instr->type == nir_instr_type_phi)
+         need_end_copy = true;
+   }
 
-      /* This parallel copy may be the copy for the beginning of some
-       * block, so we need to check for that before we return it.
+   if (need_end_copy) {
+      /* If one of our successors has at least one phi node, we need to
+       * create a parallel copy at the end of the block but before the jump
+       * (if there is one).
        */
-      if (pcopy->at_end)
-         return pcopy;
+      nir_parallel_copy_instr *pcopy =
+         nir_parallel_copy_instr_create(state->dead_ctx);
+
+      nir_instr *last_instr = nir_block_last_instr(block);
+      if (last_instr && last_instr->type == nir_instr_type_jump) {
+         nir_instr_insert_before(last_instr, &pcopy->instr);
+      } else {
+         nir_instr_insert_after_block(block, &pcopy->instr);
+      }
    }
 
-   /* At this point, we haven't found a suitable parallel copy, so we
-    * have to create one.
-    */
-   nir_parallel_copy_instr *pcopy = nir_parallel_copy_instr_create(mem_ctx);
-   pcopy->at_end = true;
+   return true;
+}
 
-   if (last_instr && last_instr->type == nir_instr_type_jump) {
-      nir_instr_insert_before(last_instr, &pcopy->instr);
-   } else {
-      nir_instr_insert_after_block(block, &pcopy->instr);
-   }
+static nir_parallel_copy_instr *
+get_parallel_copy_at_end_of_block(nir_block *block)
+{
+   nir_instr *last_instr = nir_block_last_instr(block);
+   if (last_instr == NULL)
+      return NULL;
 
-   return pcopy;
+   /* The last instruction may be a jump in which case the parallel copy is
+    * right before it.
+    */
+   if (last_instr->type == nir_instr_type_jump)
+      last_instr = nir_instr_prev(last_instr);
+
+   if (last_instr->type == nir_instr_type_parallel_copy)
+      return nir_instr_as_parallel_copy(last_instr);
+   else
+      return NULL;
 }
 
+/** Isolate phi nodes with parallel copies
+ *
+ * In order to solve the dependency problems with the sources and
+ * destinations of phi nodes, we first isolate them by adding parallel
+ * copies to the beginnings and ends of basic blocks.  For every block with
+ * phi nodes, we add a parallel copy immediately following the last phi
+ * node that copies the destinations of all of the phi nodes to new SSA
+ * values.  We also add a parallel copy to the end of every block that has
+ * a successor with phi nodes that, for each phi node in each successor,
+ * copies the corresponding source of the phi node and adjusts the phi to
+ * use the destination of the parallel copy.
+ *
+ * In SSA form, each value has exactly one definition.  What this does is
+ * ensure that each value used in a phi also has exactly one use.  The
+ * destinations of phis are only used by the parallel copy immediately
+ * following the phi nodes.  Thanks to the parallel copy at the end of
+ * the predecessor block, the sources of phi nodes are the only use of
+ * that value.  This allows us to immediately assign all the sources and
+ * destinations of any given phi node to the same register without worrying
+ * about interference at all.  We do coalescing to get rid of the parallel
+ * copies where possible.
+ *
+ * Before this pass can be run, we have to iterate over the blocks with
+ * add_parallel_copy_to_end_of_block to ensure that the parallel copies at
+ * the ends of blocks exist.  We can create the ones at the beginnings as
+ * we go, but the ones at the ends of blocks need to be created ahead of
+ * time because of potential back-edges in the CFG.
+ */
 static bool
 isolate_phi_nodes_block(nir_block *block, void *void_state)
 {
@@ -305,7 +347,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
       assert(phi->dest.is_ssa);
       foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
          nir_parallel_copy_instr *pcopy =
-            block_get_parallel_copy_at_end(src->pred, state->dead_ctx);
+            get_parallel_copy_at_end_of_block(src->pred);
+         assert(pcopy);
 
          nir_parallel_copy_copy *copy = ralloc(state->dead_ctx,
                                                nir_parallel_copy_copy);
@@ -420,29 +463,26 @@ agressive_coalesce_block(nir_block *block, void *void_state)
 {
    struct from_ssa_state *state = void_state;
 
+   nir_parallel_copy_instr *start_pcopy = NULL;
    nir_foreach_instr(block, instr) {
       /* Phi nodes only ever come at the start of a block */
       if (instr->type != nir_instr_type_phi) {
          if (instr->type != nir_instr_type_parallel_copy)
             break; /* The parallel copy must be right after the phis */
 
-         nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(instr);
+         start_pcopy = nir_instr_as_parallel_copy(instr);
 
-         agressive_coalesce_parallel_copy(pcopy, state);
-
-         if (pcopy->at_end)
-            return true;
+         agressive_coalesce_parallel_copy(start_pcopy, state);
 
          break;
       }
    }
 
-   nir_instr *last_instr = nir_block_last_instr(block);
-   if (last_instr && last_instr->type == nir_instr_type_parallel_copy) {
-      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(last_instr);
-      if (pcopy->at_end)
-         agressive_coalesce_parallel_copy(pcopy, state);
-   }
+   nir_parallel_copy_instr *end_pcopy =
+      get_parallel_copy_at_end_of_block(block);
+
+   if (end_pcopy && end_pcopy != start_pcopy)
+      agressive_coalesce_parallel_copy(end_pcopy, state);
 
    return true;
 }
@@ -642,7 +682,6 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
       if (!copy->src.is_ssa && copy->src.reg.reg == copy->dest.reg.reg)
          continue;
 
-      /* Set both indices equal to UINT_MAX to mark them as not indexed yet. */
       num_copies++;
    }
 
@@ -806,21 +845,15 @@ resolve_parallel_copies_block(nir_block *block, void *void_state)
       resolve_parallel_copy(pcopy, state);
    }
 
-   nir_instr *last_instr = nir_block_last_instr(block);
-   if (last_instr == NULL)
-      return true; /* Now empty, nothing to do. */
-
-   /* If the last instruction is a jump, the parallel copy will be before
-    * the jump.
+   /* It's possible that the above code already cleaned up the end parallel
+    * copy.  However, doing so removed it from the instruction list so we
+    * won't find it here.  Therefore, it's safe to go ahead and just look
+    * for one and clean it up if it exists.
     */
-   if (last_instr->type == nir_instr_type_jump)
-      last_instr = nir_instr_prev(last_instr);
-
-   if (last_instr && last_instr->type == nir_instr_type_parallel_copy) {
-      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(last_instr);
-      if (pcopy->at_end)
-         resolve_parallel_copy(pcopy, state);
-   }
+   nir_parallel_copy_instr *end_pcopy =
+      get_parallel_copy_at_end_of_block(block);
+   if (end_pcopy)
+      resolve_parallel_copy(end_pcopy, state);
 
    return true;
 }
@@ -836,6 +869,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl)
    state.merge_node_table = _mesa_hash_table_create(NULL,
                                                     _mesa_key_pointer_equal);
 
+   nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state);
    nir_foreach_block(impl, isolate_phi_nodes_block, &state);
 
    /* Mark metadata as dirty before we ask for liveness analysis */
-- 
2.2.0


