Mesa (nv50-compiler): nv50: build proper phi functions in the first place

Wed Aug 4 22:49:20 UTC 2010

Module: Mesa
Branch: nv50-compiler
Commit: aaa8802a22d83fd89d7e306b7d03fa587a19aa0a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=aaa8802a22d83fd89d7e306b7d03fa587a19aa0a

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Thu Aug  5 00:11:56 2010 +0200

nv50: build proper phi functions in the first place

---

 src/gallium/drivers/nv50/nv50_pc.c          |   39 +++++++-
 src/gallium/drivers/nv50/nv50_pc.h          |    3 +
 src/gallium/drivers/nv50/nv50_pc_optimize.c |    4 +
 src/gallium/drivers/nv50/nv50_pc_regalloc.c |  140 +++++----------------------
 src/gallium/drivers/nv50/nv50_tgsi_to_nc.c  |  127 +++++++++++++++++++------
 5 files changed, 166 insertions(+), 147 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
index 614982d..e32d28a 100644
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -394,7 +394,7 @@ nv_nvi_delete(struct nv_instruction *nvi)
    struct nv_basic_block *b = nvi->bb;
    int j;
 
-   debug_printf("REM: "); nv_print_instruction(nvi);
+   /* debug_printf("REM: "); nv_print_instruction(nvi); */
 
    for (j = 0; j < 5; ++j)
       nv_reference(NULL, &nvi->src[j], NULL);
@@ -477,5 +477,40 @@ nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
    for (j = 0; j < b->num_in; ++j)
       n += nvbb_dominated_by(b->in[j], d);
 
-   return n && (n == b->num_in);
+   return (n && (n == b->num_in)) ? 1 : 0;
+}
+
+/* check if bf (future) can be reached from bp (past) */
+boolean
+nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
+                  struct nv_basic_block *bt)
+{
+   if (bf == bp)
+      return TRUE;
+   if (bp == bt)
+      return FALSE;
+
+   if (bp->out[0] && bp->out[0] != bp &&
+       nvbb_reachable_by(bf, bp->out[0], bt))
+      return TRUE;
+   if (bp->out[1] && bp->out[1] != bp &&
+       nvbb_reachable_by(bf, bp->out[1], bt))
+      return TRUE;
+   return FALSE;
+}
+
+struct nv_basic_block *
+nvbb_dom_frontier(struct nv_basic_block *b)
+{
+   struct nv_basic_block *df = b->out[0];
+
+   assert(df);
+   while (nvbb_dominated_by(df, b) ||
+          (!nvbb_dominated_by(df->in[0], b) &&
+           (!df->in[1] || !nvbb_dominated_by(df->in[1], b)))) {
+      df = df->out[0];
+      assert(df);
+   }
+   assert(df);
+   return df;
 }
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index 4b191c5..987043c 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -426,6 +426,9 @@ void nv_nvi_delete(struct nv_instruction *);
 void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
 void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *);
 int nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *);
+boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *,
+                          struct nv_basic_block *);
+struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *);
 int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
                        struct nv_value *new_val);
 
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index 324f8bb..f2f8d0e 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -771,6 +771,10 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
             if (ik->src[4] || ir->src[4])
                continue; /* don't mess with address registers */
 
+            if (ik->flags_src || ir->flags_src ||
+                ik->flags_def || ir->flags_def)
+               continue; /* and also not with flags, for now */
+
             for (s = 0; s < 3; ++s) {
                struct nv_value *a, *b;
 
diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
index 941ec9f..172e44f 100644
--- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c
+++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
@@ -43,25 +43,6 @@ struct nv_pc_pass {
    uint pass_seq;
 };
 
-/* check if bf (future) can be reached from bp (past) */
-static boolean
-bb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
-                struct nv_basic_block *bt)
-{
-   if (bf == bp)
-      return TRUE;
-   if (bp == bt)
-      return FALSE;
-
-   if (bp->out[0] && bp->out[0] != bp &&
-       bb_reachable_by(bf, bp->out[0], bt))
-      return TRUE;
-   if (bp->out[1] && bp->out[1] != bp &&
-       bb_reachable_by(bf, bp->out[1], bt))
-      return TRUE;
-   return FALSE;
-}
-
 static void
 ranges_coalesce(struct nv_range *range)
 {
@@ -377,32 +358,13 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
    do_join_values(ctx, a, b);
 }
 
-/* For phi functions with sources from blocks that are not direct predecessors,
- * if such a source is to be used in an earlier predecessor, we need to add an
- * additional phi function. Used when inserting the MOVs below.
- */
-static struct nv_value *
-propagate_phi(struct nv_pc *pc, struct nv_instruction *phi, int s)
-{
-   struct nv_basic_block *b = pc->current_block;
-   struct nv_value *val = phi->src[s]->value;
-   struct nv_instruction *nvi = new_instruction(pc, NV_OP_PHI);
-   int i, k;
-
-   (nvi->def[0] = new_value(pc, val->reg.file, val->reg.type))->insn = nvi;
-
-   for (k = 0, i = 0; i < 4 && phi->src[i]; ++i) {
-      if (bb_reachable_by(b, phi->src[i]->value->insn->bb, b))
-         nvi->src[k++] = new_ref(pc, phi->src[i]->value);
-   }
-   return nvi->def[0];
-}
-
-/* For IF blocks without ELSE blocks, insert an empty block for the MOVs.
- * Insert additional PHIs for cases where a direct MOV wouldn't be valid.
+/* For each operand of each PHI in b, generate a new value by inserting a MOV
+ * at the end of the block it is coming from and replace the operand with its
+ * result. This eliminates liveness conflicts and enables us to let values be
+ * copied to the right register if such a conflict exists nonetheless.
  */
 static int
-pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
 {
    struct nv_instruction *i, *ni;
    struct nv_value *val;
@@ -426,31 +388,36 @@ pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)
          if (p->exit->target == b) /* target to new else-block */
             p->exit->target = pn;
 
-         for (j = 0; j < b->num_in; ++j) {
-            if (b->in[j] == p) {
-               b->in[j] = pn;
-               break;
-            }
-         }
+         b->in[n] = pn;
+
          pn->out[0] = b;
          pn->in[0] = p;
          pn->num_in = 1;
       }
-
       ctx->pc->current_block = pn;
 
       for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
          for (j = 0; j < 4 && i->src[j]; ++j) {
-            if (bb_reachable_by(pn, i->src[j]->value->insn->bb, b))
+            if (nvbb_reachable_by(p, i->src[j]->value->insn->bb, b))
                break;
          }
          if (j >= 4 || !i->src[j])
             continue;
          val = i->src[j]->value;
 
-         if (!nvbb_dominated_by(pn, val->insn->bb))
-            nv_reference(ctx->pc, &i->src[j], propagate_phi(ctx->pc, i, j));
+         ni = new_instruction(ctx->pc, NV_OP_MOV);
+
+         /* TODO: insert instruction at correct position in the first place */
+         if (ni->prev && ni->prev->target)
+            nv_nvi_permute(ni->prev, ni);
+
+         ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
+         ni->def[0]->insn = ni;
+         ni->src[0] = new_ref(ctx->pc, val);
+
+         nv_reference(ctx->pc, &i->src[j], ni->def[0]);
       }
+
       if (pn != p && pn->exit) {
          ctx->pc->current_block = b->in[n ? 0 : 1];
          ni = new_instruction(ctx->pc, NV_OP_BRA);
@@ -461,70 +428,11 @@ pass_generate_phi_movs_1(struct nv_pc_pass *ctx, struct nv_basic_block *b)
 
    for (j = 0; j < 2; ++j)
       if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
-         pass_generate_phi_movs_1(ctx, b->out[j]);
+         pass_generate_phi_movs(ctx, b->out[j]);
 
    return 0;
 }
 
-/* Now everything should be in order and we can insert the MOVs. */
-static int
-pass_generate_phi_movs_2(struct nv_pc_pass *ctx, struct nv_basic_block *b)
-{
-   struct nv_instruction *i, *mov;
-   struct nv_value *val;
-   struct nv_basic_block *p;
-   int n, j;
-
-   b->pass_seq = ctx->pc->pass_seq;
-
-   for (n = 0; n < b->num_in; ++n) {
-      ctx->pc->current_block = p = b->in[n];
-
-      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
-         for (j = 0; j < 4 && i->src[j]; ++j) {
-            if (bb_reachable_by(p, i->src[j]->value->insn->bb, b))
-               break;
-         }
-         if (j >= 4 || !i->src[j])
-            continue;
-         val = i->src[j]->value;
-
-         mov = new_instruction(ctx->pc, NV_OP_MOV);
-
-         /* TODO: insert instruction at correct position in the first place */
-         if (mov->prev && mov->prev->target)
-            nv_nvi_permute(mov->prev, mov);
-
-         mov->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
-         mov->def[0]->insn = mov;
-         mov->src[0] = new_ref(ctx->pc, val);
-
-         nv_reference(ctx->pc, &i->src[j], mov->def[0]);
-      }
-   }
-
-   for (j = 1; j >= 0; --j) /* different order for the sake of diversity */
-      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
-         pass_generate_phi_movs_2(ctx, b->out[j]);
-
-   return 0;
-}
-
-/* For each operand of each PHI in b, generate a new value by inserting a MOV
- * at the end of the block it is coming from and replace the operand with its
- * result. This eliminates liveness conflicts and enables us to let values be
- * copied to the right register if such a conflict exists nonetheless.
- */
-static INLINE int
-pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
-{
-   if (pass_generate_phi_movs_1(ctx, b))
-      return 1;
-
-   ++ctx->pc->pass_seq;
-   return pass_generate_phi_movs_2(ctx, b);
-}
-
 static int
 pass_join_values(struct nv_pc_pass *ctx, int iter)
 {
@@ -688,7 +596,7 @@ pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
                break;
             assert(i->src[j]->value->insn);
 
-            if (bb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {
+            if (nvbb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {
                live_set_add(b, i->src[j]->value);
                debug_printf("BB:%i liveset + %i\n", b->id, i->src[j]->value->n);
             } else {
@@ -774,7 +682,7 @@ pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
             if (!i->src[s])
                break;
             assert(i->src[s]->value->insn);
-            if (bb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
+            if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
                live_set_add(b, i->src[s]->value);
             else
                live_set_rem(b, i->src[s]->value);
@@ -978,7 +886,7 @@ nv_pc_exec_pass1(struct nv_pc *pc)
 
    nv_print_program(ctx->pc->root);
 
-   ctx->insns = CALLOC(pc->num_instructions, sizeof(struct nv_instruction *));
+   ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
 
    pc->pass_seq++;
    ret = pass_generate_phi_movs(ctx, pc->root);
diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
index 8846ef0..6a9259c 100644
--- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@@ -51,16 +51,22 @@ struct bld_value_stack {
 };
 
 static INLINE void
-bld_push_value(struct bld_value_stack *stk)
+bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
 {
-   assert(!stk->size || (stk->body[stk->size - 1] != stk->top));
+   assert(!stk->size || (stk->body[stk->size - 1] != val));
 
    if (!(stk->size % 8)) {
       unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);
       unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);
       stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);
    }
-   stk->body[stk->size++] = stk->top;
+   stk->body[stk->size++] = val;
+}
+
+static INLINE void
+bld_vals_push(struct bld_value_stack *stk)
+{
+   bld_vals_push_val(stk, stk->top);
    stk->top = NULL;
 }
 
@@ -72,7 +78,7 @@ bld_push_values(struct bld_value_stack *stacks, int n)
    for (i = 0; i < n; ++i)
       for (c = 0; c < 4; ++c)
          if (stacks[i * 4 + c].top)
-            bld_push_value(&stacks[i * 4 + c]);
+            bld_vals_push(&stacks[i * 4 + c]);
 }
 
 #define FETCH_TEMP(i, c)    (bld->tvs[i][c].top)
@@ -121,6 +127,17 @@ struct bld_context {
    uint num_immds;
 };
 
+static INLINE void
+bld_warn_uninitialized(struct bld_context *bld, int kind,
+                       struct bld_value_stack *stk, struct nv_basic_block *b)
+{
+   long i = (stk - &bld->tvs[0][0]) / 4;
+   long c = (stk - &bld->tvs[0][0]) & 3;
+
+   debug_printf("WARNING: TEMP[%li].%li %s used uninitialized in BB:%i\n",
+                i, c, kind ? "may be" : "is", b->id);
+}
+
 static INLINE struct nv_value *
 bld_def(struct nv_instruction *i, int c, struct nv_value *value)
 {
@@ -168,42 +185,91 @@ fetch_by_bb(struct bld_value_stack *stack,
       fetch_by_bb(stack, vals, n, b->in[i]);
 }
 
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u);
+
 static struct nv_value *
-bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
+bld_phi(struct bld_context *bld, struct nv_basic_block *b,
+        struct bld_value_stack *stack)
 {
-   struct nv_value *vals[16], *phi = NULL;
-   int j, i = 0, n = 0;
+   struct nv_basic_block *in;
+   struct nv_value *vals[16], *val;
+   struct nv_instruction *phi;
+   int i, j, n;
+
+   do {
+      i = n = 0;
+      fetch_by_bb(stack, vals, &n, b);
+
+      if (!n) {
+         bld_warn_uninitialized(bld, 0, stack, b);
+         return NULL;
+      }
 
-   fetch_by_bb(stack, vals, &n, bld->pc->current_block);
+      if (n == 1) {
+         if (nvbb_dominated_by(b, vals[0]->insn->bb))
+            break;
 
-   if (n == 0)
-      return NULL;
-   if (n == 1)
-      return vals[0];
+         bld_warn_uninitialized(bld, 1, stack, b);
+
+         /* back-tracking to insert missing value of other path */
+         in = b;
+         while (in->in[0]) {
+            if (in->num_in == 1) {
+               in = in->in[0];
+            } else {
+               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b)) {
+                  in = in->in[0];
+                  break;
+               }
+               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b)) {
+                  in = in->in[1];
+                  break;
+               }
+               in = in->in[0];
+            }
+         }
+         bld->pc->current_block = in;
+
+         /* should make this a no-op */
+         bld_vals_push_val(stack, bld_load_imm_u32(bld, 0));
+         continue;
+      }
 
-   debug_printf("phi required: %i candidates\n", n);
+      for (i = 0; i < n; ++i) {
+         if (nvbb_dominated_by(b, vals[i]->insn->bb))
+            continue;
 
-   while (i < n) {
-      struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_PHI);
+         for (j = 0; j < b->num_in; ++j)
+            if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
+               break;
+         if (j == b->num_in) {
+            in = nvbb_dom_frontier(vals[i]->insn->bb);
+            val = bld_phi(bld, in, stack);
+            bld_vals_push_val(stack, val);
+            break;
+         }
+      }
+   } while(i < n);
 
-      j = phi ? 1 : 0;
-      if (phi)
-         insn->src[0] = new_ref(bld->pc, phi);
+   bld->pc->current_block = b;
 
-      phi = new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type);
+   if (n == 1)
+      return vals[0];
 
-      bld_def(insn, 0, phi);
+   phi = new_instruction(bld->pc, NV_OP_PHI);
 
-      for (; j < 4; ++j) {
-         insn->src[j] = new_ref(bld->pc, vals[i++]);
-         if (i == n)
-            break;
-      }
-      debug_printf("new phi: %i, %i in\n", phi->n, j);
-   }
+   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type));
+   for (i = 0; i < n; ++i)
+      phi->src[i] = new_ref(bld->pc, vals[i]);
 
-   /* insert_at_head(list, phi) is done at end of block */
-   return phi;
+   return phi->def[0];
+}
+
+static INLINE struct nv_value *
+bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
+{
+   return bld_phi(bld, bld->pc->current_block, stack);
 }
 
 static INLINE struct nv_value *
@@ -640,6 +706,9 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
 
    for (i = 0; i < 4; ++i)
       bld->saved_addr[i][0] = NULL;
+
+   for (i = 0; i < 128; ++i)
+      bld->saved_inputs[i] = NULL;
 }
 
 static struct nv_value *