Mesa (nv50-compiler): nv50: improve and fix modifier folding optimization

Wed Sep 15 13:41:34 UTC 2010

Module: Mesa
Branch: nv50-compiler
Commit: c46e7a05e501e02b10dbc06772c0ef01308f60d5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c46e7a05e501e02b10dbc06772c0ef01308f60d5

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Wed Sep 15 13:59:09 2010 +0200

nv50: improve and fix modifier folding optimization

Execute before folding loads, because we don't check if it's legal
in lower_mods.
Ensure that a value's insn pointer is updated when transferring it
to a different instruction.

---

 src/gallium/drivers/nv50/nv50_pc.c          |    1 +
 src/gallium/drivers/nv50/nv50_pc.h          |    1 -
 src/gallium/drivers/nv50/nv50_pc_emit.c     |    5 ++
 src/gallium/drivers/nv50/nv50_pc_optimize.c |   65 ++++++++++++++++----------
 4 files changed, 46 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
index 78aca8f..2706d88 100644
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -104,6 +104,7 @@ nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
    case NV_OP_FLOOR:
    case NV_OP_TRUNC:
    case NV_OP_CVT:
+   case NV_OP_NEG:
    case NV_OP_MAD:
    case NV_OP_MUL:
    case NV_OP_SAT:
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index 8f15a82..92c6be5 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -220,7 +220,6 @@ struct nv_value {
 
 struct nv_ref {
    struct nv_value *value;
-   struct nv_instruction *insn;
    ubyte mod;
    ubyte typecast;
    ubyte flags; /* not used yet */
diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c
index 1eb4474..137a531 100644
--- a/src/gallium/drivers/nv50/nv50_pc_emit.c
+++ b/src/gallium/drivers/nv50/nv50_pc_emit.c
@@ -654,6 +654,8 @@ emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
 {
    pc->emit[0] = 0xb0000000;
 
+   assert(!((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS));
+
    if (SFILE(i, 1) == NV_FILE_IMM) {
       emit_form_IMM(pc, i, 0);
 
@@ -665,6 +667,9 @@ emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
 
       if (i->src[0]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 26;
       if (i->src[1]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 27;
+
+      if (i->saturate)
+         pc->emit[1] |= 0x20000000;
    } else {
       emit_form_MUL(pc, i);
 
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index 3ff6db7..921ed15 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -336,6 +336,7 @@ nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
          continue;
 
       nvi->def[0] = sti->def[0];
+      nvi->def[0]->insn = nvi;
       nvi->fixed = sti->fixed;
 
       nv_nvi_delete(sti);
@@ -374,7 +375,7 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
          if (j == 0 && ld->src[4]) /* can't load shared mem */
             continue;
 
-         /* fold it ! */ /* XXX: ref->insn */
+         /* fold it ! */
          nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
          if (ld->src[4])
             nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
@@ -388,6 +389,7 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
    return 0;
 }
 
+/* NOTE: Assumes loads have not yet been folded. */
 static int
 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
 {
@@ -402,14 +404,7 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
          nvi->src[1]->mod ^= NV_MOD_NEG;
       }
 
-      /* should not put any modifiers on NEG and ABS */
-      assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod);
-      assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod);
-
-      for (j = 0; j < 4; ++j) {
-         if (!nvi->src[j])
-            break;
-
+      for (j = 0; j < 4 && nvi->src[j]; ++j) {
          mi = nvi->src[j]->value->insn;
          if (!mi)
             continue;
@@ -421,16 +416,32 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
          if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
          else
             continue;
+         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
+
+         mod |= mi->src[0]->mod;
+
+         if (mi->flags_def || mi->flags_src)
+            continue;
 
-         if (nvi->opcode == NV_OP_ABS)
+         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
+            /* abs neg [abs] = abs */
             mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
-         else
-         if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
-            nvi->opcode = NV_OP_MOV;
+         } else
+         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
+            /* neg as opcode and modifier on same insn cannot occur */
+            /* neg neg abs = abs, neg neg = identity */
+            assert(j == 0);
+            if (mod & NV_MOD_ABS)
+               nvi->opcode = NV_OP_ABS;
+            else
+            if (nvi->flags_def)
+               nvi->opcode = NV_OP_CVT;
+            else
+               nvi->opcode = NV_OP_MOV;
             mod = 0;
          }
 
-         if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
+         if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod)
             continue;
 
          nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
@@ -441,11 +452,15 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
       if (nvi->opcode == NV_OP_SAT) {
          mi = nvi->src[0]->value->insn;
 
-         if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
-            mi->saturate = 1;
-            mi->def[0] = nvi->def[0];
-            nv_nvi_delete(nvi);
-         }
+         if (mi->opcode != NV_OP_ADD || mi->opcode != NV_OP_MAD)
+            continue;
+         if (mi->flags_def || mi->def[0]->refc > 1)
+            continue;
+
+         mi->saturate = 1;
+         mi->def[0] = nvi->def[0];
+         mi->def[0]->insn = mi;
+         nv_nvi_delete(nvi);
       }
    }
    DESCEND_ARBITRARY(j, nv_pass_lower_mods);
@@ -956,7 +971,7 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
          for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
             if (!nv50_nvi_can_predicate(nvi))
                break;
-#ifdef NV50_PC_DEBUG
+#ifdef NV50PC_DEBUG
          if (nvi) {
             debug_printf("cannot predicate: "); nv_print_instruction(nvi);
          }
@@ -1082,6 +1097,11 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
       return ret;
 
    pc->pass_seq++;
+   ret = nv_pass_lower_mods(&pass, root);
+   if (ret)
+      return ret;
+
+   pc->pass_seq++;
    ret = nv_pass_fold_loads(&pass, root);
    if (ret)
       return ret;
@@ -1106,11 +1126,6 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
    if (ret)
       return ret;
 
-   pc->pass_seq++;
-   ret = nv_pass_lower_mods(&pass, root);
-   if (ret)
-      return ret;
-
    dce.pc = pc;
    do {
       dce.removed = 0;