[Mesa-dev] [PATCH v3 9/9] nv50/ir: optimize ADD3(d, a, b, c) to ADD(d, a, b + c)

Samuel Pitoiset samuel.pitoiset at gmail.com
Tue Sep 13 19:36:12 UTC 2016


Likewise, ADD3(d, a, b, c) becomes ADD(d, b, a + c). In both cases the two
summed sources must be immediates.

The effect is very modest because OP_ADD3 only supports integers, but this
can reduce the number of instructions in some shaders.

total instructions in shared programs :2594754 -> 2594686 (-0.00%)
total gprs used in shared programs    :366893 -> 366919 (0.01%)
total local used in shared programs   :31872 -> 31872 (0.00%)

                local        gpr       inst      bytes
    helped           0           0          39          39
      hurt           0          26           0           0

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 62 ++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 6ba2af6..e5e6e8e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -374,6 +374,7 @@ private:
    void expr(Instruction *, ImmediateValue&, ImmediateValue&);
    void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
    void opnd(Instruction *, ImmediateValue&, int s);
+   void opnd2(Instruction *, ImmediateValue&, int, ImmediateValue&, int);
    void opnd3(Instruction *, ImmediateValue&);
 
    void unary(Instruction *, const ImmediateValue&);
@@ -429,6 +430,13 @@ ConstantFolding::visit(BasicBlock *bb)
          opnd(i, src1, 1);
       if (i->srcExists(2) && i->src(2).getImmediate(src2))
          opnd3(i, src2);
+      if (i->srcExists(2) &&
+          i->src(0).getImmediate(src0) && i->src(2).getImmediate(src2))
+         opnd2(i, src0, 0, src2, 2);
+      else
+      if (i->srcExists(2) &&
+          i->src(1).getImmediate(src1) && i->src(2).getImmediate(src2))
+         opnd2(i, src1, 1, src2, 2);
    }
    return true;
 }
@@ -960,6 +968,60 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
 }
 
 void
+ConstantFolding::opnd2(Instruction *i, ImmediateValue &imm0, int s0, // merge two immediate sources of i into one
+                       ImmediateValue &imm1, int s1) // s0: source slot imm0 came from; s1 is not read here
+{
+   struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
+   ImmediateValue src0, src1;
+   struct Storage res;
+   DataType type = i->dType;
+
+   memset(&res.data, 0, sizeof(res.data)); // start from an all-zero result
+
+   switch (i->op) { // only OP_ADD3 is handled; every other case returns with i untouched
+   case OP_ADD3:
+      switch (i->dType) {
+      case TYPE_S32:
+      case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break; // both 32-bit integer types share the u32 add
+      default:
+         return;
+      }
+      break;
+   default:
+      return;
+   }
+   ++foldCount; // reached only once a fold is certain
+
+   i->op = OP_ADD; // two sources remain, so the instruction becomes a plain ADD
+
+   if (s0 == 0) { // imm0 sat in slot 0: move source 1 and its modifier down into slot 0
+      i->setSrc(0, i->getSrc(1));
+      i->src(0).mod = i->src(1).mod;
+   }
+
+   i->setSrc(1, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); // the sum becomes source 1
+   i->setSrc(2, NULL); // the third source is dropped
+
+   i->getSrc(1)->reg.data = res.data; // stamp the new immediate with the result and the instruction's dType
+   i->getSrc(1)->reg.type = type;
+   i->getSrc(1)->reg.size = typeSizeof(type);
+
+   src1 = *i->getSrc(1)->asImm(); // keep a copy of the immediate for the follow-up folding below
+
+   // Keep the folded immediate in source 1, where an immediate might be
+   // emittable. Other restrictions may still forbid it there, so load it
+   // into a fresh SSA value with a MOV instead of referencing it directly.
+   bld.setPosition(i, false);
+   i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(1), type)->getDef(0));
+   i->src(1).mod = Modifier(0); // the MOV result carries no source modifier
+
+   if (i->src(0).getImmediate(src0)) // source 0 resolves to an immediate too
+      expr(i, src0, src1); // hand both immediates to the two-immediate expr()
+   else
+      opnd(i, src1, 1); // otherwise try the single-immediate opnd() path for slot 1
+}
+
+void
 ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
 {
    const int t = !s;
-- 
2.9.3



More information about the mesa-dev mailing list