[Mesa-dev] [PATCH v3 9/9] nv50/ir: optimize ADD3(d, a, b, c) to ADD(d, a, b + c)
Samuel Pitoiset
samuel.pitoiset at gmail.com
Tue Sep 13 19:36:12 UTC 2016
Also optimize ADD3(d, a, b, c) to ADD(d, b, a + c) when a and c are immediates.
Very modest effect because OP_ADD3 only supports integers, but can
reduce the number of instructions in some shaders.
total instructions in shared programs :2594754 -> 2594686 (-0.00%)
total gprs used in shared programs :366893 -> 366919 (0.01%)
total local used in shared programs :31872 -> 31872 (0.00%)
              local        gpr       inst      bytes
helped            0          0         39         39
hurt              0         26          0          0
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
.../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 62 ++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 6ba2af6..e5e6e8e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -374,6 +374,7 @@ private:
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
void opnd(Instruction *, ImmediateValue&, int s);
+ void opnd2(Instruction *, ImmediateValue&, int, ImmediateValue&, int);
void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@@ -429,6 +430,13 @@ ConstantFolding::visit(BasicBlock *bb)
opnd(i, src1, 1);
if (i->srcExists(2) && i->src(2).getImmediate(src2))
opnd3(i, src2);
+ if (i->srcExists(2) &&
+ i->src(0).getImmediate(src0) && i->src(2).getImmediate(src2))
+ opnd2(i, src0, 0, src2, 2);
+ else
+ if (i->srcExists(2) &&
+ i->src(1).getImmediate(src1) && i->src(2).getImmediate(src2))
+ opnd2(i, src1, 1, src2, 2);
}
return true;
}
@@ -960,6 +968,60 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
}
void
+ConstantFolding::opnd2(Instruction *i, ImmediateValue &imm0, int s0,
+ ImmediateValue &imm1, int s1)
+{ /* Fold two immediate sources of a three-source add into one immediate.
+    *
+    * Only OP_ADD3 with a TYPE_S32 or TYPE_U32 destination type is handled;
+    * for any other opcode or type the function returns without touching i.
+    * On success i becomes a two-source OP_ADD whose source 1 holds
+    * imm0 + imm1, and the result is handed on to expr() or opnd() for
+    * further folding.
+    *
+    * i     instruction to rewrite in place
+    * imm0  value of the first immediate source
+    * s0    source slot imm0 came from; only s0 == 0 changes the rewrite
+    * imm1  value of the second immediate source
+    * s1    source slot imm1 came from; not read in this body
+    */
+ struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
+ ImmediateValue src0, src1;
+ struct Storage res;
+ DataType type = i->dType;
+
+ memset(&res.data, 0, sizeof(res.data)); // start from an all-zero payload; only u32 is written below
+
+ switch (i->op) {
+ case OP_ADD3:
+ switch (i->dType) {
+ case TYPE_S32: // signed and unsigned share the same u32 addition
+ case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break;
+ default:
+ return; // other destination types are left unfolded
+ }
+ break;
+ default:
+ return; // only OP_ADD3 is handled here
+ }
+ ++foldCount;
+
+ i->op = OP_ADD;
+
+ if (s0 == 0) { // imm0 sat in slot 0: copy source 1 and its modifier down so slot 1 can take the sum
+ i->setSrc(0, i->getSrc(1));
+ i->src(0).mod = i->src(1).mod;
+ }
+
+ i->setSrc(1, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); // slot 1 now holds the folded sum
+ i->setSrc(2, NULL); // drop the third source
+
+ i->getSrc(1)->reg.data = res.data; // stamp payload, type and size onto the new immediate
+ i->getSrc(1)->reg.type = type;
+ i->getSrc(1)->reg.size = typeSizeof(type);
+
+ src1 = *i->getSrc(1)->asImm(); // snapshot the immediate before slot 1 is replaced by the MOV result below
+
+ // Move the immediate into position 1, where we know it might be
+ // emittable. However it might not be anyways, as there may be other
+ // restrictions, so move it into a separate LValue.
+ bld.setPosition(i, false); // NOTE(review): presumably positions the builder before i -- confirm against BuildUtil::setPosition
+ i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(1), type)->getDef(0));
+ i->src(1).mod = Modifier(0); // clear any modifier left on slot 1
+
+ if (i->src(0).getImmediate(src0)) // both remaining sources are immediates: use the two-immediate expr()
+ expr(i, src0, src1);
+ else
+ opnd(i, src1, 1); // otherwise try the single-immediate path on source 1
+}
+
+void
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
--
2.9.3
More information about the mesa-dev
mailing list