[Mesa-dev] [PATCH] nv50/ir: handle SHLADD in IndirectPropagation
Rhys Perry
pendingchaos02 at gmail.com
Mon Jun 11 12:39:26 UTC 2018
An alternative solution to the problem fixed in
0bd83d0 ("nv50/ir: move LateAlgebraicOpt to the very end"). Should be
useful in the future and seems to make dolphin ubershaders a bit smaller.
total instructions in shared programs : 226722 -> 226464 (-0.11%)
total gprs used in shared programs : 19378 -> 19378 (0.00%)
total shared used in shared programs : 0 -> 0 (0.00%)
total local used in shared programs : 0 -> 0 (0.00%)
local shared gpr inst bytes
helped 0 0 0 51 51
hurt 0 0 0 0 0
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 39177bd044..4d0589214d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -283,6 +283,8 @@ class IndirectPropagation : public Pass
{
private:
virtual bool visit(BasicBlock *);
+
+ BuildUtil bld;
};
bool
@@ -294,6 +296,8 @@ IndirectPropagation::visit(BasicBlock *bb)
for (Instruction *i = bb->getEntry(); i; i = next) {
next = i->next;
+ bld.setPosition(i, false);
+
for (int s = 0; i->srcExists(s); ++s) {
Instruction *insn;
ImmediateValue imm;
@@ -325,6 +329,14 @@ IndirectPropagation::visit(BasicBlock *bb)
i->setIndirect(s, 0, NULL);
i->setSrc(s, cloneShallow(func, i->getSrc(s)));
i->src(s).get()->reg.data.offset += imm.reg.data.u32;
+ } else if (insn->op == OP_SHLADD) {
+ if (!insn->src(2).getImmediate(imm) ||
+ !targ->insnCanLoadOffset(i, s, imm.reg.data.s32))
+ continue;
+ i->setIndirect(s, 0, bld.mkOp2v(
+ OP_SHL, TYPE_U32, bld.getSSA(), insn->getSrc(0), insn->getSrc(1)));
+ i->setSrc(s, cloneShallow(func, i->getSrc(s)));
+ i->src(s).get()->reg.data.offset += imm.reg.data.u32;
}
}
}
@@ -3797,11 +3809,11 @@ Program::optimizeSSA(int level)
RUN_PASS(2, AlgebraicOpt, run);
RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
RUN_PASS(1, ConstantFolding, foldAll);
+ RUN_PASS(2, LateAlgebraicOpt, run);
RUN_PASS(1, Split64BitOpPreRA, run);
RUN_PASS(1, LoadPropagation, run);
RUN_PASS(1, IndirectPropagation, run);
RUN_PASS(2, MemoryOpt, run);
- RUN_PASS(2, LateAlgebraicOpt, run);
RUN_PASS(2, LocalCSE, run);
RUN_PASS(0, DeadCodeElim, buryAll);
--
2.14.4
More information about the mesa-dev
mailing list