[Nouveau] [PATCH 3/3] nv50/ir: Fold IMM into MAD
Roy Spliet
rspliet at eclipso.eu
Sat Jan 10 16:23:00 PST 2015
Add an NV50-specific optimisation pass that checks whether SRC0 or SRC1 of a
MAD is defined by a MOV dst, IMM. If so, fold the IMM into the MAD and try to
drop the MOV. This must be done post-RA because folding requires SDST == SSRC2.
Signed-off-by: Roy Spliet <rspliet at eclipso.eu>
---
.../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 52 ++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 21d20ca..1fc3ae6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2259,6 +2259,56 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb)
// =============================================================================
+// Fold Immediate into MAD; must be done after register allocation due to
+// constraint SDST == SSRC2
+// TODO:
+// Does NVC0+ have other situations where this pass makes sense?
+// Pass whose per-block hook folds MOV-loaded immediates into MAD sources.
+class NV50PostRaConstantFolding : public Pass
+{
+   // Members of a class are private by default, so no access specifier is
+   // spelled out for the per-block hook.
+   virtual bool visit(BasicBlock *bb);
+};
+
+// Walk every instruction of @bb and, for each OP_MAD whose destination and
+// three sources are all in FILE_GPR and whose destination shares its register
+// id with SRC2, replace a SRC0/SRC1 operand that is defined by
+// "MOV dst, IMM" with that immediate.
+//
+// The immediate always ends up in SRC1; when it was found through SRC0, the
+// value previously in SRC1 is moved to SRC0. The MOV is deleted once the
+// value it produced has no references left.
+//
+// Always returns true.
+bool
+NV50PostRaConstantFolding::visit(BasicBlock *bb)
+{
+   for (Instruction *i = bb->getFirst(); i; i = i->next) {
+      if (i->op != OP_MAD)
+         continue;
+
+      // Folding is only attempted on the all-GPR form with SDST == SSRC2.
+      if (i->def(0).getFile() != FILE_GPR ||
+          i->src(0).getFile() != FILE_GPR ||
+          i->src(1).getFile() != FILE_GPR ||
+          i->src(2).getFile() != FILE_GPR ||
+          i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
+         continue;
+
+      // Try SRC1 first, then SRC0; at most one operand is folded per MAD.
+      for (int s = 1; s >= 0; s--) {
+         Value *src = i->getSrc(s);
+         Instruction *def = src->getInsn();
+
+         // Guard against a value that has no defining instruction:
+         // presumably getInsn() yields NULL in that case, and the MOV test
+         // below would otherwise dereference it.
+         if (!def || def->op != OP_MOV ||
+             def->src(0).getFile() != FILE_IMMEDIATE)
+            continue;
+
+         // The immediate goes into SRC1, so a match on SRC0 first moves the
+         // other multiplicand into the SRC0 slot.
+         // NOTE(review): only the Values move between slots; any source
+         // modifiers stay on their original slot - confirm this is intended.
+         if (s == 0)
+            i->setSrc(0, i->getSrc(1));
+         i->setSrc(1, def->getSrc(0));
+
+         // Drop the MOV here once nothing references its result any more.
+         if (src->refCount() == 0)
+            delete_Instruction(bb->getProgram(), def);
+         break;
+      }
+   }
+
+   return true;
+}
+
+// =============================================================================
+
// Common subexpression elimination. Stupid O^2 implementation.
class LocalCSE : public Pass
{
@@ -2629,6 +2679,8 @@ bool
Program::optimizePostRA(int level)
{
RUN_PASS(2, FlatteningPass, run);
+ if (getTarget()->getChipset() < 0xc0) // post-RA MAD immediate folding is only run on chipsets below 0xc0
+ RUN_PASS(2, NV50PostRaConstantFolding, run);
return true;
}
--
2.1.0
More information about the Nouveau
mailing list