[Nouveau] [PATCH 2/3] nv50/ir: Fold IMM into MAD

Roy Spliet rspliet at eclipso.eu
Tue Jan 13 12:35:48 PST 2015


Add a specific optimisation pass for NV50 that checks whether SRC0 or SRC1 of
a MAD is defined by a "MOV dst, IMM". If so, fold the IMM in and try to drop
the MOV. This must be done post-RA because it requires that SDST == SSRC2.

V2: improve readability and add comments to clarify decisions

Signed-off-by: Roy Spliet <rspliet at eclipso.eu>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 60 ++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 21d20ca..723c255 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2259,6 +2259,63 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb)
 
 // =============================================================================
 
+// Post-RA pass that folds an immediate loaded by a MOV into a MAD source.
+// It must run after register allocation because the fold requires
+// SDST == SSRC2.
+// TODO: Does NVC0+ have other situations where this pass makes sense?
+class NV50PostRaConstantFolding : public Pass
+{
+private:
+   virtual bool visit(BasicBlock *);
+};
+
+// Folds MOV-immediate sources into qualifying MADs of bb; returns true.
+bool
+NV50PostRaConstantFolding::visit(BasicBlock *bb)
+{
+   for (Instruction *i = bb->getFirst(); i; i = i->next) {
+      // Only a MAD with all-GPR operands whose destination and SRC2 share a
+      // register id is considered (the SDST == SSRC2 constraint).
+      if (i->op != OP_MAD ||
+          i->def(0).getFile() != FILE_GPR ||
+          i->src(0).getFile() != FILE_GPR ||
+          i->src(1).getFile() != FILE_GPR ||
+          i->src(2).getFile() != FILE_GPR ||
+          i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
+         continue;
+
+      // Try SRC1 first, then swap SRC0/SRC1 and try again. When neither
+      // candidate folds, the second swap restores the original order.
+      for (int s = 0; s < 2; s++) {
+         Instruction *def = i->getSrc(1)->getInsn();
+
+         // NOTE(review): getInsn() can presumably return NULL when the value
+         // has no defining instruction; guard before dereferencing.
+         if (def && def->op == OP_MOV &&
+             def->src(0).getFile() == FILE_IMMEDIATE) {
+            Value *folded = i->getSrc(1);
+            i->setSrc(1, def->getSrc(0));
+
+            /* There's no post-RA dead code elimination, so do it here
+             * XXX: if we add more code-removing post-RA passes, we might
+             *      want to create a post-RA dead-code elim pass */
+            if (folded->refCount() == 0)
+               delete_Instruction(bb->getProgram(), def);
+            break;
+         }
+
+         /* Swap inputs, IMM must be SRC1 */
+         Value *vtmp = i->getSrc(0);
+         i->setSrc(0, i->getSrc(1));
+         i->setSrc(1, vtmp);
+      }
+   }
+
+   return true;
+}
+
+// =============================================================================
+
 // Common subexpression elimination. Stupid O^2 implementation.
 class LocalCSE : public Pass
 {
@@ -2629,6 +2686,9 @@ bool
 Program::optimizePostRA(int level)
 {
    RUN_PASS(2, FlatteningPass, run);
+   if (getTarget()->getChipset() < 0xc0)
+      RUN_PASS(2, NV50PostRaConstantFolding, run);
+
    return true;
 }
 
-- 
2.1.0





More information about the Nouveau mailing list