[Mesa-dev] [PATCH v2 5/6] nv50/ir: implement mad post ra folding for nvc0+
Karol Herbst
karolherbst at gmail.com
Sun Oct 9 09:04:55 UTC 2016
changes for GpuTest /test=pixmark_piano /benchmark /no_scorebox /msaa=0
/benchmark_duration_ms=60000 /width=1024 /height=640:
score: 1026 -> 1044
changes for shader-db:
total instructions in shared programs : 2818606 -> 2811662 (-0.25%)
total gprs used in shared programs : 379273 -> 379273 (0.00%)
total local used in shared programs : 9505 -> 9505 (0.00%)
total bytes used in shared programs : 25837192 -> 25773432 (-0.25%)
local gpr inst bytes
helped 0 0 3084 3084
hurt 0 0 0 0
v2: removed TODO
reordered to show changes without RA modification
removed stale debugging print() call
Signed-off-by: Karol Herbst <karolherbst at gmail.com>
---
.../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 64 +++++++++++++++++++---
1 file changed, 57 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 1f47ba2..bcbc0c0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2954,19 +2954,18 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb)
// Fold Immediate into MAD; must be done after register allocation due to
// constraint SDST == SSRC2
-// TODO:
-// Does NVC0+ have other situations where this pass makes sense?
class PostRaConstantFolding : public Pass
{
private:
virtual bool visit(Instruction *);
- void handleMAD(Instruction *);
+ void handleMADforNV50(Instruction *);
+ void handleMADforNVC0(Instruction *);
};
// Fold Immediate into MAD; must be done after register allocation due to
// constraint SDST == SSRC2
void
-PostRaConstantFolding::handleMAD(Instruction *i)
+PostRaConstantFolding::handleMADforNV50(Instruction *i)
{
if (i->def(0).getFile() != FILE_GPR ||
i->src(0).getFile() != FILE_GPR ||
@@ -3019,12 +3018,64 @@ PostRaConstantFolding::handleMAD(Instruction *i)
}
}
+void
+PostRaConstantFolding::handleMADforNVC0(Instruction *i) // NVC0+ variant: replace an immediate-fed MAD multiplicand with the immediate itself
+{
+ if (i->def(0).getFile() != FILE_GPR || // bail out unless the def and all three sources are GPRs ...
+ i->src(0).getFile() != FILE_GPR ||
+ i->src(1).getFile() != FILE_GPR ||
+ i->src(2).getFile() != FILE_GPR ||
+ i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id) // ... and the def shares its register id with src2
+ return;
+
+ int chipset = prog->getTarget()->getChipset();
+ if (i->getPredicate()) {
+ // prior to gk110 we can't do this fold if the instruction has a predicate
+ if (chipset < NVISA_GK20A_CHIPSET)
+ return;
+ // and chipsets below gm107 (e.g. gk110) can't handle a cc on a predicated instruction
+ if (chipset < NVISA_GM107_CHIPSET && i->cc)
+ return;
+ }
+
+ // TODO: gm107 can also do this for S32; only F32 is folded for now
+ if (i->dType != TYPE_F32)
+ return;
+
+ if ((i->src(2).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG)) // src2 may carry no modifier other than NEG (assuming Modifier's | is a bitwise OR)
+ return;
+
+ ImmediateValue val; // only used as the out-parameter of getImmediate(); not read afterwards in this function
+ int s; // index (0 or 1) of the multiplicand that is NOT the immediate
+
+ if (i->src(0).getImmediate(val)) // src0 resolves to an immediate, so src1 is the other operand
+ s = 1;
+ else if (i->src(1).getImmediate(val)) // src1 resolves to an immediate, so src0 is the other operand
+ s = 0;
+ else
+ return; // neither multiplicand is an immediate: nothing to fold
+
+ if ((i->src(s).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG)) // same NEG-only restriction for the non-immediate operand
+ return;
+
+ if (s == 1) // immediate is currently in slot 0: move it to slot 1
+ i->swapSources(0, 1);
+
+ Instruction *imm = i->getSrc(1)->getInsn(); // instruction that defines the value in slot 1
+ i->setSrc(1, imm->getSrc(0)); // use that instruction's first source directly (presumably the immediate itself)
+ if (imm->isDead(true)) // remove the defining instruction once isDead() reports it dead
+ delete_Instruction(prog, imm);
+}
+
bool
PostRaConstantFolding::visit(Instruction *i)
{
switch (i->op) {
case OP_MAD:
- handleMAD(i);
+ if (prog->getTarget()->getChipset() < 0xc0)
+ handleMADforNV50(i);
+ else
+ handleMADforNVC0(i);
break;
default:
break;
@@ -3447,8 +3498,7 @@ bool
Program::optimizePostRA(int level)
{
RUN_PASS(2, FlatteningPass, run);
- if (getTarget()->getChipset() < 0xc0)
- RUN_PASS(2, PostRaConstantFolding, run);
+ RUN_PASS(2, PostRaConstantFolding, run);
return true;
}
--
2.10.0
More information about the mesa-dev
mailing list