[Mesa-dev] [PATCH 2/2] nvc0/ir: replace cvt instructions with add to improve shader performance
Karol Herbst
kherbst at redhat.com
Thu Dec 13 23:14:41 UTC 2018
gives me an performance boost of 0.2% in pixmark_piano on my gk106, gm204 and
gp107.
changes in shader-db:
total instructions in shared programs : 7614782 -> 7614782 (0.00%)
total cvt instructions in shared programs : 139343 -> 95856 (-31.21%)
total gprs used in shared programs : 798045 -> 798045 (0.00%)
total shared used in shared programs : 639636 -> 639636 (0.00%)
total local used in shared programs : 24648 -> 24648 (0.00%)
total bytes used in shared programs : 81330696 -> 81330696 (0.00%)
local shared gpr inst cvts bytes
helped 0 0 0 0 14037 0
hurt 0 0 0 0 0 0
Signed-off-by: Karol Herbst <kherbst at redhat.com>
---
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 55 +++++++++++++++++++
.../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 +
2 files changed, 56 insertions(+)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 295497be2f9..267fef0898d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -719,6 +719,58 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
bb->remove(bb->getEntry());
}
+// replaces instructions which would end up as f2f or i2i with faster
+// alternatives:
+// - abs(a) -> add(0, abs a)
+// - neg(a) -> add(0, neg a)
+// - neg(abs a) -> add(0, neg abs a)
+// - sat(a) -> sat add(0, a)
+void
+NVC0LegalizePostRA::replaceCvt(Instruction *cvt)
+{
+ if (typeSizeof(cvt->sType) == 8)
+ return;
+ if (cvt->sType != cvt->dType)
+ return;
+ // we could make it work, but in this case we have optimizations disabled
+ // and we don't really care either way.
+ if (cvt->src(0).getFile() == FILE_IMMEDIATE)
+ return;
+
+ switch (cvt->op) {
+ case OP_ABS:
+ if (cvt->src(0).mod)
+ break;
+ if (!isFloatType(cvt->sType))
+ break;
+ cvt->op = OP_ADD;
+ cvt->moveSources(0, 1);
+ cvt->src(1).mod = NV50_IR_MOD_ABS;
+ cvt->setSrc(0, rZero);
+ cvt->src(0).mod = 0;
+ break;
+ case OP_NEG:
+ if (cvt->src(0).mod && (cvt->src(0).mod.neg() || !isFloatType(cvt->sType)))
+ break;
+ cvt->op = OP_ADD;
+ cvt->moveSources(0, 1);
+ cvt->src(1).mod = cvt->src(0).mod ? NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG;
+ cvt->setSrc(0, rZero);
+ cvt->src(0).mod = 0;
+ break;
+ case OP_SAT:
+ if (cvt->src(0).mod)
+ break;
+ cvt->op = OP_ADD;
+ cvt->moveSources(0, 1);
+ cvt->setSrc(0, rZero);
+ cvt->saturate = true;
+ break;
+ default:
+ break;
+ }
+}
+
bool
NVC0LegalizePostRA::visit(BasicBlock *bb)
{
@@ -758,6 +810,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
next = hi;
}
+ if (i->isCvt())
+ replaceCvt(i);
+
if (i->op != OP_MOV && i->op != OP_PFETCH)
replaceZero(i);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index e0f50ab0904..4679c56471b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -81,6 +81,7 @@ private:
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
+ void replaceCvt(Instruction *);
void replaceZero(Instruction *);
bool tryReplaceContWithBra(BasicBlock *);
void propagateJoin(BasicBlock *);
--
2.19.2
More information about the mesa-dev
mailing list