[Mesa-dev] [PATCH v3] nvc0/ir: replace cvt instructions with add to improve shader performance
Karol Herbst
kherbst at redhat.com
Mon Jan 28 21:21:34 UTC 2019
gives me an performance boost of 0.2% in pixmark_piano on my gk106, gm204 and
gp107.
reduces the amount of generated convert instructions by roughly 30% in
shader-db.
v2: only for 32 bit operations
move some common code out of the switch
handle OP_SAT with modifiers
v3: only for registers and const memory
rework if clauses
merge isCvt into this patch
Signed-off-by: Karol Herbst <kherbst at redhat.com>
---
src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 +
.../drivers/nouveau/codegen/nv50_ir_inlines.h | 17 +++++
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 64 +++++++++++++++++++
.../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 +
4 files changed, 83 insertions(+)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 8085bb2f542..b00a005bdef 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -867,6 +867,7 @@ public:
inline bool isPseudo() const { return op < OP_MOV; }
bool isDead() const;
bool isNop() const;
+ inline bool isCvt() const;
bool isCommutationLegal(const Instruction *) const; // must be adjacent !
bool isActionEqual(const Instruction *) const;
bool isResultEqual(const Instruction *) const;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
index 4cb53ab42ed..06882058dc9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
@@ -419,4 +419,21 @@ LValue *Function::getLValue(int id)
return reinterpret_cast<LValue *>(allLValues.get(id));
}
+bool Instruction::isCvt() const
+{
+ switch (op) {
+ case OP_ABS:
+ case OP_CEIL:
+ case OP_FLOOR:
+ case OP_NEG:
+ case OP_TRUNC:
+ case OP_SAT:
+ return true;
+ case OP_CVT:
+ return def(0).getFile() != FILE_PREDICATE && src(0).getFile() != FILE_PREDICATE;
+ default:
+ return false;
+ }
+}
+
#endif // __NV50_IR_INLINES_H__
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 295497be2f9..28f0aae6432 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -719,6 +719,67 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
bb->remove(bb->getEntry());
}
+// replaces instructions which would end up as f2f or i2i with faster
+// alternatives:
+// - abs(a) -> add(0, abs a)
+// - fneg(a) -> fadd(neg 0, neg a)
+// - ineg(a) -> iadd(0, neg a)
+// - fneg(abs a) -> fadd(neg 0, neg abs a)
+// - ineg(abs a) -> iadd(0, neg abs a)
+// - sat(a) -> sat add(0, a)
+void
+NVC0LegalizePostRA::replaceCvt(Instruction *cvt)
+{
+ if (!isFloatType(cvt->sType) && typeSizeof(cvt->sType) != 4)
+ return;
+ if (cvt->sType != cvt->dType)
+ return;
+ // we could make it work, but in this case we have optimizations disabled
+ // and we don't really care either way.
+ if (cvt->src(0).getFile() != FILE_GPR &&
+ cvt->src(0).getFile() != FILE_MEMORY_CONST)
+ return;
+
+ Modifier mod0, mod1;
+
+ switch (cvt->op) {
+ case OP_ABS:
+ if (cvt->src(0).mod)
+ return;
+ if (!isFloatType(cvt->sType))
+ return;
+ mod0 = 0;
+ mod1 = NV50_IR_MOD_ABS;
+ break;
+ case OP_NEG:
+ if (!isFloatType(cvt->sType) && cvt->src(0).mod)
+ return;
+ if (isFloatType(cvt->sType) &&
+ (cvt->src(0).mod && cvt->src(0).mod != Modifier(NV50_IR_MOD_ABS)))
+ return;
+
+ mod0 = isFloatType(cvt->sType) ? NV50_IR_MOD_NEG : 0;
+ mod1 = cvt->src(0).mod == Modifier(NV50_IR_MOD_ABS)
+ ? NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG;
+ break;
+ case OP_SAT:
+ if (!isFloatType(cvt->sType) && cvt->src(0).mod.abs())
+ return;
+ mod0 = 0;
+ mod1 = cvt->src(0).mod;
+ cvt->saturate = true;
+ break;
+ default:
+ return;
+ }
+
+ cvt->op = OP_ADD;
+ cvt->moveSources(0, 1);
+ cvt->setSrc(0, rZero);
+ cvt->src(0).mod = mod0;
+ cvt->src(1).mod = mod1;
+}
+
bool
NVC0LegalizePostRA::visit(BasicBlock *bb)
{
@@ -758,6 +819,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
next = hi;
}
+ if (i->isCvt())
+ replaceCvt(i);
+
if (i->op != OP_MOV && i->op != OP_PFETCH)
replaceZero(i);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index e0f50ab0904..4679c56471b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -81,6 +81,7 @@ private:
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
+ void replaceCvt(Instruction *);
void replaceZero(Instruction *);
bool tryReplaceContWithBra(BasicBlock *);
void propagateJoin(BasicBlock *);
--
2.20.1
More information about the mesa-dev
mailing list