[Mesa-dev] [PATCH] nvc0/ir: set FTZ when the inputs are of uncertain provenance
Ilia Mirkin
imirkin at alum.mit.edu
Tue Apr 28 00:30:08 PDT 2015
This sets the FTZ flag (flush denorms to zero) on every opcode that can
take it whenever one of its sources is produced by an opcode that cannot
take FTZ itself, or by one that does not write an F32 result.
This resolves issues in Unigine Heaven 4.0 where there were solid-filled
boxes popping up.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89455
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 33 +++++++++++++++++++++-
.../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 +
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 87e75e1..9a8a41d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -96,6 +96,32 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
}
+void
+NVC0LegalizeSSA::handleFTZ(Instruction *i)
+{
+ unsigned s;
+ // Only instructions whose source and destination types are both F32 qualify.
+ if (i->dType != TYPE_F32 || i->sType != TYPE_F32)
+ return;
+
+ // Set ftz (flush denorms to zero) on i if any source might hold a denorm.
+ // A source is trusted when its defining instruction writes F32 and is itself
+ // of a class that *could* take ftz; sources with no defining insn are skipped.
+ for (s = 0; i->srcExists(s); ++s) {
+ Instruction *ld = i->getSrc(s)->getInsn();
+ if (!ld)
+ continue;
+ // Any other producer is of uncertain provenance: flag i and stop looking.
+ OpClass cls = prog->getTarget()->getOpClass(ld->op);
+ if (ld->dType != TYPE_F32 || (
+ cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE &&
+ cls != OPCLASS_CONVERT)) {
+ i->ftz = true;
+ return;
+ }
+ }
+}
+
bool
NVC0LegalizeSSA::visit(Function *fn)
{
@@ -109,8 +135,13 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
Instruction *next;
for (Instruction *i = bb->getEntry(); i; i = next) {
next = i->next;
- if (i->dType == TYPE_F32)
+ if (i->dType == TYPE_F32) {
+ OpClass cls = prog->getTarget()->getOpClass(i->op);
+ if (cls == OPCLASS_ARITH || cls == OPCLASS_COMPARE ||
+ cls == OPCLASS_CONVERT)
+ handleFTZ(i);
continue;
+ }
switch (i->op) {
case OP_DIV:
case OP_MOD:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index d8ff5cd..260e101 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -36,6 +36,7 @@ private:
// we want to insert calls to the builtin library only after optimization
void handleDIV(Instruction *); // integer division, modulus
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
+ void handleFTZ(Instruction *);
private:
BuildUtil bld;
--
2.0.5
More information about the mesa-dev
mailing list