[Mesa-dev] [PATCH 07/13] gk110/ir: Use the new rcp/rsq in library
Karol Herbst
kherbst at redhat.com
Sun Jul 15 18:15:47 UTC 2018
From: Boyan Ding <boyan.j.ding at gmail.com>
v2: (Karol Herbst <kherbst at redhat.com>)
* fix Value setup for the builtins
Signed-off-by: Karol Herbst <kherbst at redhat.com>
---
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 36 +++++++++++++++++++
.../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 +
2 files changed, 37 insertions(+)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 743f5bd552b..99758f31e35 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -83,6 +83,36 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
delete_Instruction(prog, i);
}
+/* Replace the f64 OP_RCP/OP_RSQ instruction i with a call to the matching
+ * builtin library routine.  src[0]/src[1] are passed in fixed registers 0/1,
+ * the result is read back from those registers, and i is deleted. */
+void
+NVC0LegalizeSSA::handleRCPRSQLib(Instruction *i, Value *src[])
+{
+   // Any op other than OP_RCP selects the RSQ routine.
+   const int builtin =
+      i->op == OP_RCP ? NVC0_BUILTIN_RCP_F64 : NVC0_BUILTIN_RSQ_F64;
+   // Arguments. Only the moves themselves matter; their defs are unused.
+   bld.mkMovToReg(0, src[0]);
+   bld.mkMovToReg(1, src[1]);
+
+   FlowInstruction *call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
+   // Result: copy registers 0 and 1 into fresh SSA values, then merge them.
+   Value *res0 = bld.getSSA();
+   Value *res1 = bld.getSSA();
+   bld.mkMovFromReg(res0, 0);
+   bld.mkMovFromReg(res1, 1);
+   // NOTE(review): masks must cover what the builtins overwrite - confirm.
+   bld.mkClobber(FILE_GPR, 0x3fc, 2);
+   bld.mkClobber(FILE_PREDICATE, i->op == OP_RSQ ? 0x3 : 0x1, 0);
+   bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), res0, res1);
+
+   call->fixed = 1;
+   call->absolute = call->builtin = 1;
+   call->target.builtin = builtin;
+   delete_Instruction(prog, i);
+}
+
void
NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
{
@@ -96,6 +126,12 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
Value *src[2], *dst[2], *def = i->getDef(0);
bld.mkSplit(src, 4, i->getSrc(0));
+ int chip = prog->getTarget()->getChipset();
+ if (chip >= NVISA_GK20A_CHIPSET && chip < NVISA_GM107_CHIPSET) {
+ handleRCPRSQLib(i, src);
+ return;
+ }
+
// 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
dst[0] = bld.loadImm(NULL, 0);
dst[1] = bld.getSSA();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 5dbb3e4f009..06b363e8fa5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -62,6 +62,7 @@ private:
// we want to insert calls to the builtin library only after optimization
void handleDIV(Instruction *); // integer division, modulus
+ void handleRCPRSQLib(Instruction *, Value *[]);
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
void handleFTZ(Instruction *);
void handleSET(CmpInstruction *);
--
2.17.1
More information about the mesa-dev
mailing list