[Mesa-dev] [RFC 3/3] gk110/ir: Use the new rcp/rsq f64 in library

Boyan Ding boyan.j.ding at gmail.com
Sun Mar 5 15:34:58 UTC 2017


Signed-off-by: Boyan Ding <boyan.j.ding at gmail.com>
---
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 32 ++++++++++++++++++++++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |  1 +
 2 files changed, 33 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 732e1a93b4..7e4d175a99 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -68,6 +68,32 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
 }
 
 void
+NVC0LegalizeSSA::handleRCPRSQLib(Instruction *i, Value *src[]) // replaces the RCP/RSQ instruction 'i' with a call to a builtin library routine; the caller passes in src[] as the two 4-byte pieces split from i's source
+{
+   FlowInstruction *call;
+   Value *def[2]; // values defined by the two moves into registers 0 and 1
+   int builtin; // index of the builtin routine to call
+
+   def[0] = bld.mkMovToReg(0, src[0])->getDef(0); // src[0] is moved to register 0 before the call
+   def[1] = bld.mkMovToReg(1, src[1])->getDef(0); // src[1] is moved to register 1 before the call
+
+   if (i->op == OP_RCP) // anything that is not OP_RCP is treated as RSQ
+      builtin = NVC0_BUILTIN_RCP_F64;
+   else
+      builtin = NVC0_BUILTIN_RSQ_F64;
+
+   call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); // call is emitted with no target yet; the builtin target is filled in below
+   bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), def[0], def[1]); // i's def is rebuilt from def[0]/def[1]; NOTE(review): these are the values defined by the pre-call moves above -- confirm they are meant to carry the builtin's result
+   bld.mkClobber(FILE_GPR, 0x3fc, 2); // NOTE(review): presumably marks the GPRs the builtin overwrites -- confirm mask 0x3fc against the library code
+   bld.mkClobber(FILE_PREDICATE, i->op == OP_RSQ ? 0x3 : 0x1, 0); // predicate clobber mask is 0x3 for RSQ and 0x1 otherwise
+
+   call->fixed = 1; // NOTE(review): presumably keeps later passes from removing or moving the call -- confirm
+   call->absolute = call->builtin = 1; // flags the call as absolute and as targeting a builtin
+   call->target.builtin = builtin; // routine selected from i->op above
+   delete_Instruction(prog, i); // the original RCP/RSQ is removed now that its def is produced by the MERGE
+}
+
+void
 NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
 {
    assert(i->dType == TYPE_F64);
@@ -80,6 +106,12 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
    Value *src[2], *dst[2], *def = i->getDef(0);
    bld.mkSplit(src, 4, i->getSrc(0));
 
+   int chip = prog->getTarget()->getChipset();
+   if (chip >= NVISA_GK20A_CHIPSET && chip < NVISA_GM107_CHIPSET) {
+      handleRCPRSQLib(i, src);
+      return;
+   }
+
    // 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
    dst[0] = bld.loadImm(NULL, 0);
    dst[1] = bld.getSSA();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 7fae7e24b9..30bc0b48df 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -33,6 +33,7 @@ private:
 
    // we want to insert calls to the builtin library only after optimization
    void handleDIV(Instruction *); // integer division, modulus
+   void handleRCPRSQLib(Instruction *, Value *[]);
    void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
    void handleFTZ(Instruction *);
    void handleSET(CmpInstruction *);
-- 
2.12.0



More information about the mesa-dev mailing list