[Mesa-dev] [PATCH 1/2] gm200/ir: add native OP_SQRT support

Karol Herbst kherbst at redhat.com
Sat Aug 4 02:52:05 UTC 2018


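On chipsets >= 0x120 (GM200 and newer), emit non-F64 OP_SQRT directly as a
MUFU operation (sub-op 8) instead of lowering it. The MUFU sub-op field in
the emitter is widened from 3 to 4 bits so the new value fits.

./GpuTest /test=pixmark_piano 1024x640 30sec:
301 -> 327 points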

shader-db:
total instructions in shared programs : 5472103 -> 5456166 (-0.29%)
total gprs used in shared programs    : 647530 -> 647522 (-0.00%)
total shared used in shared programs  : 389120 -> 389120 (0.00%)
total local used in shared programs   : 21064 -> 21064 (0.00%)
total bytes used in shared programs   : 58459304 -> 58288696 (-0.29%)

                local     shared        gpr       inst      bytes
    helped           0           0          27        8281        8281
      hurt           0           0          21         431         431

Signed-off-by: Karol Herbst <kherbst at redhat.com>
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp        | 4 +++-
 .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp     | 3 +++
 .../drivers/nouveau/codegen/nv50_ir_target_gm107.cpp      | 8 +++++++-
 .../drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp       | 1 +
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 1d31f181e44..5e8c22cd54b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -1409,6 +1409,7 @@ CodeEmitterGM107::emitMUFU()
    case OP_LG2: mufu = 3; break;
    case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
    case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
+   case OP_SQRT: mufu = 8; break;
    default:
       assert(!"invalid mufu");
       break;
@@ -1418,7 +1419,7 @@ CodeEmitterGM107::emitMUFU()
    emitSAT  (0x32);
    emitNEG  (0x30, insn->src(0));
    emitABS  (0x2e, insn->src(0));
-   emitField(0x14, 3, mufu);
+   emitField(0x14, 4, mufu);
    emitGPR  (0x08, insn->src(0));
    emitGPR  (0x00, insn->def(0));
 }
@@ -3342,6 +3343,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
    case OP_LG2:
    case OP_RCP:
    case OP_RSQ:
+   case OP_SQRT:
       emitMUFU();
       break;
    case OP_AND:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 1410cf26c87..c47d10896ce 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2752,6 +2752,9 @@ NVC0LoweringPass::handleMOD(Instruction *i)
 bool
 NVC0LoweringPass::handleSQRT(Instruction *i)
 {
+   if (targ->isOpSupported(OP_SQRT, i->dType))
+      return true;
+
    if (i->dType == TYPE_F64) {
       Value *pred = bld.getSSA(1, FILE_PREDICATE);
       Value *zero = bld.loadImm(NULL, 0.0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index adbfcc3cfec..b42c01f1d21 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -57,10 +57,13 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
    switch (op) {
    case OP_SAD:
    case OP_POW:
-   case OP_SQRT:
    case OP_DIV:
    case OP_MOD:
       return false;
+   case OP_SQRT:
+      if (ty == TYPE_F64)
+         return false;
+      return chipset >= 0x120;
    default:
       break;
    }
@@ -125,6 +128,7 @@ TargetGM107::isBarrierRequired(const Instruction *insn) const
       case OP_RCP:
       case OP_RSQ:
       case OP_SIN:
+      case OP_SQRT:
          return true;
       default:
          break;
@@ -256,6 +260,7 @@ TargetGM107::getLatency(const Instruction *insn) const
    case OP_RCP:
    case OP_RSQ:
    case OP_SIN:
+   case OP_SQRT:
       return 13;
    default:
       break;
@@ -284,6 +289,7 @@ TargetGM107::getReadLatency(const Instruction *insn) const
    case OP_RSQ:
    case OP_SAT:
    case OP_SIN:
+   case OP_SQRT:
    case OP_SULDB:
    case OP_SULDP:
    case OP_SUREDB:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 7e059235f4c..9304e392361 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -129,6 +129,7 @@ static const struct opProperties _initProps[] =
    { OP_LG2,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
    { OP_RCP,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
    { OP_RSQ,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
+   { OP_SQRT,   0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
    { OP_DFDX,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
    { OP_DFDY,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
    { OP_CALL,   0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
-- 
2.17.1



More information about the mesa-dev mailing list