[Mesa-dev] [PATCH 1/2] gm200/ir: add native OP_SQRT support

Ilia Mirkin imirkin at alum.mit.edu
Sat Aug 4 03:26:12 UTC 2018


On Fri, Aug 3, 2018 at 10:52 PM, Karol Herbst <kherbst at redhat.com> wrote:
> ./GpuTest /test=pixmark_piano 1024x640 30sec:
> 301 -> 327 points
>
> shader-db:
> total instructions in shared programs : 5472103 -> 5456166 (-0.29%)
> total gprs used in shared programs    : 647530 -> 647522 (-0.00%)
> total shared used in shared programs  : 389120 -> 389120 (0.00%)
> total local used in shared programs   : 21064 -> 21064 (0.00%)
> total bytes used in shared programs   : 58459304 -> 58288696 (-0.29%)
>
>                 local     shared        gpr       inst      bytes
>     helped           0           0          27        8281        8281
>       hurt           0           0          21         431         431
>
> Signed-off-by: Karol Herbst <kherbst at redhat.com>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp        | 4 +++-
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp     | 3 +++
>  .../drivers/nouveau/codegen/nv50_ir_target_gm107.cpp      | 8 +++++++-
>  .../drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp       | 1 +
>  4 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> index 1d31f181e44..5e8c22cd54b 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> @@ -1409,6 +1409,7 @@ CodeEmitterGM107::emitMUFU()
>     case OP_LG2: mufu = 3; break;
>     case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
>     case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
> +   case OP_SQRT: mufu = 8; break;
>     default:
>        assert(!"invalid mufu");
>        break;
> @@ -1418,7 +1419,7 @@ CodeEmitterGM107::emitMUFU()
>     emitSAT  (0x32);
>     emitNEG  (0x30, insn->src(0));
>     emitABS  (0x2e, insn->src(0));
> -   emitField(0x14, 3, mufu);
> +   emitField(0x14, 4, mufu);
>     emitGPR  (0x08, insn->src(0));
>     emitGPR  (0x00, insn->def(0));
>  }
> @@ -3342,6 +3343,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
>     case OP_LG2:
>     case OP_RCP:
>     case OP_RSQ:
> +   case OP_SQRT:
>        emitMUFU();
>        break;
>     case OP_AND:
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 1410cf26c87..c47d10896ce 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -2752,6 +2752,9 @@ NVC0LoweringPass::handleMOD(Instruction *i)
>  bool
>  NVC0LoweringPass::handleSQRT(Instruction *i)
>  {
> +   if (targ->isOpSupported(OP_SQRT, i->dType))
> +      return true;
> +
>     if (i->dType == TYPE_F64) {
>        Value *pred = bld.getSSA(1, FILE_PREDICATE);
>        Value *zero = bld.loadImm(NULL, 0.0);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> index adbfcc3cfec..b42c01f1d21 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> @@ -57,10 +57,13 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
>     switch (op) {
>     case OP_SAD:
>     case OP_POW:
> -   case OP_SQRT:
>     case OP_DIV:
>     case OP_MOD:
>        return false;
> +   case OP_SQRT:
> +      if (ty == TYPE_F64)
> +         return false;
> +      return chipset >= 0x120;

Please use the NVISA_GM200_CHIPSET define here instead of the hard-coded 0x120.

>     default:
>        break;
>     }
> @@ -125,6 +128,7 @@ TargetGM107::isBarrierRequired(const Instruction *insn) const
>        case OP_RCP:
>        case OP_RSQ:
>        case OP_SIN:
> +      case OP_SQRT:
>           return true;
>        default:
>           break;
> @@ -256,6 +260,7 @@ TargetGM107::getLatency(const Instruction *insn) const
>     case OP_RCP:
>     case OP_RSQ:
>     case OP_SIN:
> +   case OP_SQRT:
>        return 13;
>     default:
>        break;
> @@ -284,6 +289,7 @@ TargetGM107::getReadLatency(const Instruction *insn) const
>     case OP_RSQ:
>     case OP_SAT:
>     case OP_SIN:
> +   case OP_SQRT:
>     case OP_SULDB:
>     case OP_SULDP:
>     case OP_SUREDB:
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> index 7e059235f4c..9304e392361 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> @@ -129,6 +129,7 @@ static const struct opProperties _initProps[] =
>     { OP_LG2,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
>     { OP_RCP,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
>     { OP_RSQ,    0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
> +   { OP_SQRT,   0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
>     { OP_DFDX,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
>     { OP_DFDY,   0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
>     { OP_CALL,   0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list