[Mesa-dev] [PATCH 3/3] nv50, nvc0: handle SQRT lowering inside the driver

Samuel Pitoiset samuel.pitoiset at gmail.com
Sun Mar 13 11:52:31 UTC 2016



On 03/13/2016 04:07 AM, Ilia Mirkin wrote:
> First off, st/mesa lowers DSQRT incorrectly (it uses CMP to attempt to
> find out whether the input is less than 0). Secondly the current
> approach (x * rsq(x)) behaves poorly for x = inf - a NaN is produced
> instead of inf.
>

When I had a look at this precision thing, this seemed a bit weird to me 
but I did not investigate much of time on it.

> Instead we switch to the less accurate rcp(rsq(x)) method - this behaves
> nicely for all valid inputs. We still don't do this for DSQRT since the
> RSQ/RCP ops are *really* inaccurate, and don't even have Newton-Raphson
> steps right now. Eventually we should have a separate library function
> for DSQRT that does it more precisely (and perhaps move this lowering to
> the post-opt phase).
>
> This fixes a number of dEQP precision tests that were expecting better
> behavior for infinite inputs.

Yep, this fixes distance, length, refract and sqrt deqp precision tests 
(37 tests). But they are still some fails with atan2, idexp and tanh.
Are you going to fix them too?

Except that you forgot to remove unused variables:
codegen/nv50_ir_lowering_nvc0.cpp: In member function ‘bool 
nv50_ir::NVC0LoweringPass::handleSQRT(nv50_ir::Instruction*)’:
codegen/nv50_ir_lowering_nvc0.cpp:1785:20: warning: unused variable 
‘mov’ [-Wunused-variable]
        Instruction *mov, *rsq;
                     ^
codegen/nv50_ir_lowering_nvc0.cpp:1785:26: warning: unused variable 
‘rsq’ [-Wunused-variable]
        Instruction *mov, *rsq;
                           ^
   CC       nvc0/nvc0_surface.lo

This patch is:

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>   .../drivers/nouveau/codegen/nv50_ir_build_util.cpp |  6 +++-
>   .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  2 ++
>   .../nouveau/codegen/nv50_ir_lowering_nv50.cpp      |  7 ++---
>   .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 32 +++++++++++-----------
>   src/gallium/drivers/nouveau/nv50/nv50_screen.c     |  2 +-
>   src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  2 +-
>   6 files changed, 28 insertions(+), 23 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> index f58cf97..84ebfdb 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> @@ -585,6 +585,7 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
>            return NULL;
>         srcNr = 2;
>         break;
> +   case OP_SELP: srcNr = 3; break;
>      default:
>         // TODO when needed
>         return NULL;
> @@ -601,7 +602,10 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
>
>      for (int s = 0; s < srcNr; ++s) {
>         if (lo->getSrc(s)->reg.size < 8) {
> -         hi->setSrc(s, zero);
> +         if (s == 2)
> +            hi->setSrc(s, lo->getSrc(s));
> +         else
> +            hi->setSrc(s, zero);
>         } else {
>            if (lo->getSrc(s)->refCount() > 1)
>               lo->setSrc(s, cloneShallow(fn, lo->getSrc(s)));
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index b06d86a..d284446 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -616,6 +616,7 @@ static nv50_ir::operation translateOpcode(uint opcode)
>
>      NV50_IR_OPCODE_CASE(RCP, RCP);
>      NV50_IR_OPCODE_CASE(RSQ, RSQ);
> +   NV50_IR_OPCODE_CASE(SQRT, SQRT);
>
>      NV50_IR_OPCODE_CASE(MUL, MUL);
>      NV50_IR_OPCODE_CASE(ADD, ADD);
> @@ -2689,6 +2690,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
>      case TGSI_OPCODE_FLR:
>      case TGSI_OPCODE_TRUNC:
>      case TGSI_OPCODE_RCP:
> +   case TGSI_OPCODE_SQRT:
>      case TGSI_OPCODE_IABS:
>      case TGSI_OPCODE_INEG:
>      case TGSI_OPCODE_NOT:
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
> index 8752b0c..12c5f69 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
> @@ -1203,10 +1203,9 @@ NV50LoweringPreSSA::handleDIV(Instruction *i)
>   bool
>   NV50LoweringPreSSA::handleSQRT(Instruction *i)
>   {
> -   Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
> -                                bld.getSSA(), i->getSrc(0));
> -   i->op = OP_MUL;
> -   i->setSrc(1, rsq->getDef(0));
> +   bld.setPosition(i, true);
> +   i->op = OP_RSQ;
> +   bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0));
>
>      return true;
>   }
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index d181f15..29b77c9 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1778,22 +1778,22 @@ NVC0LoweringPass::handleMOD(Instruction *i)
>   bool
>   NVC0LoweringPass::handleSQRT(Instruction *i)
>   {
> -   Value *pred = bld.getSSA(1, FILE_PREDICATE);
> -   Value *zero = bld.getSSA();
> -   Instruction *rsq;
> -
> -   bld.mkOp1(OP_MOV, TYPE_U32, zero, bld.mkImm(0));
> -   if (i->dType == TYPE_F64)
> -      zero = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), zero, zero);
> -   bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
> -   bld.mkOp1(OP_MOV, i->dType, i->getDef(0), zero)->setPredicate(CC_P, pred);
> -   rsq = bld.mkOp1(OP_RSQ, i->dType,
> -                   bld.getSSA(typeSizeof(i->dType)), i->getSrc(0));
> -   rsq->setPredicate(CC_NOT_P, pred);
> -   i->op = OP_MUL;
> -   i->setSrc(1, rsq->getDef(0));
> -   i->setPredicate(CC_NOT_P, pred);
> -
> +   if (i->dType == TYPE_F64) {
> +      Value *pred = bld.getSSA(1, FILE_PREDICATE);
> +      Value *zero = bld.loadImm(NULL, 0.0d);
> +      Value *dst = bld.getSSA(8);
> +      Instruction *mov, *rsq;
> +      bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0));
> +      bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
> +      bld.mkOp3(OP_SELP, TYPE_U64, dst, zero, dst, pred);
> +      i->op = OP_MUL;
> +      i->setSrc(1, dst);
> +      // TODO: Handle this properly with a library function
> +   } else {
> +      bld.setPosition(i, true);
> +      i->op = OP_RSQ;
> +      bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0));
> +   }
>
>      return true;
>   }
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> index 28e0ed3..5836bb2 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> @@ -305,7 +305,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>      case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
>         return 1;
>      case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
> -      return 0;
> +      return 1;
>      case PIPE_SHADER_CAP_SUBROUTINES:
>         return 0; /* please inline, or provide function declarations */
>      case PIPE_SHADER_CAP_INTEGERS:
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 30afdf2f0..3c5b1da 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -328,7 +328,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>      case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
>         return 1;
>      case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
> -      return 0;
> +      return 1;
>      case PIPE_SHADER_CAP_SUBROUTINES:
>         return 1;
>      case PIPE_SHADER_CAP_INTEGERS:
>


More information about the mesa-dev mailing list