[Mesa-dev] [PATCH 2/2] nvc0/ir: replace cvt instructions with add to improve shader performance

Ilia Mirkin imirkin at alum.mit.edu
Thu Dec 13 23:27:14 UTC 2018


On Thu, Dec 13, 2018 at 6:14 PM Karol Herbst <kherbst at redhat.com> wrote:
>
> gives me an performance boost of 0.2% in pixmark_piano on my gk106, gm204 and
> gp107.
>
> changes in shader-db:
>
> total instructions in shared programs     : 7614782 -> 7614782 (0.00%)
> total cvt instructions in shared programs : 139343 -> 95856 (-31.21%)
> total gprs used in shared programs        : 798045 -> 798045 (0.00%)
> total shared used in shared programs      : 639636 -> 639636 (0.00%)
> total local used in shared programs       : 24648 -> 24648 (0.00%)
> total bytes used in shared programs       : 81330696 -> 81330696 (0.00%)
>
>                 local     shared        gpr       inst       cvts      bytes
>     helped          0          0          0          0      14037          0
>       hurt          0          0          0          0          0          0
>
> Signed-off-by: Karol Herbst <kherbst at redhat.com>
> ---
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 55 +++++++++++++++++++
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h   |  1 +
>  2 files changed, 56 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 295497be2f9..267fef0898d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -719,6 +719,58 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
>     bb->remove(bb->getEntry());
>  }
>
> +// replaces instructions which would end up as f2f or i2i with faster
> +// alternatives:
> +//  - abs(a)     -> add(0, abs a)
> +//  - neg(a)     -> add(0, neg a)
> +//  - neg(abs a) -> add(0, neg abs a)
> +//  - sat(a)     -> sat add(0, a)
> +void
> +NVC0LegalizePostRA::replaceCvt(Instruction *cvt)
> +{
> +   if (typeSizeof(cvt->sType) == 8)

!= 4

Otherwise you're just asking for trouble.

> +      return;
> +   if (cvt->sType != cvt->dType)
> +      return;
> +   // we could make it work, but in this case we have optimizations disabled
> +   // and we don't really care either way.
> +   if (cvt->src(0).getFile() == FILE_IMMEDIATE)
> +      return;
> +
> +   switch (cvt->op) {
> +   case OP_ABS:
> +      if (cvt->src(0).mod)
> +         break;
> +      if (!isFloatType(cvt->sType))

Why? What's wrong with IABS -> IADD? Similar question for OP_NEG.

> +         break;
> +      cvt->op = OP_ADD;
> +      cvt->moveSources(0, 1);

These two lines, along with moveSources are always repeated. Would be
nice to centralize this somehow.

> +      cvt->src(1).mod = NV50_IR_MOD_ABS;
> +      cvt->setSrc(0, rZero);
> +      cvt->src(0).mod = 0;
> +      break;
> +   case OP_NEG:
> +      if (cvt->src(0).mod && (cvt->src(0).mod.neg() || !isFloatType(cvt->sType)))
> +         break;
> +      cvt->op = OP_ADD;
> +      cvt->moveSources(0, 1);
> +      cvt->src(1).mod = cvt->src(0).mod ? NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG;
> +      cvt->setSrc(0, rZero);
> +      cvt->src(0).mod = 0;
> +      break;
> +   case OP_SAT:
> +      if (cvt->src(0).mod)
> +         break;
> +      cvt->op = OP_ADD;
> +      cvt->moveSources(0, 1);
> +      cvt->setSrc(0, rZero);
> +      cvt->saturate = true;
> +      break;
> +   default:
> +      break;
> +   }
> +}
> +
>  bool
>  NVC0LegalizePostRA::visit(BasicBlock *bb)
>  {
> @@ -758,6 +810,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
>                 next = hi;
>           }
>
> +         if (i->isCvt())
> +            replaceCvt(i);
> +
>           if (i->op != OP_MOV && i->op != OP_PFETCH)
>              replaceZero(i);
>        }
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index e0f50ab0904..4679c56471b 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -81,6 +81,7 @@ private:
>     virtual bool visit(Function *);
>     virtual bool visit(BasicBlock *);
>
> +   void replaceCvt(Instruction *);
>     void replaceZero(Instruction *);
>     bool tryReplaceContWithBra(BasicBlock *);
>     void propagateJoin(BasicBlock *);
> --
> 2.19.2
>


More information about the mesa-dev mailing list