[Mesa-dev] [PATCH v3] nvc0/ir: replace cvt instructions with add to improve shader performance

Ilia Mirkin imirkin at alum.mit.edu
Mon Jan 28 21:28:56 UTC 2019


On Mon, Jan 28, 2019 at 4:21 PM Karol Herbst <kherbst at redhat.com> wrote:
>
> gives me an performance boost of 0.2% in pixmark_piano on my gk106, gm204 and
> gp107.
>
> reduces the amount of generated convert instructions by roughly 30% in
> shader-db.
>
> v2: only for 32 bit operations
>     move some common code out of the switch
>     handle OP_SAT with modifiers
> v3: only for registers and const memory
>     rework if clauses
>     merge isCvt into this patch
>
> Signed-off-by: Karol Herbst <kherbst at redhat.com>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.h |  1 +
>  .../drivers/nouveau/codegen/nv50_ir_inlines.h | 17 +++++
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 64 +++++++++++++++++++
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h   |  1 +
>  4 files changed, 83 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index 8085bb2f542..b00a005bdef 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -867,6 +867,7 @@ public:
>     inline bool isPseudo() const { return op < OP_MOV; }
>     bool isDead() const;
>     bool isNop() const;
> +   inline bool isCvt() const;
>     bool isCommutationLegal(const Instruction *) const; // must be adjacent !
>     bool isActionEqual(const Instruction *) const;
>     bool isResultEqual(const Instruction *) const;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> index 4cb53ab42ed..06882058dc9 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> @@ -419,4 +419,21 @@ LValue *Function::getLValue(int id)
>     return reinterpret_cast<LValue *>(allLValues.get(id));
>  }
>
> +bool Instruction::isCvt() const
> +{
> +   switch (op) {
> +   case OP_ABS:
> +   case OP_CEIL:
> +   case OP_FLOOR:
> +   case OP_NEG:
> +   case OP_TRUNC:
> +   case OP_SAT:
> +      return true;
> +   case OP_CVT:
> +      return def(0).getFile() != FILE_PREDICATE && src(0).getFile() != FILE_PREDICATE;

Maybe better to whitelist the things you like? E.g. FILE_FLAGS is
probably also not appropriate for this?

And I know this is a left-over from your earlier change to print this
out more generically, however I'd just fold this into lowering_nvc0
somewhere. (Is it needed at all? The current logic should reject
anythign it doesn't like anyways...)

> +   default:
> +      return false;
> +   }
> +}
> +
>  #endif // __NV50_IR_INLINES_H__
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 295497be2f9..28f0aae6432 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -719,6 +719,67 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
>     bb->remove(bb->getEntry());
>  }
>
> +// replaces instructions which would end up as f2f or i2i with faster
> +// alternatives:
> +//  - abs(a)      -> add(0, abs a)
> +//  - fneg(a)     -> fadd(neg 0, neg a)
> +//  - ineg(a)     -> iadd(0, neg a)
> +//  - fneg(abs a) -> fadd(neg 0, neg abs a)
> +//  - ineg(abs a) -> iadd(0, neg abs a)
> +//  - sat(a)      -> sat add(0, a)
> +void
> +NVC0LegalizePostRA::replaceCvt(Instruction *cvt)
> +{
> +   if (!isFloatType(cvt->sType) && typeSizeof(cvt->sType) != 4)
> +      return;
> +   if (cvt->sType != cvt->dType)
> +      return;
> +   // we could make it work, but in this case we have optimizations disabled
> +   // and we don't really care either way.
> +   if (cvt->src(0).getFile() != FILE_GPR &&
> +       cvt->src(0).getFile() != FILE_MEMORY_CONST)
> +      return;
> +
> +   Modifier mod0, mod1;
> +
> +   switch (cvt->op) {
> +   case OP_ABS:
> +      if (cvt->src(0).mod)
> +         return;
> +      if (!isFloatType(cvt->sType))
> +         return;
> +      mod0 = 0;
> +      mod1 = NV50_IR_MOD_ABS;
> +      break;
> +   case OP_NEG:
> +      if (!isFloatType(cvt->sType) && cvt->src(0).mod)
> +         return;
> +      if (isFloatType(cvt->sType) &&
> +          (cvt->src(0).mod && cvt->src(0).mod != Modifier(NV50_IR_MOD_ABS)))
> +         return;
> +
> +      mod0 = isFloatType(cvt->sType) ? NV50_IR_MOD_NEG : 0;
> +      mod1 = cvt->src(0).mod == Modifier(NV50_IR_MOD_ABS)
> +           ? NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG;

I'd much prefer the ? on the previous line.

> +      break;
> +   case OP_SAT:
> +      if (!isFloatType(cvt->sType) && cvt->src(0).mod.abs())
> +         return;
> +      mod0 = 0;
> +      mod1 = cvt->src(0).mod;
> +      cvt->saturate = true;
> +      break;
> +   default:
> +      return;
> +   }
> +
> +   cvt->op = OP_ADD;
> +   cvt->moveSources(0, 1);
> +   cvt->setSrc(0, rZero);
> +   cvt->src(0).mod = mod0;
> +   cvt->src(1).mod = mod1;
> +}
> +
>  bool
>  NVC0LegalizePostRA::visit(BasicBlock *bb)
>  {
> @@ -758,6 +819,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
>                 next = hi;
>           }
>
> +         if (i->isCvt())
> +            replaceCvt(i);
> +
>           if (i->op != OP_MOV && i->op != OP_PFETCH)
>              replaceZero(i);
>        }
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index e0f50ab0904..4679c56471b 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -81,6 +81,7 @@ private:
>     virtual bool visit(Function *);
>     virtual bool visit(BasicBlock *);
>
> +   void replaceCvt(Instruction *);
>     void replaceZero(Instruction *);
>     bool tryReplaceContWithBra(BasicBlock *);
>     void propagateJoin(BasicBlock *);
> --
> 2.20.1
>


More information about the mesa-dev mailing list