[Mesa-dev] [PATCH v2] nv50/ir: optimize ADD(SHL(a, b), c) to SHLADD(a, b, c)

Ilia Mirkin imirkin at alum.mit.edu
Tue Oct 11 21:17:59 UTC 2016


On Tue, Oct 11, 2016 at 5:01 PM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
> total instructions in shared programs :2286901 -> 2284473 (-0.11%)
> total gprs used in shared programs    :335256 -> 335273 (0.01%)
> total local used in shared programs   :31968 -> 31968 (0.00%)
>
>                 local        gpr       inst      bytes
>     helped           0          41         852         852
>       hurt           0          44          23          23
>
> v2: - use visit(Instruction *)
>     - use getUniqueInsn()
>     - use getImmediate()
>     - fix mod for src0
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 87 ++++++++++++++++++++++
>  1 file changed, 87 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 6efb29e..6045e8b 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -2132,6 +2132,92 @@ AlgebraicOpt::visit(BasicBlock *bb)
>
>  // =============================================================================
>
> +// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
> +class LateAlgebraicOpt : public Pass
> +{
> +private:
> +   virtual bool visit(Instruction *);
> +
> +   void handleADD(Instruction *);
> +   bool tryADDToSHLADD(Instruction *);
> +};
> +
> +void
> +LateAlgebraicOpt::handleADD(Instruction *add)
> +{
> +   Value *src0 = add->getSrc(0);
> +   Value *src1 = add->getSrc(1);
> +
> +   if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
> +      return;
> +
> +   if (prog->getTarget()->isOpSupported(OP_SHLADD, add->dType))
> +      tryADDToSHLADD(add);
> +}
> +
> +// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
> +bool
> +LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
> +{
> +   Value *src0 = add->getSrc(0);
> +   Value *src1 = add->getSrc(1);
> +   ImmediateValue imm;
> +   Instruction *shl;
> +   Modifier mod[2];
> +   Value *src;
> +   int s;
> +
> +   if (add->saturate || add->usesFlags() || typeSizeof(add->dType) == 8)
> +      return false;

Please also add `|| isFloatType(add->dType)` to this condition, so that floating-point adds are never converted.

> +
> +   if (src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_SHL)
> +      s = 0;
> +   else
> +   if (src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_SHL)
> +      s = 1;
> +   else
> +      return false;
> +
> +   src = add->getSrc(s);
> +   shl = src->getUniqueInsn();
> +
> +   if (shl->bb != add->bb || shl->usesFlags() || shl->subOp)
> +      return false;
> +
> +   if (!shl->src(1).getImmediate(imm))
> +      return false;
> +
> +   mod[0] = add->src(0).mod;
> +   mod[1] = add->src(1).mod;
> +
> +   add->op = OP_SHLADD;
> +   add->setSrc(2, add->src(s ? 0 : 1));

This can be written more simply as src(!s) instead of src(s ? 0 : 1).

In other places, we do

int t = !s;

to avoid having to repeat the !s. The convention there is that s = immediate
source, so t = "other source"; here s is the index of the SHL source, so t
would be the index of the other addend.

> +   add->src(2).mod = mod[s];
> +
> +   add->setSrc(0, shl->getSrc(0));
> +   add->src(0).mod = mod[!s];

This mod[] array shouldn't be necessary...

With those minor modifications, this is

Reviewed-by: Ilia Mirkin <imirkin at alum.mit.edu>

> +   add->setSrc(1, new_ImmediateValue(shl->bb->getProgram(), imm.reg.data.u32));
> +   add->src(1).mod = Modifier(0);
> +
> +   return true;
> +}
> +
> +bool
> +LateAlgebraicOpt::visit(Instruction *i)
> +{
> +   switch (i->op) {
> +   case OP_ADD:
> +      handleADD(i);
> +      break;
> +   default:
> +      break;
> +   }
> +
> +   return true;
> +}
> +
> +// =============================================================================
> +
>  static inline void
>  updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn)
>  {
> @@ -3436,6 +3522,7 @@ Program::optimizeSSA(int level)
>     RUN_PASS(2, AlgebraicOpt, run);
>     RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
>     RUN_PASS(1, ConstantFolding, foldAll);
> +   RUN_PASS(2, LateAlgebraicOpt, run);
>     RUN_PASS(1, LoadPropagation, run);
>     RUN_PASS(1, IndirectPropagation, run);
>     RUN_PASS(2, MemoryOpt, run);
> --
> 2.10.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list