[Nouveau] [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg

Tobias Klausmann tobias.johannes.klausmann at mni.thm.de
Sat May 9 13:04:43 PDT 2015



On 09.05.2015 07:35, Ilia Mirkin wrote:
> This covers the pattern where a KILL_IF is used, which triggers a
> comparison of -x to 0. This can usually be folded into the comparison whose
> result is being compared to 0, however it may, itself, have already been
> combined with another comparison. That shouldn't impact the logic of
> this pass however. With this and the & 1.0 change, code like
>
> 00000020: 001c0001 80081df4     set b32 $r0 lt f32 $r0 0x3e800000
> 00000028: 001c0000 201fc000     and b32 $r0 $r0 0x3f800000
> 00000030: 7f9c001e dd885c00     set $p0 0x1 lt f32 neg $r0 0x0
> 00000038: 0000003c 19800000     $p0 discard
>
> becomes
>
> 00000020: 001c001d b5881df4     set $p0 0x1 lt f32 $r0 0x3e800000
> 00000028: 0000003c 19800000     $p0 discard
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>   .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 51 ++++++++++++++--------
>   1 file changed, 33 insertions(+), 18 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index d8af19a..43a2fe9 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -278,7 +278,6 @@ private:
>   
>      void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
>   
> -   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
>      CmpInstruction *findOriginForTestWithZero(Value *);
>   
>      unsigned int foldCount;
> @@ -337,16 +336,10 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
>         return NULL;
>      Instruction *insn = value->getInsn();
>   
> -   while (insn && insn->op != OP_SET) {
> +   while (insn && insn->op != OP_SET && insn->op != OP_SET_AND &&
> +          insn->op != OP_SET_OR && insn->op != OP_SET_XOR) {
>         Instruction *next = NULL;
>         switch (insn->op) {
> -      case OP_NEG:
> -      case OP_ABS:
> -      case OP_CVT:
> -         next = insn->getSrc(0)->getInsn();
> -         if (insn->sType != next->dType)
> -            return NULL;
> -         break;
>         case OP_MOV:
>            next = insn->getSrc(0)->getInsn();
>            break;
> @@ -946,29 +939,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
>   
>      case OP_SET: // TODO: SET_AND,OR,XOR

delete this comment?!

>      {
> +      /* This optimizes the case where the output of a set is being compared
> +       * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
> +       * can be a lot cleverer in our comparison.
> +       */
>         CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
>         CondCode cc, ccZ;
> -      if (i->src(t).mod != Modifier(0))
> -         return;
> -      if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
> +      if (imm0.reg.data.u32 != 0 || !si)
>            return;
>         cc = si->setCond;
>         ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
> +      // We do everything assuming var (cmp) 0, reverse the condition if 0 is
> +      // first.
>         if (s == 0)
>            ccZ = reverseCondCode(ccZ);
> +      // If there is a negative modifier, we need to undo that, by flipping
> +      // the comparison to zero.
> +      if (i->src(t).mod.neg())
> +         ccZ = reverseCondCode(ccZ);
> +      // If this is a signed comparison, we expect the input to be a regular
> +      // boolean, i.e. 0/-1. However the rest of the logic assumes that true
> +      // is positive, so just flip the sign.
> +      if (i->sType == TYPE_S32) {
> +         assert(!isFloatType(si->dType));
> +         ccZ = reverseCondCode(ccZ);
> +      }

can both this and the previous condition evaluate to true? if yes, this 
double-flips ccZ...

>         switch (ccZ) {
> -      case CC_LT: cc = CC_FL; break;
> -      case CC_GE: cc = CC_TR; break;
> -      case CC_EQ: cc = inverseCondCode(cc); break;
> -      case CC_LE: cc = inverseCondCode(cc); break;
> -      case CC_GT: break;
> -      case CC_NE: break;
> +      case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
> +      case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
> +      case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
> +      case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
> +      case CC_GT: break; // bool > 0 -- bool
> +      case CC_NE: break; // bool != 0 -- bool
>         default:
>            return;
>         }
> +
> +      // Update the condition of this SET to be identical to the origin set,
> +      // but with the updated condition code. The original SET should get
> +      // DCE'd, ideally.
> +      i->op = si->op;
>         i->asCmp()->setCond = cc;
>         i->setSrc(0, si->src(0));
>         i->setSrc(1, si->src(1));
> +      if (si->srcExists(2))
> +         i->setSrc(2, si->src(2));
>         i->sType = si->sType;
>      }
>         break;



More information about the Nouveau mailing list