[Nouveau] [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg
Tobias Klausmann
tobias.johannes.klausmann at mni.thm.de
Sat May 9 13:04:43 PDT 2015
On 09.05.2015 07:35, Ilia Mirkin wrote:
> This covers the pattern where a KILL_IF is used, which triggers a
> comparison of -x to 0. This can usually be folded into the comparison whose
> result is being compared to 0, however it may, itself, have already been
> combined with another comparison. That shouldn't impact the logic of
> this pass however. With this and the & 1.0 change, code like
>
> 00000020: 001c0001 80081df4 set b32 $r0 lt f32 $r0 0x3e800000
> 00000028: 001c0000 201fc000 and b32 $r0 $r0 0x3f800000
> 00000030: 7f9c001e dd885c00 set $p0 0x1 lt f32 neg $r0 0x0
> 00000038: 0000003c 19800000 $p0 discard
>
> becomes
>
> 00000020: 001c001d b5881df4 set $p0 0x1 lt f32 $r0 0x3e800000
> 00000028: 0000003c 19800000 $p0 discard
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 51 ++++++++++++++--------
> 1 file changed, 33 insertions(+), 18 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index d8af19a..43a2fe9 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -278,7 +278,6 @@ private:
>
> void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
>
> - // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
> CmpInstruction *findOriginForTestWithZero(Value *);
>
> unsigned int foldCount;
> @@ -337,16 +336,10 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
> return NULL;
> Instruction *insn = value->getInsn();
>
> - while (insn && insn->op != OP_SET) {
> + while (insn && insn->op != OP_SET && insn->op != OP_SET_AND &&
> + insn->op != OP_SET_OR && insn->op != OP_SET_XOR) {
> Instruction *next = NULL;
> switch (insn->op) {
> - case OP_NEG:
> - case OP_ABS:
> - case OP_CVT:
> - next = insn->getSrc(0)->getInsn();
> - if (insn->sType != next->dType)
> - return NULL;
> - break;
> case OP_MOV:
> next = insn->getSrc(0)->getInsn();
> break;
> @@ -946,29 +939,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
>
> case OP_SET: // TODO: SET_AND,OR,XOR
delete this comment?!
> {
> + /* This optimizes the case where the output of a set is being compared
> + * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
> + * can be a lot cleverer in our comparison.
> + */
> CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
> CondCode cc, ccZ;
> - if (i->src(t).mod != Modifier(0))
> - return;
> - if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
> + if (imm0.reg.data.u32 != 0 || !si)
> return;
> cc = si->setCond;
> ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
> + // We do everything assuming var (cmp) 0, reverse the condition if 0 is
> + // first.
> if (s == 0)
> ccZ = reverseCondCode(ccZ);
> + // If there is a negative modifier, we need to undo that, by flipping
> + // the comparison to zero.
> + if (i->src(t).mod.neg())
> + ccZ = reverseCondCode(ccZ);
> + // If this is a signed comparison, we expect the input to be a regular
> + // boolean, i.e. 0/-1. However the rest of the logic assumes that true
> + // is positive, so just flip the sign.
> + if (i->sType == TYPE_S32) {
> + assert(!isFloatType(si->dType));
> + ccZ = reverseCondCode(ccZ);
> + }
can both this and the previous condition evaluate to true? if yes, this
double-flips ccZ...
> switch (ccZ) {
> - case CC_LT: cc = CC_FL; break;
> - case CC_GE: cc = CC_TR; break;
> - case CC_EQ: cc = inverseCondCode(cc); break;
> - case CC_LE: cc = inverseCondCode(cc); break;
> - case CC_GT: break;
> - case CC_NE: break;
> + case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
> + case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
> + case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
> + case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
> + case CC_GT: break; // bool > 0 -- bool
> + case CC_NE: break; // bool != 0 -- bool
> default:
> return;
> }
> +
> + // Update the condition of this SET to be identical to the origin set,
> + // but with the updated condition code. The original SET should get
> + // DCE'd, ideally.
> + i->op = si->op;
> i->asCmp()->setCond = cc;
> i->setSrc(0, si->src(0));
> i->setSrc(1, si->src(1));
> + if (si->srcExists(2))
> + i->setSrc(2, si->src(2));
> i->sType = si->sType;
> }
> break;
More information about the Nouveau
mailing list