Mesa (master): nv50/ir/opt: try to convert ABS(SUB) to SAD

Christoph Bumiller chrisbmr at kemper.freedesktop.org
Sun Apr 29 16:06:52 UTC 2012


Module: Mesa
Branch: master
Commit: 1f4c154f0253ed8fb448402532cfa670f74e69cd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f4c154f0253ed8fb448402532cfa670f74e69cd

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Sat Apr 28 17:06:59 2012 +0200

nv50/ir/opt: try to convert ABS(SUB) to SAD

---

 src/gallium/drivers/nv50/codegen/nv50_ir.cpp       |   25 +++++
 src/gallium/drivers/nv50/codegen/nv50_ir.h         |    1 +
 .../drivers/nv50/codegen/nv50_ir_emit_nv50.cpp     |   33 ++++++
 src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h |   11 ++
 .../drivers/nv50/codegen/nv50_ir_peephole.cpp      |  107 +++++++++++++++++---
 .../drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp     |   16 +++
 .../drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp   |    2 +-
 7 files changed, 179 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
index 1006985..335e9e0 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp
@@ -658,6 +658,31 @@ Instruction::swapSources(int a, int b)
    srcs[b].mod = m;
 }
 
+// TODO: extend for delta < 0
+void
+Instruction::moveSources(int s, int delta)
+{
+   if (delta == 0)
+      return;
+   assert(delta > 0);
+
+   int k;
+   for (k = 0; srcExists(k); ++k) {
+      for (int i = 0; i < 2; ++i) {
+         if (src(k).indirect[i] >= s)
+            src(k).indirect[i] += delta;
+      }
+   }
+   if (predSrc >= s)
+      predSrc += delta;
+   if (flagsSrc >= s)
+      flagsSrc += delta;
+
+   --k;
+   for (int p = k + delta; k >= s; --k, --p)
+      setSrc(p, src(k));
+}
+
 void
 Instruction::takeExtraSources(int s, Value *values[3])
 {
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index e544d07..9b47e3e 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -603,6 +603,7 @@ public:
    void setSrc(int s, Value *);
    void setSrc(int s, const ValueRef&);
    void swapSources(int a, int b);
+   void moveSources(int s, int delta); // NOTE: only delta > 0 implemented
    bool setIndirect(int s, int dim, Value *);
 
    inline ValueRef& src(int s) { return srcs[s]; }
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
index c534d4a..7542b84 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp
@@ -99,6 +99,7 @@ private:
    void emitFMUL(const Instruction *);
    void emitFMAD(const Instruction *);
    void emitIMAD(const Instruction *);
+   void emitISAD(const Instruction *);
 
    void emitMINMAX(const Instruction *);
 
@@ -1023,6 +1024,35 @@ CodeEmitterNV50::emitIMAD(const Instruction *i)
 }
 
 void
+CodeEmitterNV50::emitISAD(const Instruction *i)
+{
+   if (i->encSize == 8) {
+      code[0] = 0x50000000;
+      switch (i->sType) {
+      case TYPE_U32: code[1] = 0x04000000; break;
+      case TYPE_S32: code[1] = 0x0c000000; break;
+      case TYPE_U16: code[1] = 0x00000000; break;
+      case TYPE_S16: code[1] = 0x08000000; break;
+      default:
+         assert(0);
+         break;
+      }
+      emitForm_MAD(i);
+   } else {
+      switch (i->sType) {
+      case TYPE_U32: code[0] = 0x50008000; break;
+      case TYPE_S32: code[0] = 0x50008100; break;
+      case TYPE_U16: code[0] = 0x50000000; break;
+      case TYPE_S16: code[0] = 0x50000100; break;
+      default:
+         assert(0);
+         break;
+      }
+      emitForm_MUL(i);
+   }
+}
+
+void
 CodeEmitterNV50::emitSET(const Instruction *i)
 {
    code[0] = 0x30000000;
@@ -1543,6 +1573,9 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
       else
          emitIMAD(insn);
       break;
+   case OP_SAD:
+      emitISAD(insn);
+      break;
    case OP_NOT:
       emitNOT(insn);
       break;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
index 93e502e..b62431f 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
@@ -114,6 +114,17 @@ static inline bool isSignedType(DataType ty)
    }
 }
 
+static inline DataType intTypeToSigned(DataType ty)
+{
+   switch (ty) {
+   case TYPE_U32: return TYPE_S32;
+   case TYPE_U16: return TYPE_S16;
+   case TYPE_U8: return TYPE_S8;
+   default:
+      return ty;
+   }
+}
+
 const ValueRef *ValueRef::getIndirect(int dim) const
 {
    return isIndirect(dim) ? &insn->src(indirect[dim]) : NULL;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
index 5bc3a45..8613d7f 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
@@ -915,57 +915,129 @@ class AlgebraicOpt : public Pass
 private:
    virtual bool visit(BasicBlock *);
 
-   void handleADD(Instruction *);
+   void handleABS(Instruction *);
+   bool handleADD(Instruction *);
+   bool tryADDToMADOrSAD(Instruction *, operation toOp);
    void handleMINMAX(Instruction *);
    void handleRCP(Instruction *);
    void handleSLCT(Instruction *);
    void handleLOGOP(Instruction *);
    void handleCVT(Instruction *);
+
+   BuildUtil bld;
 };
 
 void
+AlgebraicOpt::handleABS(Instruction *abs)
+{
+   Instruction *sub = abs->getSrc(0)->getInsn();
+   DataType ty;
+   if (!sub ||
+       !prog->getTarget()->isOpSupported(OP_SAD, abs->dType))
+      return;
+   // expect not to have mods yet, if we do, bail
+   if (sub->src(0).mod || sub->src(1).mod)
+      return;
+   // hidden conversion ?
+   ty = intTypeToSigned(sub->dType);
+   if (abs->dType != abs->sType || ty != abs->sType)
+      return;
+
+   if ((sub->op != OP_ADD && sub->op != OP_SUB) ||
+       sub->src(0).getFile() != FILE_GPR || sub->src(0).mod ||
+       sub->src(1).getFile() != FILE_GPR || sub->src(1).mod)
+         return;
+
+   Value *src0 = sub->getSrc(0);
+   Value *src1 = sub->getSrc(1);
+
+   if (sub->op == OP_ADD) {
+      Instruction *neg = sub->getSrc(1)->getInsn();
+      if (neg && neg->op != OP_NEG) {
+         neg = sub->getSrc(0)->getInsn();
+         src0 = sub->getSrc(1);
+      }
+      if (!neg || neg->op != OP_NEG ||
+          neg->dType != neg->sType || neg->sType != ty)
+         return;
+      src1 = neg->getSrc(0);
+   }
+
+   // found ABS(a - b): turn abs into SAD(a, b, 0)
+   abs->moveSources(1, 2); // move sources >=1 up by 2
+   abs->op = OP_SAD;
+   abs->setType(sub->dType);
+   abs->setSrc(0, src0);
+   abs->setSrc(1, src1);
+   bld.setPosition(abs, false);
+   abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0));
+}
+
+bool
 AlgebraicOpt::handleADD(Instruction *add)
 {
    Value *src0 = add->getSrc(0);
    Value *src1 = add->getSrc(1);
+
+   if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
+      return false;
+
+   bool changed = false;
+   if (!changed && prog->getTarget()->isOpSupported(OP_MAD, add->dType))
+      changed = tryADDToMADOrSAD(add, OP_MAD);
+   if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType))
+      changed = tryADDToMADOrSAD(add, OP_SAD);
+   return changed;
+}
+
+// ADD(SAD(a,b,0), c) -> SAD(a,b,c)
+// ADD(MUL(a,b), c) -> MAD(a,b,c)
+bool
+AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
+{
+   Value *src0 = add->getSrc(0);
+   Value *src1 = add->getSrc(1);
    Value *src;
    int s;
+   const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL;
+   const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0));
    Modifier mod[4];
 
-   if (!prog->getTarget()->isOpSupported(OP_MAD, add->dType))
-      return;
-
-   if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
-      return;
-
    if (src0->refCount() == 1 &&
-       src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_MUL)
+       src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp)
       s = 0;
    else
    if (src1->refCount() == 1 &&
-       src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_MUL)
+       src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp)
       s = 1;
    else
-      return;
+      return false;
 
    if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) ||
        (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb))
-      return;
+      return false;
 
    src = add->getSrc(s);
 
    if (src->getInsn()->postFactor)
-      return;
+      return false;
+   if (toOp == OP_SAD) {
+      ImmediateValue imm;
+      if (!src->getInsn()->src(2).getImmediate(imm))
+         return false;
+      if (!imm.isInteger(0))
+         return false;
+   }
 
    mod[0] = add->src(0).mod;
    mod[1] = add->src(1).mod;
    mod[2] = src->getUniqueInsn()->src(0).mod;
    mod[3] = src->getUniqueInsn()->src(1).mod;
 
-   if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & Modifier(~NV50_IR_MOD_NEG))
-      return;
+   if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad)
+      return false;
 
-   add->op = OP_MAD;
+   add->op = toOp;
    add->subOp = src->getInsn()->subOp; // potentially mul-high
 
    add->setSrc(2, add->src(s ? 0 : 1));
@@ -974,6 +1046,8 @@ AlgebraicOpt::handleADD(Instruction *add)
    add->src(0).mod = mod[2] ^ mod[s];
    add->setSrc(1, src->getInsn()->getSrc(1));
    add->src(1).mod = mod[3];
+
+   return true;
 }
 
 void
@@ -1140,6 +1214,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
    for (Instruction *i = bb->getEntry(); i; i = next) {
       next = i->next;
       switch (i->op) {
+      case OP_ABS:
+         handleABS(i);
+         break;
       case OP_ADD:
          handleADD(i);
          break;
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
index 026a6a0..2ca4979 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
@@ -87,6 +87,7 @@ private:
    void emitUMUL(const Instruction *);
    void emitFMUL(const Instruction *);
    void emitIMAD(const Instruction *);
+   void emitISAD(const Instruction *);
    void emitFMAD(const Instruction *);
 
    void emitNOT(Instruction *);
@@ -621,6 +622,18 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i)
 }
 
 void
+CodeEmitterNVC0::emitISAD(const Instruction *i)
+{
+   assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
+   assert(i->encSize == 8);
+
+   emitForm_A(i, HEX64(38000000, 00000003));
+
+   if (i->dType == TYPE_S32)
+      code[0] |= 1 << 5;
+}
+
+void
 CodeEmitterNVC0::emitNOT(Instruction *i)
 {
    assert(i->encSize == 8);
@@ -1608,6 +1621,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
       else
          emitIMAD(insn);
       break;
+   case OP_SAD:
+      emitISAD(insn);
+      break;
    case OP_NOT:
       emitNOT(insn);
       break;
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
index ffa40dd..10c2d09 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -456,7 +456,7 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const
 {
    if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
       return false;
-   if (op == OP_SAD && ty != TYPE_S32)
+   if (op == OP_SAD && ty != TYPE_S32 && ty != TYPE_U32)
       return false;
    if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
       return false;




More information about the mesa-commit mailing list