[Mesa-dev] [PATCH v3] nvc0/ir: replace cvt instructions with add to improve shader performance

Karol Herbst kherbst at redhat.com
Mon Jan 28 21:21:34 UTC 2019


Gives me a performance boost of 0.2% in pixmark_piano on my gk106, gm204 and
gp107.

Reduces the number of generated convert instructions by roughly 30% in
shader-db.

v2: only for 32 bit operations
    move some common code out of the switch
    handle OP_SAT with modifiers
v3: only for registers and const memory
    rework if clauses
    merge isCvt into this patch

Signed-off-by: Karol Herbst <kherbst at redhat.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |  1 +
 .../drivers/nouveau/codegen/nv50_ir_inlines.h | 17 +++++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 64 +++++++++++++++++++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h   |  1 +
 4 files changed, 83 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 8085bb2f542..b00a005bdef 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -867,6 +867,7 @@ public:
    inline bool isPseudo() const { return op < OP_MOV; }
    bool isDead() const;
    bool isNop() const;
+   inline bool isCvt() const;
    bool isCommutationLegal(const Instruction *) const; // must be adjacent !
    bool isActionEqual(const Instruction *) const;
    bool isResultEqual(const Instruction *) const;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
index 4cb53ab42ed..06882058dc9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
@@ -419,4 +419,21 @@ LValue *Function::getLValue(int id)
    return reinterpret_cast<LValue *>(allLValues.get(id));
 }
 
+bool Instruction::isCvt() const // true if this op falls into the conversion-style group below
+{
+   switch (op) {
+   case OP_ABS: // these unary ops always count, whatever their operands are
+   case OP_CEIL:
+   case OP_FLOOR:
+   case OP_NEG:
+   case OP_TRUNC:
+   case OP_SAT:
+      return true;
+   case OP_CVT: // an explicit convert counts only if neither def 0 nor src 0 is a predicate
+      return def(0).getFile() != FILE_PREDICATE && src(0).getFile() != FILE_PREDICATE;
+   default: // every other opcode
+      return false;
+   }
+}
+
 #endif // __NV50_IR_INLINES_H__
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 295497be2f9..28f0aae6432 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -719,6 +719,67 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
    bb->remove(bb->getEntry());
 }
 
+// Rewrites a same-type abs/neg/sat (which would otherwise end up as f2f or
+// i2i) in place into an add with rZero, expressing the operation through
+// source modifiers or the saturate flag:
+//  - fabs(a)     -> fadd(0, abs a)          (integer abs is left untouched)
+//  - fneg(a)     -> fadd(neg 0, neg a)
+//  - ineg(a)     -> iadd(0, neg a)          (only for an unmodified source)
+//  - fneg(abs a) -> fadd(neg 0, neg abs a)
+//  - sat(a)      -> sat add(0, a)           (the source modifier is kept)
+void
+NVC0LegalizePostRA::replaceCvt(Instruction *cvt)
+{
+   if (!isFloatType(cvt->sType) && typeSizeof(cvt->sType) != 4) // non-float types must be 4 bytes wide
+      return; // NOTE(review): float types of any size pass the check above - confirm this is intended
+   if (cvt->sType != cvt->dType) // source and destination types differ: leave it alone
+      return;
+   // Only GPR and const memory sources are handled. Others could be made to
+   // work, but they only occur with optimizations disabled, so we don't care.
+   if (cvt->src(0).getFile() != FILE_GPR &&
+       cvt->src(0).getFile() != FILE_MEMORY_CONST)
+      return;
+
+   Modifier mod0, mod1; // mod0: modifier for the new rZero operand, mod1: for the original operand
+
+   switch (cvt->op) {
+   case OP_ABS:
+      if (cvt->src(0).mod) // a source that already carries a modifier is not rewritten
+         return;
+      if (!isFloatType(cvt->sType)) // float only
+         return;
+      mod0 = 0;
+      mod1 = NV50_IR_MOD_ABS;
+      break;
+   case OP_NEG:
+      if (!isFloatType(cvt->sType) && cvt->src(0).mod) // integer neg: source must be unmodified
+         return;
+      if (isFloatType(cvt->sType) && // float neg: source has either no modifier or exactly abs
+          (cvt->src(0).mod && cvt->src(0).mod != Modifier(NV50_IR_MOD_ABS)))
+         return;
+
+      mod0 = isFloatType(cvt->sType) ? NV50_IR_MOD_NEG : 0; // float negates the zero too (presumably so neg(+0) still gives -0 - confirm)
+      mod1 = cvt->src(0).mod == Modifier(NV50_IR_MOD_ABS)
+           ? NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG;
+      break;
+   case OP_SAT:
+      if (!isFloatType(cvt->sType) && cvt->src(0).mod.abs()) // integer sat with an abs source modifier is skipped
+         return;
+      mod0 = 0;
+      mod1 = cvt->src(0).mod; // carry the existing source modifier over unchanged
+      cvt->saturate = true; // saturation becomes a flag on the add itself
+      break;
+   default: // CEIL, FLOOR, TRUNC, CVT and anything else stay as they are
+      return;
+   }
+
+   cvt->op = OP_ADD;
+   cvt->moveSources(0, 1); // presumably shifts the original operand from slot 0 to slot 1 - verify against moveSources
+   cvt->setSrc(0, rZero); // rZero becomes the first operand
+   cvt->src(0).mod = mod0;
+   cvt->src(1).mod = mod1;
+}
+
 bool
 NVC0LegalizePostRA::visit(BasicBlock *bb)
 {
@@ -758,6 +819,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
                next = hi;
          }
 
+         if (i->isCvt())
+            replaceCvt(i);
+
          if (i->op != OP_MOV && i->op != OP_PFETCH)
             replaceZero(i);
       }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index e0f50ab0904..4679c56471b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -81,6 +81,7 @@ private:
    virtual bool visit(Function *);
    virtual bool visit(BasicBlock *);
 
+   void replaceCvt(Instruction *);
    void replaceZero(Instruction *);
    bool tryReplaceContWithBra(BasicBlock *);
    void propagateJoin(BasicBlock *);
-- 
2.20.1



More information about the mesa-dev mailing list