[Mesa-dev] [PATCH v2] nv50/ir: improve performance of signed division by powers of two

Rhys Perry pendingchaos02 at gmail.com
Sat Jun 9 15:55:59 UTC 2018


Changes in v2:
- Stylistic changes
- Use OP_SLCT instead of OP_SELP, which only worked by luck
- Fix issues in edge cases

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 30 +++++++++++++++++++---
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 39177bd044..d636eb130a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1095,10 +1095,36 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
          i->op = OP_MOV;
          i->setSrc(1, NULL);
       } else
+      if (imm0.reg.data.s32 == -1) {
+         i->op = OP_NEG;
+         i->setSrc(1, NULL);
+      } else
       if (i->dType == TYPE_U32 && imm0.isPow2()) {
          i->op = OP_SHR;
          i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32)));
       } else
+      if (i->dType == TYPE_S32 && util_is_power_of_two_or_zero(llabs(imm0.reg.data.s32))) {
+         Value *a = i->getSrc(0);
+         int64_t absb = llabs(imm0.reg.data.s32);
+
+         Value *sign = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), a, bld.mkImm(31));
+         Value *adjusted = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), a,
+                                      bld.loadImm(bld.getSSA(), (uint32_t)(absb - 1)));
+
+         Value *selected = bld.getSSA();
+         bld.mkCmp(OP_SLCT, CC_NE, TYPE_U32, selected, TYPE_U32, adjusted, a, sign);
+
+         if (imm0.reg.data.s32 < 0) {
+            i->op = OP_NEG;
+            i->setSrc(0, bld.mkOp2v(
+               OP_SHR, TYPE_S32, bld.getSSA(), selected, bld.mkImm(util_logbase2(absb))));
+            i->setSrc(1, NULL);
+         } else {
+            i->op = OP_SHR;
+            i->setSrc(0, selected);
+            i->setSrc(1, bld.mkImm(util_logbase2(absb)));
+         }
+      } else
       if (i->dType == TYPE_U32) {
          Instruction *mul;
          Value *tA, *tB;
@@ -1129,10 +1155,6 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
             bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
 
          delete_Instruction(prog, i);
-      } else
-      if (imm0.reg.data.s32 == -1) {
-         i->op = OP_NEG;
-         i->setSrc(1, NULL);
       } else {
          LValue *tA, *tB;
          LValue *tD;
-- 
2.14.4



More information about the mesa-dev mailing list