[Mesa-dev] [PATCH v2] nv50/ir: improve performance of signed division by powers of two
Rhys Perry
pendingchaos02 at gmail.com
Sat Jun 9 15:55:59 UTC 2018
Changes in v2:
- Stylistic changes
- Use OP_SLCT instead of OP_SELP, which only worked by luck
- Fix issues in edge cases
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
.../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 30 +++++++++++++++++++---
1 file changed, 26 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 39177bd044..d636eb130a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1095,10 +1095,36 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->op = OP_MOV;
i->setSrc(1, NULL);
} else
+ if (imm0.reg.data.s32 == -1) {
+ i->op = OP_NEG;
+ i->setSrc(1, NULL);
+ } else
if (i->dType == TYPE_U32 && imm0.isPow2()) {
i->op = OP_SHR;
i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32)));
} else
+ if (i->dType == TYPE_S32 && util_is_power_of_two_or_zero(llabs(imm0.reg.data.s32))) {
+ Value *a = i->getSrc(0);
+ int64_t absb = llabs(imm0.reg.data.s32);
+
+ Value *sign = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), a, bld.mkImm(31));
+ Value *adjusted = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), a,
+ bld.loadImm(bld.getSSA(), (uint32_t)(absb - 1)));
+
+ Value *selected = bld.getSSA();
+ bld.mkCmp(OP_SLCT, CC_NE, TYPE_U32, selected, TYPE_U32, adjusted, a, sign);
+
+ if (imm0.reg.data.s32 < 0) {
+ i->op = OP_NEG;
+ i->setSrc(0, bld.mkOp2v(
+ OP_SHR, TYPE_S32, bld.getSSA(), selected, bld.mkImm(util_logbase2(absb))));
+ i->setSrc(1, NULL);
+ } else {
+ i->op = OP_SHR;
+ i->setSrc(0, selected);
+ i->setSrc(1, bld.mkImm(util_logbase2(absb)));
+ }
+ } else
if (i->dType == TYPE_U32) {
Instruction *mul;
Value *tA, *tB;
@@ -1129,10 +1155,6 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
delete_Instruction(prog, i);
- } else
- if (imm0.reg.data.s32 == -1) {
- i->op = OP_NEG;
- i->setSrc(1, NULL);
} else {
LValue *tA, *tB;
LValue *tD;
--
2.14.4
More information about the mesa-dev mailing list