[Mesa-dev] [PATCH] nv50/ir: Improve performance of signed division by powers of two
Rhys Perry
pendingchaos02 at gmail.com
Fri Jun 8 22:03:38 UTC 2018
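When the divisor of a signed 32-bit division is a power of two (or the
negation of one), replace the generic lowering with a shift sequence:
negate the dividend if the divisor is negative, add (|b| - 1) to the
dividend when its sign bit is set so the result rounds toward zero, and
then shift right by log2(|b|). The existing "divide by -1 becomes NEG"
case is moved ahead of the power-of-two checks.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>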
---
.../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 29 +++++++++++++++++++---
1 file changed, 25 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 39177bd044..7a18a5fe73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1095,10 +1095,35 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->op = OP_MOV;
i->setSrc(1, NULL);
} else
+ if (imm0.reg.data.s32 == -1) {
+ i->op = OP_NEG;
+ i->setSrc(1, NULL);
+ } else
if (i->dType == TYPE_U32 && imm0.isPow2()) {
i->op = OP_SHR;
i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32)));
} else
+ if (i->dType == TYPE_S32 && util_is_power_of_two_or_zero(abs(imm0.reg.data.s32))) {
+ Value *a = i->getSrc(0);
+ int32_t b = imm0.reg.data.s32;
+
+ if (b < 0) {
+ a = bld.getSSA();
+ bld.mkOp1(OP_NEG, TYPE_S32, a, i->getSrc(0));
+ b = -b;
+ }
+
+ Value *sign = bld.getSSA();
+ Value *tmp0 = bld.getSSA();
+ Value *tmp1 = bld.getSSA();
+ bld.mkOp2(OP_SHR, TYPE_U32, sign, a, bld.mkImm(31));
+ bld.mkOp2(OP_ADD, TYPE_U32, tmp0, a, bld.mkImm(b - 1));
+ bld.mkOp3(OP_SELP, TYPE_U32, tmp1, tmp0, a, sign);
+
+ i->op = OP_SHR;
+ i->setSrc(0, tmp1);
+ i->setSrc(1, bld.mkImm(util_logbase2(b)));
+ } else
if (i->dType == TYPE_U32) {
Instruction *mul;
Value *tA, *tB;
@@ -1129,10 +1154,6 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
delete_Instruction(prog, i);
- } else
- if (imm0.reg.data.s32 == -1) {
- i->op = OP_NEG;
- i->setSrc(1, NULL);
} else {
LValue *tA, *tB;
LValue *tD;
--
2.14.4
More information about the mesa-dev mailing list