[Beignet] [PATCH V3] backend: refine fdiv to rcp at some cases
rander.wang
rander.wang at intel.com
Tue Jul 4 07:20:08 UTC 2017
When src0 of fdiv is an immediate value that is
exactly a power of 2, such as 2.0f, 4.0f or 1.0f/8.0f,
fdiv %0, imm, %1 can be converted to
rcp %0, %1
mul %0, %0, imm.
fdiv costs 8 cycles and rcp costs 4 cycles, so this saves at least
3 cycles.
Passes the conformance tests and utests.
V2: refine negation flag
V3: modify negation by negate
Signed-off-by: rander.wang <rander.wang at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 7498f38..c89a83e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3279,6 +3279,34 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.MATH(dst, function, src0, src1);
} else if(type == TYPE_FLOAT) {
GBE_ASSERT(op != OP_REM);
+ SelectionDAG *child0 = dag.child[0];
+ if (child0 && child0->insn.getOpcode() == OP_LOADI) {
+ const auto &loadimm = cast<LoadImmInstruction>(child0->insn);
+ const Immediate imm = loadimm.getImmediate();
+ float immVal = imm.getFloatValue();
+ int* dwPtr = (int*)&immVal;
+
+ //if the immediate is exactly a power of 2, the FDIV can be converted to RCP
+ if((*dwPtr & 0x7FFFFF) == 0) {
+ if(immVal == -1.0f)
+ {
+ GenRegister tmp = GenRegister::negate(src1);
+ sel.MATH(dst, GEN_MATH_FUNCTION_INV, tmp);
+ }
+ else {
+ sel.MATH(dst, GEN_MATH_FUNCTION_INV, src1);
+ if(immVal != 1.0f) {
+ GenRegister isrc = GenRegister::immf(immVal);
+ sel.MUL(dst, dst, isrc);
+ }
+ }
+
+ if(dag.child[1])
+ dag.child[1]->isRoot = 1;
+ return true;
+ }
+ }
+
sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
} else if (type == TYPE_S64 || type == TYPE_U64) {
GenRegister tmp[15];
--
2.7.4
More information about the Beignet
mailing list