[Beignet] [PATCH V2] Add FCMP UNO support.
Yang Rong
rong.r.yang at intel.com
Sun Dec 1 20:50:13 PST 2013
Also correct some of the unordered (Uxx) compares.
V2: Do not use OCL_OPTIMIZE_IMMEDIATE for XOR and ORD compares.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 10 ++++--
backend/src/llvm/llvm_gen_backend.cpp | 54 +++++++++++++++++++++++-------
2 files changed, 48 insertions(+), 16 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index f532158..9f6e615 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1679,14 +1679,17 @@ namespace gbe
SelectionDAG *dag1 = dag.child[1];
// Right source can always be an immediate
- if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
+ //logical ops on bool shouldn't use 0xffff, may use flag reg, so can't optimize
+ if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI &&
+ canGetRegisterFromImmediate(dag1->insn) && type != TYPE_BOOL) {
const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
src0 = sel.selReg(insn.getSrc(0), type);
src1 = getRegisterFromImmediate(childInsn.getImmediate());
if (dag0) dag0->isRoot = 1;
}
// Left source cannot be immediate but it is OK if we can commute
- else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag0->insn)) {
+ else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI &&
+ canGetRegisterFromImmediate(dag0->insn) && type != TYPE_BOOL) {
const auto &childInsn = cast<LoadImmInstruction>(dag0->insn);
src0 = sel.selReg(insn.getSrc(1), type);
src1 = getRegisterFromImmediate(childInsn.getImmediate());
@@ -2526,7 +2529,8 @@ namespace gbe
SelectionDAG *dag1 = dag.child[1];
// Right source can always be an immediate
- if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
+ if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI &&
+ canGetRegisterFromImmediate(dag1->insn) && opcode != OP_ORD) {
const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
src0 = sel.selReg(insn.getSrc(0), type);
Immediate imm = childInsn.getImmediate();
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index c53d674..1316d4b 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1606,26 +1606,54 @@ namespace gbe
// Get the element type and the number of elements
Type *operandType = I.getOperand(0)->getType();
const ir::Type type = getType(ctx, operandType);
+ const ir::Type insnType = getType(ctx, I.getType());
// Emit the instructions in a row
const ir::Register dst = this->getRegister(&I);
const ir::Register src0 = this->getRegister(I.getOperand(0));
const ir::Register src1 = this->getRegister(I.getOperand(1));
+ const ir::Register tmp = ctx.reg(getFamily(ctx, I.getType()));
+ Value *cv = ConstantInt::get(I.getType(), 1);
switch (I.getPredicate()) {
- case ICmpInst::FCMP_OEQ:
- case ICmpInst::FCMP_UEQ: ctx.EQ(type, dst, src0, src1); break;
- case ICmpInst::FCMP_ONE:
- case ICmpInst::FCMP_UNE: ctx.NE(type, dst, src0, src1); break;
- case ICmpInst::FCMP_OLE:
- case ICmpInst::FCMP_ULE: ctx.LE(type, dst, src0, src1); break;
- case ICmpInst::FCMP_OGE:
- case ICmpInst::FCMP_UGE: ctx.GE(type, dst, src0, src1); break;
- case ICmpInst::FCMP_OLT:
- case ICmpInst::FCMP_ULT: ctx.LT(type, dst, src0, src1); break;
- case ICmpInst::FCMP_OGT:
- case ICmpInst::FCMP_UGT: ctx.GT(type, dst, src0, src1); break;
- case ICmpInst::FCMP_ORD: ctx.ORD(type, dst, src0, src0); break;
+ case ICmpInst::FCMP_OEQ: ctx.EQ(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_ONE: ctx.NE(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OLE: ctx.LE(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OGE: ctx.GE(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OLT: ctx.LT(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_OGT: ctx.GT(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_ORD: ctx.ORD(type, dst, src0, src1); break;
+ case ICmpInst::FCMP_UNO:
+ ctx.ORD(type, tmp, src0, src1);
+ ctx.XOR(insnType, dst, tmp, getRegister(cv)); //TODO: Use NOT directly
+ break;
+ case ICmpInst::FCMP_UEQ:
+ ctx.NE(type, tmp, src0, src1);
+ ctx.XOR(insnType, dst, tmp, getRegister(cv));
+ break;
+ case ICmpInst::FCMP_UGT:
+ ctx.LE(type, tmp, src0, src1);
+ ctx.XOR(insnType, dst, tmp, getRegister(cv));
+ break;
+ case ICmpInst::FCMP_UGE:
+ ctx.LT(type, tmp, src0, src1);
+ ctx.XOR(insnType, dst, tmp, getRegister(cv));
+ break;
+ case ICmpInst::FCMP_ULT:
+ ctx.GE(type, tmp, src0, src1);
+ ctx.XOR(insnType, dst, tmp, getRegister(cv));
+ break;
+ case ICmpInst::FCMP_ULE:
+ ctx.GT(type, tmp, src0, src1);
+ ctx.XOR(insnType, dst, tmp, getRegister(cv));
+ break;
+ case ICmpInst::FCMP_UNE:
+ ctx.EQ(type, tmp, src0, src1);
+ ctx.XOR(insnType, dst, tmp, getRegister(cv));
+ break;
+ case ICmpInst::FCMP_TRUE:
+ ctx.MOV(insnType, dst, getRegister(cv));
+ break;
default: NOT_SUPPORTED;
}
}
--
1.8.1.2
More information about the Beignet
mailing list