[Beignet] [PATCH 5/5] GBE: enable uniform analysis for bool data type.
Zhigang Gong
zhigang.gong at intel.com
Thu May 8 00:57:33 PDT 2014
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 25 ++++++++++++---
backend/src/backend/gen_reg_allocation.cpp | 51 ++++++++++++++++++++++++------
backend/src/ir/liveness.cpp | 3 +-
3 files changed, 63 insertions(+), 16 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ee896c5..62e000c 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1958,8 +1958,6 @@ namespace gbe
return true;
}
- //printf("reg = %d isscalarorbool %d \n", insn.getDst(0), sel.isScalarOrBool(insn.getDst(0)));
-
// Look for immediate values
GenRegister src0, src1;
SelectionDAG *dag0 = dag.child[0];
@@ -1990,8 +1988,7 @@ namespace gbe
}
// Output the binary instruction
- if (sel.curr.execWidth != 1 &&
- sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL) {
+ if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL) {
GBE_ASSERT(insn.getOpcode() == OP_AND ||
insn.getOpcode() == OP_OR ||
insn.getOpcode() == OP_XOR);
@@ -2626,7 +2623,7 @@ namespace gbe
GenRegister addrDW = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD, simdWidth == 1));
sel.push();
- if (sel.isScalarOrBool(addr.reg())) {
+ if (simdWidth == 1) {
sel.curr.noMask = 1;
sel.curr.execWidth = 1;
}
@@ -2913,6 +2910,7 @@ namespace gbe
tmpDst = GenRegister::nullud();
else
tmpDst = sel.selReg(dst, TYPE_BOOL);
+
// Look for immediate values for the right source
GenRegister src0, src1;
SelectionDAG *dag0 = dag.child[0];
@@ -2935,6 +2933,8 @@ namespace gbe
}
sel.push();
+ if (sel.isScalarOrBool(dst))
+ sel.curr.noMask = 1;
sel.curr.physicalFlag = 0;
sel.curr.modFlag = 1;
sel.curr.flagIndex = (uint16_t)dst;
@@ -2957,6 +2957,15 @@ namespace gbe
type == TYPE_DOUBLE || type == TYPE_FLOAT ||
type == TYPE_U32 || type == TYPE_S32))
sel.curr.flagGen = 1;
+ else if (sel.isScalarOrBool(dst)) {
+ // If the dest reg is a scalar bool, we can't set it as the
+ // dst register, as the execution width is still 8 or 16.
+ // Instead, we set flagGen to needStoreBool and change
+ // the dst to the null register, and let the flag reg allocation
+ // function generate the flag grf on demand correctly later.
+ sel.curr.flagGen = needStoreBool;
+ tmpDst = GenRegister::nullud();
+ }
sel.CMP(getGenCompare(opcode), src0, src1, tmpDst);
}
sel.pop();
@@ -3195,6 +3204,12 @@ namespace gbe
const Register pred = insn.getPredicate();
sel.push();
+ if (sel.isScalarOrBool(insn.getDst(0)) == true) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+
sel.curr.physicalFlag = 0;
sel.curr.flagIndex = (uint16_t) pred;
sel.curr.predicate = GEN_PREDICATE_NORMAL;
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 718f618..ae6ce86 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -514,17 +514,30 @@ namespace gbe
(insn.opcode == SEL_OP_MOV ||
insn.opcode == SEL_OP_AND ||
insn.opcode == SEL_OP_OR ||
- insn.opcode == SEL_OP_XOR))
- insn.extra.function = GEN_CONDITIONAL_NEQ;
+ insn.opcode == SEL_OP_XOR)) {
+ if (!selection.isScalarOrBool(ir::Register(insn.state.flagIndex)))
+ insn.extra.function = GEN_CONDITIONAL_NEQ;
+ else {
+ // If this is a modFlag on a scalar bool, we need to remove it
+ // from the allocated flags map. Then later, the user can
+ // validate the flag from the scalar value correctly.
+ allocatedFlags.erase(ir::Register(insn.state.flagIndex));
+ continue;
+ }
+ }
if ((insn.state.externFlag &&
- insn.state.predicate != GEN_PREDICATE_NONE &&
- validatedFlags.find(insn.state.flagIndex) == validatedFlags.end())) {
+ insn.state.predicate != GEN_PREDICATE_NONE &&
+ validatedFlags.find(insn.state.flagIndex) == validatedFlags.end())) {
// This is an external bool, we need to validate it if it is not validated yet.
SelectionInstruction *cmp0 = selection.create(SEL_OP_CMP, 1, 2);
- cmp0->state = GenInstructionState(insn.state.execWidth);
+ cmp0->state = GenInstructionState(ctx.getSimdWidth());
cmp0->state.flag = insn.state.flag;
cmp0->state.subFlag = insn.state.subFlag;
- cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
+ if (selection.isScalarOrBool(ir::Register(insn.state.flagIndex)))
+ cmp0->state.noMask = 1;
+ cmp0->src(0) = selection.isScalarOrBool(ir::Register(insn.state.flagIndex)) ?
+ GenRegister::uw1grf(ir::Register(insn.state.flagIndex)) :
+ GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
cmp0->src(1) = GenRegister::immuw(0);
cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
cmp0->extra.function = GEN_CONDITIONAL_NEQ;
@@ -539,13 +552,27 @@ namespace gbe
insn.state.flag = 0;
insn.state.subFlag = 1;
insn.state.physicalFlag = 1;
+ // If this is for MOV/AND/OR/..., we don't need to waste an extra instruction
+ // to generate the flag here; just continue to the next instruction, so that
+ // validTempFlagReg is not destroyed.
+ if (insn.state.modFlag == 1 &&
+ (insn.opcode == SEL_OP_MOV ||
+ insn.opcode == SEL_OP_AND ||
+ insn.opcode == SEL_OP_OR ||
+ insn.opcode == SEL_OP_XOR))
+ continue;
+
if ((insn.state.predicate != GEN_PREDICATE_NONE)
&& validTempFlagReg != insn.state.flagIndex) {
SelectionInstruction *cmp0 = selection.create(SEL_OP_CMP, 1, 2);
- cmp0->state = GenInstructionState(insn.state.execWidth);
+ cmp0->state = GenInstructionState(ctx.getSimdWidth());
cmp0->state.flag = insn.state.flag;
cmp0->state.subFlag = insn.state.subFlag;
- cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
+ if (selection.isScalarOrBool(ir::Register(insn.state.flagIndex)))
+ cmp0->state.noMask = 1;
+ cmp0->src(0) = selection.isScalarOrBool(ir::Register(insn.state.flagIndex)) ?
+ GenRegister::uw1grf(ir::Register(insn.state.flagIndex)) :
+ GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
cmp0->src(1) = GenRegister::immuw(0);
cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
cmp0->extra.function = GEN_CONDITIONAL_NEQ;
@@ -569,7 +596,13 @@ namespace gbe
if (insn.state.flagGen == 1 &&
!flagBooleans.contains((ir::Register)(insn.state.flagIndex))) {
SelectionInstruction *sel0 = selection.create(SEL_OP_SEL, 1, 2);
- sel0->state = GenInstructionState(ctx.getSimdWidth());
+ uint32_t simdWidth;
+ simdWidth = selection.isScalarOrBool((ir::Register)insn.state.flagIndex) ?
+ 1 : ctx.getSimdWidth();
+
+ sel0->state = GenInstructionState(simdWidth);
+ if (selection.isScalarOrBool(ir::Register(insn.state.flagIndex)))
+ sel0->state.noMask = 1;
sel0->state.flag = insn.state.flag;
sel0->state.subFlag = insn.state.subFlag;
sel0->state.predicate = GEN_PREDICATE_NORMAL;
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
index e36b194..3469a20 100644
--- a/backend/src/ir/liveness.cpp
+++ b/backend/src/ir/liveness.cpp
@@ -76,8 +76,7 @@ namespace ir {
// A destination is a killed value
for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
const Register reg = insn.getDst(dstID);
- if ( uniform &&
- fn.getRegisterFamily(reg) != ir::FAMILY_BOOL &&
+ if (uniform &&
fn.getRegisterFamily(reg) != ir::FAMILY_QWORD &&
!info.bb.definedPhiRegs.contains(reg) &&
insn.getOpcode() != ir::OP_ATOMIC &&
--
1.8.3.2
More information about the Beignet
mailing list