[Beignet] [PATCH] add selection IR SEL_OP_LABEL and SEL_OP_BRANCH.
xionghu.luo at intel.com
xionghu.luo at intel.com
Wed Feb 8 10:58:36 UTC 2017
From: Luo Xionghu <xionghu.luo at intel.com>
lower LABEL, backward/forward BRANCH after instruction selection.
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
backend/src/CMakeLists.txt | 1 +
backend/src/backend/gen_context.cpp | 2 +
backend/src/backend/gen_insn_selection.cpp | 170 +++-----
backend/src/backend/gen_insn_selection.hpp | 17 +-
backend/src/backend/gen_insn_selection.hxx | 1 +
.../backend/gen_insn_selection_branch_lowering.cpp | 468 +++++++++++++++++++++
backend/src/backend/gen_insn_selection_passes.hpp | 30 ++
backend/src/sys/intrusive_list.hpp | 4 +
8 files changed, 581 insertions(+), 112 deletions(-)
create mode 100644 backend/src/backend/gen_insn_selection_branch_lowering.cpp
create mode 100644 backend/src/backend/gen_insn_selection_passes.hpp
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 6ff25e7..d1f3f32 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -104,6 +104,7 @@ set (GBE_SRC
backend/gen_insn_selection.cpp
backend/gen_insn_selection.hpp
backend/gen_insn_selection_optimize.cpp
+ backend/gen_insn_selection_branch_lowering.cpp
backend/gen_insn_scheduling.cpp
backend/gen_insn_scheduling.hpp
backend/gen_insn_selection_output.cpp
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index c8019e3..19550a3 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -27,6 +27,7 @@
#include "backend/gen_defs.hpp"
#include "backend/gen_encoder.hpp"
#include "backend/gen_insn_selection.hpp"
+#include "backend/gen_insn_selection_passes.hpp"
#include "backend/gen_insn_scheduling.hpp"
#include "backend/gen_insn_selection_output.hpp"
#include "backend/gen_reg_allocation.hpp"
@@ -4047,6 +4048,7 @@ namespace gbe
if (OCL_OUTPUT_SEL_IR)
outputSelectionIR(*this, this->sel, genKernel->getName());
schedulePreRegAllocation(*this, *this->sel);
+ lowerBranch(this->sel);
sel->addID();
if (UNLIKELY(ra->allocate(*this->sel) == false))
return false;
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 128c2bc..e7ff970 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -291,6 +291,11 @@ namespace gbe
insn->parent = this;
}
+ void SelectionBlock::insertAfter(SelectionInstruction *prevInsn, SelectionInstruction *insn) {
+ this->insnList.insert_after(prevInsn, insn);
+ insn->parent = this;
+ }
+
void SelectionBlock::append(SelectionVector *vec) {
this->vectorList.push_back(vec);
}
@@ -646,7 +651,7 @@ namespace gbe
/*! Encode a barrier instruction */
void FENCE(GenRegister dst);
/*! Encode a label instruction */
- void LABEL(ir::LabelIndex label);
+ void LABEL(ir::LabelIndex label, ir::LabelIndex jip);
/*! Jump indexed instruction, return the encoded instruction count according to jump distance. */
int JMPI(Reg src, ir::LabelIndex target, ir::LabelIndex origin);
/*! IF indexed instruction */
@@ -661,6 +666,8 @@ namespace gbe
void BRD(Reg src, ir::LabelIndex jip);
/*! BRC indexed instruction */
void BRC(Reg src, ir::LabelIndex jip, ir::LabelIndex uip);
+ /*! BRANCH instruction */
+ void BRANCH(Reg reg, ir::LabelIndex src, ir::LabelIndex dst, uint32_t pred_index, uint32_t jip);
/*! Compare instructions */
void CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst = GenRegister::null());
/*! Select instruction with embedded comparison */
@@ -843,6 +850,12 @@ namespace gbe
return temps;
}
+ INLINE ir::LabelIndex newAuxLabel()
+ {
+ currAuxLabel++;
+ return (ir::LabelIndex)currAuxLabel;
+ }
+
/*! Use custom allocators */
GBE_CLASS(Opaque);
friend class SelectionBlock;
@@ -858,12 +871,6 @@ namespace gbe
bool bHasSends;
uint32_t ldMsgOrder;
bool slowByteGather;
- INLINE ir::LabelIndex newAuxLabel()
- {
- currAuxLabel++;
- return (ir::LabelIndex)currAuxLabel;
- }
-
};
///////////////////////////////////////////////////////////////////////////
@@ -1244,9 +1251,10 @@ namespace gbe
/*! Syntactic sugar for method declaration */
typedef const GenRegister &Reg;
- void Selection::Opaque::LABEL(ir::LabelIndex index) {
+ void Selection::Opaque::LABEL(ir::LabelIndex index, ir::LabelIndex jip) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_LABEL, 0, 0);
insn->index = index.value();
+ insn->index1 = jip.value();
}
void Selection::Opaque::BARRIER(GenRegister src, GenRegister fence, uint32_t barrierType) {
@@ -1294,6 +1302,15 @@ namespace gbe
insn->index1 = uip.value();
}
+ void Selection::Opaque::BRANCH(Reg reg, ir::LabelIndex src, ir::LabelIndex dst, uint32_t pred_index, uint32_t jip) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_BRANCH, 0, 1);
+ insn->src(0) = reg;
+ insn->index = src.value();
+ insn->index1 = dst.value();
+ insn->jip = jip;
+ insn->extra.pred_index = pred_index;
+ }
+
void Selection::Opaque::IF(Reg src, ir::LabelIndex jip, ir::LabelIndex uip) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_IF, 0, 1);
insn->src(0) = src;
@@ -1306,7 +1323,7 @@ namespace gbe
SelectionInstruction *insn = this->appendInsn(SEL_OP_ELSE, 0, 1);
insn->src(0) = src;
insn->index = jip.value();
- this->LABEL(elseLabel);
+ this->LABEL(elseLabel, ir::LabelIndex(0));
}
void Selection::Opaque::ENDIF(Reg src, ir::LabelIndex jip, ir::LabelIndex endifLabel) {
@@ -1314,7 +1331,7 @@ namespace gbe
this->block->endifLabel = this->newAuxLabel();
else
this->block->endifLabel = endifLabel;
- this->LABEL(this->block->endifLabel);
+ this->LABEL(this->block->endifLabel, ir::LabelIndex(0));
SelectionInstruction *insn = this->appendInsn(SEL_OP_ENDIF, 0, 1);
insn->src(0) = src;
insn->index = this->block->endifLabel.value();
@@ -2530,7 +2547,7 @@ namespace gbe
this->block->hasBranch = bb.getLastInstruction()->getOpcode() == OP_BRA ||
bb.getLastInstruction()->getOpcode() == OP_RET;
if (!this->block->hasBranch)
- this->block->endifOffset = -1;
+ this->block->needJump = false;
// Build the DAG on the fly
uint32_t insnNum = 0;
@@ -2604,7 +2621,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
// Bottom up code generation
bool needEndif = this->block->hasBranch == false && !this->block->hasBarrier;
needEndif = needEndif && bb.needEndif;
- this->block->removeSimpleIfEndif = insnNum < 10 && isSimpleBlock(bb, insnNum);
+ this->block->removeSimpleIfEndif = false;//insnNum < 10 && isSimpleBlock(bb, insnNum);
if (needEndif && !this->block->removeSimpleIfEndif) {
if(!bb.needIf) // this basic block is the exit of a structure
this->ENDIF(GenRegister::immd(0), bb.endifLabel, bb.endifLabel);
@@ -2750,7 +2767,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
///////////////////////////////////////////////////////////////////////////
// Code selection public implementation
///////////////////////////////////////////////////////////////////////////
- const GenContext& Selection::getCtx()
+ GenContext& Selection::getCtx()
{
return this->opaque->ctx;
}
@@ -2904,6 +2921,12 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
return this->opaque->isPartialWrite(reg);
}
+ GenRegister Selection::selReg(ir::Register reg, ir::Type type) const {
+ return this->opaque->selReg(reg, type);
+ }
+
+ ir::LabelIndex Selection::newAuxLabel() { return this->opaque->newAuxLabel(); }
+
SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum) {
return this->opaque->create(opcode, dstNum, srcNum);
}
@@ -6602,18 +6625,18 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
{
using namespace ir;
const LabelIndex label = insn.getLabelIndex();
- const GenRegister src0 = sel.getBlockIP();
- const GenRegister src1 = sel.getLabelImmReg(label);
- const uint32_t simdWidth = sel.ctx.getSimdWidth();
GBE_ASSERTM(label < sel.ctx.getMaxLabel(), "We reached the maximum label number which is reserved for barrier handling");
- sel.LABEL(label);
- if(!insn.getParent()->needIf)
+ if(!insn.getParent()->needIf) {
+ sel.LABEL(label, LabelIndex(0));
return true;
+ }
// Do not emit any code for the "returning" block. There is no need for it
- if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock())
+ if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock()) {
+ sel.LABEL(label, LabelIndex(0));
return true;
+ }
LabelIndex jip;
const LabelIndex nextLabel = insn.getParent()->getNextBlock()->getLabelIndex();
@@ -6621,85 +6644,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
jip = sel.ctx.getLabelIndex(&insn);
else
jip = nextLabel;
-
- // Emit the mask computation at the head of each basic block
- sel.push();
- sel.curr.noMask = 1;
- sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.flag = 0;
- sel.curr.subFlag = 1;
- sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1);
- sel.pop();
-
- if (sel.block->hasBarrier) {
- // If this block has barrier, we don't execute the block until all lanes
- // are 1s. Set each reached lane to 1, then check all lanes. If there is any
- // lane not reached, we jump to jip. And no need to issue if/endif for
- // this block, as it will always excute with all lanes activated.
- sel.push();
- sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.curr.flag = 0;
- sel.curr.subFlag = 1;
- sel.setBlockIP(src0, sel.ctx.getMaxLabel());
- sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.noMask = 1;
- sel.cmpBlockIP(GEN_CONDITIONAL_EQ, src0, sel.ctx.getMaxLabel());
- if (simdWidth == 8)
- sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
- else if (simdWidth == 16)
- sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
- else
- NOT_IMPLEMENTED;
- sel.curr.noMask = 1;
- sel.curr.execWidth = 1;
- sel.curr.inversePredicate = 1;
- sel.JMPI(GenRegister::immd(0), jip, label);
- sel.pop();
- // FIXME, if the last BRA is unconditional jump, we don't need to update the label here.
- sel.push();
- sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.curr.flag = 0;
- sel.curr.subFlag = 1;
- sel.setBlockIP(src0, label.value());
- sel.pop();
- }
- else {
- if (sel.ctx.hasJIP(&insn) &&
- // If jump to next label and the endif offset is -1, then
- // We don't need to add a jmpi here, as the following IF will do the same
- // thing if all channels are disabled.
- (jip != nextLabel || sel.block->endifOffset != -1)) {
- // If it is required, insert a JUMP to bypass the block
- sel.push();
- sel.curr.flag = 0;
- sel.curr.subFlag = 1;
- if (simdWidth == 8)
- sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
- else if (simdWidth == 16)
- sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
- else
- NOT_IMPLEMENTED;
- sel.curr.noMask = 1;
- sel.curr.execWidth = 1;
- sel.curr.inversePredicate = 1;
- sel.JMPI(GenRegister::immd(0), jip, label);
- sel.pop();
- }
- if(!sel.block->removeSimpleIfEndif){
- sel.push();
- sel.curr.flag = 0;
- sel.curr.subFlag = 1;
- sel.curr.predicate = GEN_PREDICATE_NORMAL;
- if(!insn.getParent()->needEndif && insn.getParent()->needIf) {
- ir::LabelIndex label = insn.getParent()->endifLabel;
- sel.IF(GenRegister::immd(0), label, label);
- }
- else
- sel.IF(GenRegister::immd(0), sel.block->endifLabel, sel.block->endifLabel);
- sel.pop();
- }
- }
-
+ sel.LABEL(label, jip);
return true;
}
DECL_CTOR(LabelInstruction, 1, 1);
@@ -7259,7 +7204,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.curr.predicate = GEN_PREDICATE_NONE;
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
sel.ENDIF(GenRegister::immd(0), nextLabel);
- sel.block->endifOffset = -1;
+ sel.block->needJump = false;
sel.pop();
} else {
// Update the PcIPs
@@ -7275,7 +7220,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
else if(insn.getParent()->needEndif)
sel.ENDIF(GenRegister::immd(0), nextLabel);
}
- sel.block->endifOffset = -1;
+ sel.block->needJump = false;
if (nextLabel == jip) return;
// Branch to the jump target
sel.push();
@@ -7283,7 +7228,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.curr.noMask = 1;
sel.curr.predicate = GEN_PREDICATE_NONE;
// Actually, the origin of this JMPI should be the beginning of next BB.
- sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, ir::LabelIndex(curr->getLabelIndex().value() + 1));
+ sel.JMPI(GenRegister::immd(0), jip, ir::LabelIndex(curr->getLabelIndex().value() + 1));
+ sel.block->needJump = true;
sel.pop();
}
}
@@ -7317,7 +7263,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.curr.flagIndex = pred.value();
sel.curr.predicate = GEN_PREDICATE_NORMAL;
sel.setBlockIP(ip, dst.value());
- sel.block->endifOffset = -1;
+ sel.block->needJump = false;
sel.curr.predicate = GEN_PREDICATE_NONE;
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
sel.ENDIF(GenRegister::immd(0), next);
@@ -7327,7 +7273,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
else
sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
sel.curr.noMask = 1;
- sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, label);
+ sel.JMPI(GenRegister::immd(0), jip, label);
+ sel.block->needJump = true;
sel.pop();
} else {
const LabelIndex next = bb.getNextBlock()->getLabelIndex();
@@ -7336,7 +7283,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.curr.subFlag = 1;
if(insn.getParent()->needEndif)
sel.setBlockIP(ip, dst.value());
- sel.block->endifOffset = -1;
+ sel.block->needJump = false;
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
if(insn.getParent()->needEndif && !insn.getParent()->needIf)
sel.ENDIF(GenRegister::immd(0), insn.getParent()->endifLabel, insn.getParent()->endifLabel);
@@ -7348,7 +7295,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.curr.execWidth = 1;
sel.curr.noMask = 1;
sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, label);
+ sel.JMPI(GenRegister::immd(0), jip, label);
+ sel.block->needJump = true;
sel.pop();
}
}
@@ -7362,6 +7310,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
else if (opcode == OP_BRA) {
const LabelIndex dst = insn.getLabelIndex();
const LabelIndex src = insn.getParent()->getLabelIndex();
+ const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
sel.push();
if (insn.isPredicated() == true) {
@@ -7369,11 +7318,12 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.curr.externFlag = 1;
}
- // We handle foward and backward branches differently
- if (uint32_t(dst) <= uint32_t(src))
- this->emitBackwardBranch(sel, insn, dst, src);
- else
- this->emitForwardBranch(sel, insn, dst, src);
+ if (insn.isPredicated() == true) {
+ const Register pred = insn.getPredicateIndex();
+ sel.BRANCH(GenRegister::immd(0), dst, src, pred.value(), jip.value());
+ } else {
+ sel.BRANCH(GenRegister::immd(0), dst, src, 0, jip.value());
+ }
sel.pop();
}
else if(opcode == OP_IF) {
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 01999a2..fbbb826 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -68,6 +68,8 @@ namespace gbe
public:
/*! Owns the instruction */
SelectionBlock *parent;
+ /*! Get the parent Selection block */
+ SelectionBlock *getParent(void) { return parent; }
/*! Append an instruction before this one */
void prepend(SelectionInstruction &insn);
/*! Append an instruction after this one */
@@ -160,6 +162,7 @@ namespace gbe
uint16_t printfSize;
};
uint32_t workgroupOp;
+ uint32_t pred_index;
} extra;
/*! Gen opcode */
uint8_t opcode;
@@ -171,6 +174,8 @@ namespace gbe
uint32_t index;
/*! For BRC/IF to store the UIP */
uint32_t index1;
+ /*! for BRANCH to store jip */
+ uint32_t jip;
/*! instruction ID used for vector allocation. */
uint32_t ID;
DebugInfo DBGInfo;
@@ -258,8 +263,10 @@ namespace gbe
void append(SelectionInstruction *insn);
/*! Append a new selection instruction at the beginning of the block */
void prepend(SelectionInstruction *insn);
+ /*! insert a new selection instruction after prevInsn */
+ void insertAfter(SelectionInstruction *prevInsn, SelectionInstruction *insn);
ir::LabelIndex endifLabel;
- int endifOffset;
+ bool needJump;
bool hasBarrier;
bool hasBranch;
bool removeSimpleIfEndif;
@@ -305,6 +312,10 @@ namespace gbe
bool isScalarReg(const ir::Register ®) const;
/*! is this register a partially written register.*/
bool isPartialWrite(const ir::Register ®) const;
+ /*! create GenRegister for ir Register.*/
+ GenRegister selReg(ir::Register reg, ir::Type type) const;
+
+ ir::LabelIndex newAuxLabel();
/*! Create a new selection instruction */
SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
/*! List of emitted blocks */
@@ -316,11 +327,13 @@ namespace gbe
/* optimize at selection IR level */
void optimize(void);
+ /* branch lower at selection IR level */
+ void branchLowering(void);
uint32_t opt_features;
/* Add insn ID for sel IR */
void addID(void);
- const GenContext &getCtx();
+ GenContext &getCtx();
/*! Use custom allocators */
GBE_CLASS(Selection);
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 5d96e9e..9e4806b 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -90,6 +90,7 @@ DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction)
DECL_SELECTION_IR(CONVF_TO_I64, FloatToI64Instruction)
DECL_SELECTION_IR(I64MADSAT, I64MADSATInstruction)
+DECL_SELECTION_IR(BRANCH, UnaryInstruction)
DECL_SELECTION_IR(BRC, UnaryInstruction)
DECL_SELECTION_IR(BRD, UnaryInstruction)
DECL_SELECTION_IR(IF, UnaryInstruction)
diff --git a/backend/src/backend/gen_insn_selection_branch_lowering.cpp b/backend/src/backend/gen_insn_selection_branch_lowering.cpp
new file mode 100644
index 0000000..92efbdb
--- /dev/null
+++ b/backend/src/backend/gen_insn_selection_branch_lowering.cpp
@@ -0,0 +1,468 @@
+
+#include "backend/gen_insn_selection.hpp"
+#include "backend/gen_insn_selection_passes.hpp"
+#include "backend/gen_context.hpp"
+#include "ir/function.hpp"
+#include "ir/liveness.hpp"
+#include "ir/profile.hpp"
+#include "sys/cvar.hpp"
+#include "sys/vector.hpp"
+#include <algorithm>
+#include <climits>
+#include <map>
+
+namespace gbe
+{
+ class BranchLowering
+ {
+ public:
+ /*! To make function prototypes more readable */
+ typedef const GenRegister &Reg;
+ typedef const GenInstructionState &State;
+ BranchLowering(GenContext &ctx, SelectionBlock &selblock)
+ : ctx(ctx), selBlock(selblock) {}
+ void run();
+ void insnLower();
+ void lowerBRANCH(SelectionInstruction &insn);
+ void lowerForwardBRANCH(SelectionInstruction &insn);
+ void lowerBackwardBRANCH(SelectionInstruction &insn);
+ void lowerLABEL(SelectionInstruction &insn);
+
+ /* Get current block IP register according to label width. */
+ GenRegister getBlockIP() {
+ return ctx.isDWLabel() ? ctx.sel->selReg(ir::ocl::dwblockip, ir::TYPE_U32) : ctx.sel->selReg(ir::ocl::blockip, ir::TYPE_U32);
+ }
+
+ SelectionInstruction *setBlockIP(GenRegister blockip, uint32_t labelValue,
+ State state,
+ SelectionInstruction *prevInsn);
+ SelectionInstruction *cmpBlockIP(uint32_t cond, GenRegister blockip,
+ GenRegister labelReg, State state,
+ SelectionInstruction *prevInsn);
+ SelectionInstruction *cmpBlockIP(uint32_t cond, GenRegister blockip,
+ uint32_t labelValue, State state,
+ SelectionInstruction *prevInsn);
+ SelectionInstruction *MOV(Reg dst, Reg src, State state,
+ SelectionInstruction *prevInsn);
+ SelectionInstruction *CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst,
+ State state, SelectionInstruction *prevInsn);
+ SelectionInstruction *JMPI(Reg src, ir::LabelIndex index,
+ ir::LabelIndex origin, State state,
+ SelectionInstruction *prevInsn);
+ SelectionInstruction *IF(Reg src, ir::LabelIndex jip, ir::LabelIndex uip,
+ State state, SelectionInstruction *prevInsn);
+ SelectionInstruction *ENDIF(Reg src, ir::LabelIndex jip,
+ ir::LabelIndex endifLabel, State state,
+ SelectionInstruction *prevInsn);
+ SelectionInstruction *LABEL(ir::LabelIndex index, ir::LabelIndex jip,
+ State state, SelectionInstruction *prevInsn);
+ SelectionInstruction *generateInsn(SelectionOpcode opcode, uint32_t dstNum,
+ uint32_t srcNum, State state,
+ SelectionInstruction *prevInsn);
+ ~BranchLowering() {}
+
+ protected:
+ GenContext &ctx; //in case that we need it
+ SelectionBlock &selBlock;
+ bool lowered;
+ };
+
+ void BranchLowering::insnLower()
+ {
+ //for (auto &insn : selBlock.insnList) {
+ for (auto iter = selBlock.insnList.end() ; iter != selBlock.insnList.begin(); ) {
+ iter--;
+ SelectionInstruction &insn = *iter;
+ if (insn.opcode == SEL_OP_BRANCH) {
+ lowerBRANCH(insn);
+ }
+ else if (insn.opcode == SEL_OP_LABEL) {
+ lowerLABEL(insn);
+ }
+ }
+ for (auto iter = selBlock.insnList.begin() ; iter != selBlock.insnList.end(); iter++) {
+ SelectionInstruction &insn = *iter;
+ if (insn.opcode == SEL_OP_BRANCH) {
+ iter = selBlock.insnList.erase(&insn);
+ }
+ }
+
+ }
+
+ void BranchLowering::lowerBackwardBRANCH(SelectionInstruction &insn)
+ {
+ using namespace ir;
+ SelectionInstruction *prev_insn = &insn;
+ const GenRegister ip = getBlockIP();
+ const BasicBlock *currBB = insn.getParent()->bb;
+ const BasicBlock *nextBB = currBB->getNextBlock();
+ const LabelIndex nextLabel = nextBB->getLabelIndex();
+ const uint32_t jip = insn.jip;
+ const LabelIndex label = LabelIndex(insn.index1);
+ uint32_t dst = insn.index;
+ uint32_t predIndex = insn.extra.pred_index;
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ GBE_ASSERT(nextBB != NULL);
+
+ if (predIndex != 0) {
+
+ // Update the PcIPs for all the branches. Just put the IPs of the next
+ // block. Next instruction will properly update the IPs of the lanes
+ // that actually take the branch
+ {
+ GenInstructionState curr = insn.state;
+ prev_insn = setBlockIP(ip, nextLabel.value(), curr, prev_insn);
+ }
+ GBE_ASSERT(jip == dst);
+ GenInstructionState curr;
+ curr.execWidth = simdWidth;
+ curr.physicalFlag = 0;
+ curr.flagIndex = predIndex;
+ curr.predicate = GEN_PREDICATE_NORMAL;
+ prev_insn = setBlockIP(ip, dst, curr, prev_insn);
+ curr.predicate = GEN_PREDICATE_NONE;
+ if (!selBlock.hasBarrier)
+ prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn);
+ curr.execWidth = 1;
+ if (simdWidth == 16)
+ curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
+ else
+ curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
+ curr.noMask = 1;
+ prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jip), LabelIndex(label), curr, prev_insn);
+ selBlock.needJump = true;
+ } else {
+ // Update the PcIPs
+ GenInstructionState curr = insn.state;
+ curr.flag = 0;
+ curr.subFlag = 1;
+ if(insn.getParent()->bb->needEndif)
+ prev_insn = setBlockIP(ip, dst, curr, prev_insn);
+ if (!selBlock.hasBarrier) {
+ if(insn.getParent()->bb->needEndif && !insn.getParent()->bb->needIf)
+ prev_insn = ENDIF(GenRegister::immd(0), insn.getParent()->endifLabel, insn.getParent()->endifLabel, curr, prev_insn);
+ else if(insn.getParent()->bb->needEndif)
+ prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn);
+ }
+ // Branch to the jump target
+ {
+ GenInstructionState curr;
+ curr.execWidth = simdWidth;
+ curr.execWidth = 1;
+ curr.noMask = 1;
+ curr.predicate = GEN_PREDICATE_NONE;
+ prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jip), LabelIndex(label), curr, prev_insn);
+ selBlock.needJump = true;
+ }
+ }
+ }
+
+ void BranchLowering::lowerForwardBRANCH(SelectionInstruction &insn)
+ {
+ using namespace ir;
+ SelectionInstruction *prev_insn = &insn;
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ uint32_t dst = insn.index;
+ uint32_t predIndex = insn.extra.pred_index;
+ const uint32_t jipValue = insn.jip;
+ const GenRegister ip = getBlockIP();
+ // We will not emit any jump if we must go the next block anyway
+ const BasicBlock *currBB = insn.getParent()->bb;
+ const BasicBlock *nextBB = currBB->getNextBlock();
+ const LabelIndex nextLabel = nextBB->getLabelIndex();
+ if (predIndex != 0) {
+ GenInstructionState curr = insn.state;
+ // we don't need to set next label to the pcip
+ // as if there is no backward jump latter, then obviously everything will work fine.
+ // If there is backward jump latter, then all the pcip will be updated correctly there.
+ curr.execWidth = simdWidth;
+ curr.physicalFlag = 0;
+ curr.flagIndex = predIndex;
+ curr.predicate = GEN_PREDICATE_NORMAL;
+ prev_insn = setBlockIP(ip, dst, curr, prev_insn);
+ curr.predicate = GEN_PREDICATE_NONE;
+ if (!selBlock.hasBarrier)
+ prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn);
+ selBlock.needJump = false;
+ } else {
+ // Update the PcIPs
+ GenInstructionState curr = insn.state;
+ curr.flag = 0;
+ curr.subFlag = 1;
+ if(insn.getParent()->bb->needEndif)
+ prev_insn = setBlockIP(ip, dst, curr, prev_insn);
+
+ if (!selBlock.hasBarrier) {
+ if(insn.getParent()->bb->needEndif && !insn.getParent()->bb->needIf)
+ prev_insn = ENDIF(GenRegister::immd(0), insn.getParent()->bb->endifLabel, insn.getParent()->bb->endifLabel, curr, prev_insn);
+ else if(insn.getParent()->bb->needEndif)
+ prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn);
+ }
+ selBlock.needJump = false;
+ if (nextLabel == jipValue) return;
+ // Branch to the jump target
+ {
+ GenInstructionState curr;
+ curr.execWidth = simdWidth;
+ curr.execWidth = 1;
+ curr.noMask = 1;
+ curr.predicate = GEN_PREDICATE_NONE;
+ // Actually, the origin of this JMPI should be the beginning of next
+ // BB.
+ prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue),
+ ir::LabelIndex(currBB->getLabelIndex().value() + 1), curr,
+ prev_insn);
+ selBlock.needJump = true;
+ }
+ }
+ }
+
+ void BranchLowering::lowerBRANCH(SelectionInstruction& insn)
+ {
+ uint32_t dst = insn.index;
+ uint32_t src = insn.index1;
+ if (dst <= src) {
+ lowerBackwardBRANCH(insn);
+ } else {
+ lowerForwardBRANCH(insn);
+ }
+ }
+
+ void BranchLowering::lowerLABEL(SelectionInstruction& insn)
+ {
+ //src0, src1, jip, nextLabel, hasBarrier, sel
+ using namespace ir;
+ uint32_t labelValue = insn.index;
+ const GenRegister src0 = getBlockIP();
+ const GenRegister src1 = ctx.isDWLabel() ? GenRegister::immud(labelValue) : GenRegister::immuw(labelValue);
+ const uint32_t simdWidth = ctx.getSimdWidth();
+
+ if (insn.getParent()->bb == &ctx.getFunction().getBottomBlock()) {
+ return;
+ }
+
+ const LabelIndex nextLabel = insn.getParent()->bb->getNextBlock()->getLabelIndex();
+ uint32_t jipValue = insn.index1;
+ if(jipValue == 0)
+ return;
+
+ SelectionInstruction* prev_insn = &insn;
+ GenInstructionState curr;
+ curr.execWidth = simdWidth;
+ curr.noMask = 1;
+ curr.predicate = GEN_PREDICATE_NONE;
+ curr.flag = 0;
+ curr.subFlag = 1;
+ SelectionInstruction *insn_cmp = cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1, curr, prev_insn);
+ prev_insn = insn_cmp;
+
+ if (selBlock.hasBarrier) {
+ GenInstructionState curr;
+ curr.execWidth = simdWidth;
+ curr.predicate = GEN_PREDICATE_NORMAL;
+ curr.flag = 0;
+ curr.subFlag = 1;
+ prev_insn = setBlockIP(src0, ctx.getMaxLabel(), curr, prev_insn);
+ curr.predicate = GEN_PREDICATE_NONE;
+ curr.noMask = 1;
+ prev_insn = cmpBlockIP(GEN_CONDITIONAL_EQ, src0, ctx.getMaxLabel(), curr, prev_insn);
+
+ if (simdWidth == 8)
+ curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
+ else if (simdWidth == 16)
+ curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
+ else
+ NOT_IMPLEMENTED;
+ curr.noMask = 1;
+ curr.execWidth = 1;
+ curr.inversePredicate = 1;
+ prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue),
+ LabelIndex(labelValue), curr, prev_insn);
+ {
+ GenInstructionState curr;
+ curr.execWidth = simdWidth;
+ curr.predicate = GEN_PREDICATE_NORMAL;
+ curr.flag = 0;
+ curr.subFlag = 1;
+ prev_insn = setBlockIP(src0, labelValue, curr, prev_insn);
+ }
+ } else {
+ if (
+ // If jump to next label and the endif offset is -1, then
+ // We don't need to add a jmpi here, as the following IF will do the
+ // same
+ // thing if all channels are disabled.
+ (jipValue != nextLabel.value() || selBlock.needJump != false)) {
+ // If it is required, insert a JUMP to bypass the block
+ GenInstructionState curr;
+ curr.noMask = 1;
+ curr.execWidth = 1;
+ curr.inversePredicate = 1;
+ curr.flag = 0;
+ curr.subFlag = 1;
+ if (simdWidth == 8)
+ curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
+ else if (simdWidth == 16)
+ curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
+ else
+ NOT_IMPLEMENTED;
+ prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue),
+ LabelIndex(labelValue), curr, prev_insn);
+ }
+ GenInstructionState curr;
+ curr.execWidth = simdWidth;
+ curr.predicate = GEN_PREDICATE_NORMAL;
+ curr.flag = 0;
+ curr.subFlag = 1;
+ if(!insn.getParent()->bb->needEndif && insn.getParent()->bb->needIf) {
+ ir::LabelIndex label = insn.getParent()->bb->endifLabel;
+ prev_insn = IF(GenRegister::immd(0), label, label, curr, prev_insn);
+ }
+ else {
+ prev_insn = IF(GenRegister::immd(0), selBlock.endifLabel, selBlock.endifLabel, curr, prev_insn);
+ }
+ }
+ }
+ /* Set current label register to a label value. */
+ SelectionInstruction *
+ BranchLowering::setBlockIP(GenRegister blockip, uint32_t labelValue,
+ State state, SelectionInstruction *prevInsn) {
+ if (!ctx.isDWLabel())
+ return MOV(GenRegister::retype(blockip, GEN_TYPE_UW),
+ GenRegister::immuw(labelValue), state, prevInsn);
+ else
+ return MOV(GenRegister::retype(blockip, GEN_TYPE_UD),
+ GenRegister::immud(labelValue), state, prevInsn);
+ }
+
+ SelectionInstruction *
+ BranchLowering::cmpBlockIP(uint32_t cond, GenRegister blockip,
+ GenRegister labelReg, State state,
+ SelectionInstruction *prevInsn) {
+ if (!ctx.isDWLabel())
+ return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UW), labelReg,
+ GenRegister::retype(GenRegister::null(), GEN_TYPE_UW), state, prevInsn);
+ else
+ return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UD), labelReg,
+ GenRegister::retype(GenRegister::null(), GEN_TYPE_UD), state, prevInsn);
+ }
+
+ SelectionInstruction *
+ BranchLowering::cmpBlockIP(uint32_t cond, GenRegister blockip,
+ uint32_t labelValue, State state,
+ SelectionInstruction *prevInsn) {
+ if (!ctx.isDWLabel())
+ return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UW),
+ GenRegister::immuw(labelValue),
+ GenRegister::retype(GenRegister::null(), GEN_TYPE_UW), state,
+ prevInsn);
+ else
+ return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UD),
+ GenRegister::immuw(labelValue),
+ GenRegister::retype(GenRegister::null(), GEN_TYPE_UD), state,
+ prevInsn);
+ }
+
+ SelectionInstruction *BranchLowering::MOV(Reg dst, Reg src, State state,
+ SelectionInstruction *prevInsn) {
+ SelectionInstruction *insn = this->generateInsn(SEL_OP_MOV, 1, 1, state, prevInsn);
+ insn->dst(0) = dst;
+ insn->src(0) = src;
+ return insn;
+ }
+
+ SelectionInstruction *BranchLowering::CMP(uint32_t conditional, Reg src0,
+ Reg src1, Reg dst, State state,
+ SelectionInstruction *prevInsn) {
+ SelectionInstruction *insn = this->generateInsn(SEL_OP_CMP, 1, 2, state, prevInsn);
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->dst(0) = dst;
+ insn->extra.function = conditional;
+ return insn;
+ }
+
+ SelectionInstruction *BranchLowering::JMPI(Reg src, ir::LabelIndex index,
+ ir::LabelIndex origin, State state,
+ SelectionInstruction *prevInsn) {
+ SelectionInstruction *insn = this->generateInsn(SEL_OP_JMPI, 0, 1, state, prevInsn);
+ insn->src(0) = src;
+ insn->index = index.value();
+ ir::LabelIndex start, end;
+ if (origin.value() < index.value()) {
+ // Forward Jump, need to exclude the target BB. Because we
+ // need to jump to the beginning of it.
+ start = origin;
+ end = ir::LabelIndex(index.value() - 1);
+ } else {
+ start = index;
+ end = origin;
+ }
+ // FIXME, this longjmp check is too hacky. We need to support instruction
+ // insertion at code emission stage in the future.
+ insn->extra.longjmp = ctx.getFunction().getDistance(start, end) > 3000;
+ return insn;
+ }
+
+ SelectionInstruction *BranchLowering::IF(Reg src, ir::LabelIndex jip,
+ ir::LabelIndex uip, State state,
+ SelectionInstruction *prevInsn) {
+ SelectionInstruction *insn = this->generateInsn(SEL_OP_IF, 0, 1, state, prevInsn);
+ insn->src(0) = src;
+ insn->index = jip.value();
+ insn->index1 = uip.value();
+ return insn;
+ }
+
+ SelectionInstruction *BranchLowering::ENDIF(Reg src, ir::LabelIndex jip,
+ ir::LabelIndex endifLabel, State state,
+ SelectionInstruction *prevInsn) {
+ if(endifLabel == 0)
+ selBlock.endifLabel = ctx.sel->newAuxLabel();
+ else
+ selBlock.endifLabel = endifLabel;
+ SelectionInstruction * insn_label = LABEL(selBlock.endifLabel, ir::LabelIndex(0), state, prevInsn);
+ SelectionInstruction *insn = this->generateInsn(SEL_OP_ENDIF, 0, 1, state, insn_label);
+ insn->src(0) = src;
+ insn->index = selBlock.endifLabel.value();
+ return insn;
+ }
+
+ SelectionInstruction *BranchLowering::LABEL(ir::LabelIndex index,
+ ir::LabelIndex jip, State state,
+ SelectionInstruction *prevInsn) {
+ SelectionInstruction *insn = this->generateInsn(SEL_OP_LABEL, 0, 0, state, prevInsn);
+ insn->index = index.value();
+ insn->index1 = jip.value();
+ return insn;
+ }
+
+ SelectionInstruction *
+ BranchLowering::generateInsn(SelectionOpcode opcode, uint32_t dstNum,
+ uint32_t srcNum, State state,
+ SelectionInstruction *prevInsn) {
+ GBE_ASSERT(dstNum <= SelectionInstruction::MAX_DST_NUM &&
+ srcNum <= SelectionInstruction::MAX_SRC_NUM);
+ SelectionInstruction *insn = ctx.sel->create(opcode, dstNum, srcNum);
+ selBlock.insertAfter(prevInsn, insn);
+ insn->state = state;
+ return insn;
+ }
+
+ void BranchLowering::run() {
+ lowered = false;
+ insnLower();
+ }
+
+ void lowerBranch(Selection *sel)
+ {
+ //do basic block level optimization
+ for (SelectionBlock &block : *sel->blockList) {
+ BranchLowering branch_lower(sel->getCtx(), block);
+ branch_lower.run();
+ }
+
+ }
+} /* namespace gbe */
+
+
diff --git a/backend/src/backend/gen_insn_selection_passes.hpp b/backend/src/backend/gen_insn_selection_passes.hpp
new file mode 100644
index 0000000..19d3f4c
--- /dev/null
+++ b/backend/src/backend/gen_insn_selection_passes.hpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __GEN_INSN_SELECTION_PASSES_HPP__
+#define __GEN_INSN_SELECTION_PASSES_HPP__
+
+#include "backend/gen_insn_selection.hpp"
+
+namespace gbe
+{
+ /* !optimize functions after instruction selection */
+
+ /* !lower functions after instruction selection */
+ void lowerBranch(Selection* sel);
+}
+
+#endif
diff --git a/backend/src/sys/intrusive_list.hpp b/backend/src/sys/intrusive_list.hpp
index 2e2f2a9..6677c77 100644
--- a/backend/src/sys/intrusive_list.hpp
+++ b/backend/src/sys/intrusive_list.hpp
@@ -147,6 +147,10 @@ namespace gbe
link(v, pos.node());
return iterator(v);
}
+ iterator insert_after(iterator pos, value_type* v) {
+ append(v, pos.node());
+ return iterator(v);
+ }
iterator erase(iterator it) {
iterator itErase(it);
++it;
--
2.5.0
More information about the Beignet
mailing list