[Beignet] [PATCH v2] use global flag 0.0 to control unstructured simple block.
Zhigang Gong
zhigang.gong at linux.intel.com
Thu Oct 16 00:04:40 PDT 2014
Xionghu,
This version still has bug, it fails the unit test case compiler_switch. A quick look at the assembly code
( 180) cmp.le(16) null:UW g1<8,8,1>:UW 0xfUW { align1 WE_all 1H switch };
( 182) add(1) g121.7<1>:D g8.3<0,1,0>:D 16D { align1 WE_all };
( 184) (+f0) add(16) g112<1>:UD g8.2<0,1,0>:UD -g8.2<0,1,0>:UD { align1 WE_all 1H };
( 186) (+f0) add(16) g118<1>:UD g121.7<0,1,0>:UD -g8.3<0,1,0>:UD { align1 WE_all 1H };
( 188) (+f0) mov(16) g1<1>:UW 0x17UW { align1 WE_normal 1H };
( 190) (+f0) send(16) g116<1>:UW g118<8,8,1>:UD
data (bti: 3, rgba: 14, SIMD16, legacy, Untyped Surface Read) mlen 2 rlen 2 { align1 WE_all 1H };
( 192) mov(1) g121.6<1>:UD g116<0,1,0>:UD { align1 WE_all };
( 194) mov(16) g114<1>:F g121.6<0,1,0>:F { align1 WE_all 1H };
( 196) (+f0) send(16) null:UW g112<8,8,1>:UD
data (bti: 2, rgba: 14, SIMD16, legacy, Untyped Surface Write) mlen 4 rlen 0 { align1 WE_normal 1H };
You can easily find that the wrong instructions are 192 and 194.
Those instructions are for uniform scalar load and store. You need to
find out those code path and fix this type of cases.
On Thu, Oct 16, 2014 at 02:50:57PM +0800, Zhigang Gong wrote:
> This version LGTM, will push with slight change which is adding a FIXME tag
> at the belongToStructure checking to indicate we need to add support for
> structured simple block in the future.
>
> Thanks.
>
> On Wed, Oct 15, 2014 at 11:49:59AM +0800, xionghu.luo at intel.com wrote:
> > From: Luo Xionghu <xionghu.luo at intel.com>
> >
> > filter the simple block out and replace the if/endif with global flag
> > to control.
> >
> > v2: fix the luxmark sala performance degression due to extern flag in a
> > BRA instruction.
> >
> > Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> > ---
> > backend/src/backend/gen_insn_selection.cpp | 80 ++++++++++++++++++++++------
> > backend/src/backend/gen_insn_selection.hpp | 1 +
> > backend/src/backend/gen_reg_allocation.cpp | 3 +-
> > 3 files changed, 68 insertions(+), 16 deletions(-)
> >
> > diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> > index b2df76f..f0fd494 100644
> > --- a/backend/src/backend/gen_insn_selection.cpp
> > +++ b/backend/src/backend/gen_insn_selection.cpp
> > @@ -217,7 +217,7 @@ namespace gbe
> > // SelectionBlock
> > ///////////////////////////////////////////////////////////////////////////
> >
> > - SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), isLargeBlock(false), endifLabel( (ir::LabelIndex) 0){}
> > + SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb), isLargeBlock(false), endifLabel( (ir::LabelIndex) 0), removeSimpleIfEndif(false){}
> >
> > void SelectionBlock::append(ir::Register reg) { tmp.push_back(reg); }
> >
> > @@ -405,6 +405,8 @@ namespace gbe
> > uint32_t buildBasicBlockDAG(const ir::BasicBlock &bb);
> > /*! Perform the selection on the basic block */
> > void matchBasicBlock(const ir::BasicBlock &bb, uint32_t insnNum);
> > + /*! a simple block can use predication instead of if/endif*/
> > + bool isSimpleBlock(const ir::BasicBlock &bb, uint32_t insnNum);
> > /*! A root instruction needs to be generated */
> > bool isRoot(const ir::Instruction &insn) const;
> >
> > @@ -1483,6 +1485,37 @@ namespace gbe
> > return false;
> > }
> >
> > + bool Selection::Opaque::isSimpleBlock(const ir::BasicBlock &bb, uint32_t insnNum) {
> > +
> > + if(bb.belongToStructure)
> > + return false;
> > +
> > + for (int32_t insnID = insnNum-1; insnID >= 0; --insnID) {
> > + SelectionDAG &dag = *insnDAG[insnID];
> > + const ir::Instruction& insn = dag.insn;
> > + if(insn.isMemberOf<ir::CompareInstruction>() ||
> > + insn.isMemberOf<ir::SelectInstruction>() ||
> > + insn.getOpcode() == ir::OP_SIMD_ANY ||
> > + insn.getOpcode() == ir::OP_SIMD_ALL ||
> > + insn.getOpcode() == ir::OP_ELSE)
> > + return false;
> > + }
> > +
> > + // there would generate a extra CMP instruction for predicated BRA with extern flag,
> > + // should retrun false to keep the if/endif.
> > + if((insnDAG[insnNum-1]->insn.isMemberOf<ir::BranchInstruction>())){
> > + if (insnDAG[insnNum-1]->insn.getOpcode() == ir::OP_BRA) {
> > + const ir::BranchInstruction &insn = ir::cast<ir::BranchInstruction>(insnDAG[insnNum-1]->insn);
> > + if(insn.isPredicated() && insnDAG[insnNum-1]->child[0] == NULL){
> > + return false;
> > + }
> > + }
> > + }
> > +
> > + return true;
> > + }
> > +
> > +
> > uint32_t Selection::Opaque::buildBasicBlockDAG(const ir::BasicBlock &bb)
> > {
> > using namespace ir;
> > @@ -1563,7 +1596,8 @@ namespace gbe
> > // Bottom up code generation
> > bool needEndif = this->block->hasBranch == false && !this->block->hasBarrier;
> > needEndif = needEndif && bb.needEndif;
> > - if (needEndif) {
> > + this->block->removeSimpleIfEndif = insnNum < 10 && isSimpleBlock(bb, insnNum);
> > + if (needEndif && !this->block->removeSimpleIfEndif) {
> > if(!bb.needIf) // this basic block is the exit of a structure
> > this->ENDIF(GenRegister::immd(0), bb.endifLabel, bb.endifLabel);
> > else {
> > @@ -1584,6 +1618,13 @@ namespace gbe
> >
> > // Start a new code fragment
> > this->startBackwardGeneration();
> > +
> > + if(this->block->removeSimpleIfEndif){
> > + this->push();
> > + this->curr.predicate = GEN_PREDICATE_NORMAL;
> > + this->curr.flag = 0;
> > + this->curr.subFlag = 0;
> > + }
> > // If there is no branch at the end of this block.
> >
> > // Try all the patterns from best to worst
> > @@ -1593,6 +1634,13 @@ namespace gbe
> > ++it;
> > } while (it != end);
> > GBE_ASSERT(it != end);
> > +
> > + if(this->block->removeSimpleIfEndif){
> > + this->curr.predicate = GEN_PREDICATE_NONE;
> > + this->curr.flag = 0;
> > + this->curr.subFlag = 0;
> > + this->pop();
> > + }
> > // If we are in if/endif fix mode, and this block is
> > // large enough, we need to insert endif/if pair to eliminate
> > // the too long if/endif block.
> > @@ -3836,15 +3884,17 @@ namespace gbe
> > sel.JMPI(GenRegister::immd(0), jip, label);
> > sel.pop();
> > }
> > - sel.push();
> > - sel.curr.predicate = GEN_PREDICATE_NORMAL;
> > - if(!insn.getParent()->needEndif && insn.getParent()->needIf) {
> > - ir::LabelIndex label = insn.getParent()->endifLabel;
> > - sel.IF(GenRegister::immd(0), label, label);
> > - }
> > - else
> > - sel.IF(GenRegister::immd(0), sel.block->endifLabel, sel.block->endifLabel);
> > - sel.pop();
> > + if(!sel.block->removeSimpleIfEndif){
> > + sel.push();
> > + sel.curr.predicate = GEN_PREDICATE_NORMAL;
> > + if(!insn.getParent()->needEndif && insn.getParent()->needIf) {
> > + ir::LabelIndex label = insn.getParent()->endifLabel;
> > + sel.IF(GenRegister::immd(0), label, label);
> > + }
> > + else
> > + sel.IF(GenRegister::immd(0), sel.block->endifLabel, sel.block->endifLabel);
> > + sel.pop();
> > + }
> > }
> >
> > return true;
> > @@ -4105,7 +4155,7 @@ namespace gbe
> > sel.curr.predicate = GEN_PREDICATE_NORMAL;
> > sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
> > sel.curr.predicate = GEN_PREDICATE_NONE;
> > - if (!sel.block->hasBarrier)
> > + if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
> > sel.ENDIF(GenRegister::immd(0), nextLabel);
> > sel.block->endifOffset = -1;
> > sel.pop();
> > @@ -4115,7 +4165,7 @@ namespace gbe
> > if(insn.getParent()->needEndif)
> > sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
> >
> > - if (!sel.block->hasBarrier) {
> > + if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
> > if(insn.getParent()->needEndif && !insn.getParent()->needIf)
> > sel.ENDIF(GenRegister::immd(0), insn.getParent()->endifLabel, insn.getParent()->endifLabel);
> > else if(insn.getParent()->needEndif)
> > @@ -4163,7 +4213,7 @@ namespace gbe
> > sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
> > sel.block->endifOffset = -1;
> > sel.curr.predicate = GEN_PREDICATE_NONE;
> > - if (!sel.block->hasBarrier)
> > + if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
> > sel.ENDIF(GenRegister::immd(0), next);
> > sel.curr.execWidth = 1;
> > if (simdWidth == 16)
> > @@ -4179,7 +4229,7 @@ namespace gbe
> > if(insn.getParent()->needEndif)
> > sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
> > sel.block->endifOffset = -1;
> > - if (!sel.block->hasBarrier) {
> > + if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
> > if(insn.getParent()->needEndif && !insn.getParent()->needIf)
> > sel.ENDIF(GenRegister::immd(0), insn.getParent()->endifLabel, insn.getParent()->endifLabel);
> > else if(insn.getParent()->needEndif)
> > diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
> > index e39aa6e..761c5c1 100644
> > --- a/backend/src/backend/gen_insn_selection.hpp
> > +++ b/backend/src/backend/gen_insn_selection.hpp
> > @@ -233,6 +233,7 @@ namespace gbe
> > int endifOffset;
> > bool hasBarrier;
> > bool hasBranch;
> > + bool removeSimpleIfEndif;
> > };
> >
> > /*! Owns the selection engine */
> > diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
> > index 067c9ce..a57edb3 100644
> > --- a/backend/src/backend/gen_reg_allocation.cpp
> > +++ b/backend/src/backend/gen_reg_allocation.cpp
> > @@ -1072,7 +1072,8 @@ namespace gbe
> > insn.opcode == SEL_OP_JMPI ||
> > insn.state.predicate == GEN_PREDICATE_NONE ||
> > (block.hasBarrier && insn.opcode == SEL_OP_MOV) ||
> > - (insn.state.flag == 0 && insn.state.subFlag == 1)));
> > + (insn.state.flag == 0 && insn.state.subFlag == 1) ||
> > + (block.removeSimpleIfEndif && insn.state.flag == 0 && insn.state.subFlag == 0) ));
> > }
> > lastID = insnID;
> > insnID++;
> > --
> > 1.7.9.5
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list