[Beignet] [PATCH] add selection IR SEL_OP_LABEL and SEL_OP_BRANCH.

Song, Ruiling ruiling.song at intel.com
Mon Feb 13 08:37:28 UTC 2017



> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> xionghu.luo at intel.com
> Sent: Wednesday, February 8, 2017 6:59 PM
> To: beignet at lists.freedesktop.org
> Cc: Luo, Xionghu <xionghu.luo at intel.com>
> Subject: [Beignet] [PATCH] add selection IR SEL_OP_LABEL and
> SEL_OP_BRANCH.
> 
> From: Luo Xionghu <xionghu.luo at intel.com>
> 
>  lower LABEL, backward/forward BRANCH after instruction selection.
> 
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
>  backend/src/CMakeLists.txt                         |   1 +
>  backend/src/backend/gen_context.cpp                |   2 +
>  backend/src/backend/gen_insn_selection.cpp         | 170 +++-----
>  backend/src/backend/gen_insn_selection.hpp         |  17 +-
>  backend/src/backend/gen_insn_selection.hxx         |   1 +
>  .../backend/gen_insn_selection_branch_lowering.cpp | 468
> +++++++++++++++++++++
>  backend/src/backend/gen_insn_selection_passes.hpp  |  30 ++
>  backend/src/sys/intrusive_list.hpp                 |   4 +
>  8 files changed, 581 insertions(+), 112 deletions(-)
>  create mode 100644
> backend/src/backend/gen_insn_selection_branch_lowering.cpp
>  create mode 100644 backend/src/backend/gen_insn_selection_passes.hpp
> 
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index 6ff25e7..d1f3f32 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -104,6 +104,7 @@ set (GBE_SRC
>      backend/gen_insn_selection.cpp
>      backend/gen_insn_selection.hpp
>      backend/gen_insn_selection_optimize.cpp
> +    backend/gen_insn_selection_branch_lowering.cpp
>      backend/gen_insn_scheduling.cpp
>      backend/gen_insn_scheduling.hpp
>      backend/gen_insn_selection_output.cpp
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index c8019e3..19550a3 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -27,6 +27,7 @@
>  #include "backend/gen_defs.hpp"
>  #include "backend/gen_encoder.hpp"
>  #include "backend/gen_insn_selection.hpp"
> +#include "backend/gen_insn_selection_passes.hpp"
>  #include "backend/gen_insn_scheduling.hpp"
>  #include "backend/gen_insn_selection_output.hpp"
>  #include "backend/gen_reg_allocation.hpp"
> @@ -4047,6 +4048,7 @@ namespace gbe
>      if (OCL_OUTPUT_SEL_IR)
>        outputSelectionIR(*this, this->sel, genKernel->getName());
>      schedulePreRegAllocation(*this, *this->sel);
> +    lowerBranch(this->sel);
>      sel->addID();
>      if (UNLIKELY(ra->allocate(*this->sel) == false))
>        return false;
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 128c2bc..e7ff970 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -291,6 +291,11 @@ namespace gbe
>      insn->parent = this;
>    }
> 
> +  void SelectionBlock::insertAfter(SelectionInstruction *prevInsn,
> SelectionInstruction *insn) {
> +    this->insnList.insert_after(prevInsn, insn);
> +    insn->parent = this;
> +  }
> +
>    void SelectionBlock::append(SelectionVector *vec) {
>      this->vectorList.push_back(vec);
>    }
> @@ -646,7 +651,7 @@ namespace gbe
>      /*! Encode a barrier instruction */
>      void FENCE(GenRegister dst);
>      /*! Encode a label instruction */
> -    void LABEL(ir::LabelIndex label);
> +    void LABEL(ir::LabelIndex label, ir::LabelIndex jip);
>      /*! Jump indexed instruction, return the encoded instruction count according
> to jump distance. */
>      int JMPI(Reg src, ir::LabelIndex target, ir::LabelIndex origin);
>      /*! IF indexed instruction */
> @@ -661,6 +666,8 @@ namespace gbe
>      void BRD(Reg src, ir::LabelIndex jip);
>      /*! BRC indexed instruction */
>      void BRC(Reg src, ir::LabelIndex jip, ir::LabelIndex uip);
> +    /*! BRANCH instruction */
> +    void BRANCH(Reg reg, ir::LabelIndex src, ir::LabelIndex dst, uint32_t
> pred_index, uint32_t jip);
>      /*! Compare instructions */
>      void CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst =
> GenRegister::null());
>      /*! Select instruction with embedded comparison */
> @@ -843,6 +850,12 @@ namespace gbe
>        return temps;
>      }
> 
> +    INLINE ir::LabelIndex newAuxLabel()
> +    {
> +      currAuxLabel++;
> +      return (ir::LabelIndex)currAuxLabel;
> +    }
> +
>      /*! Use custom allocators */
>      GBE_CLASS(Opaque);
>      friend class SelectionBlock;
> @@ -858,12 +871,6 @@ namespace gbe
>      bool bHasSends;
>      uint32_t ldMsgOrder;
>      bool slowByteGather;
> -    INLINE ir::LabelIndex newAuxLabel()
> -    {
> -      currAuxLabel++;
> -      return (ir::LabelIndex)currAuxLabel;
> -    }
> -
>    };
> 
>    ///////////////////////////////////////////////////////////////////////////
> @@ -1244,9 +1251,10 @@ namespace gbe
>    /*! Syntactic sugar for method declaration */
>    typedef const GenRegister &Reg;
> 
> -  void Selection::Opaque::LABEL(ir::LabelIndex index) {
> +  void Selection::Opaque::LABEL(ir::LabelIndex index, ir::LabelIndex jip) {
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_LABEL, 0, 0);
>      insn->index = index.value();
> +    insn->index1 = jip.value();
>    }
> 
>    void Selection::Opaque::BARRIER(GenRegister src, GenRegister fence, uint32_t
> barrierType) {
> @@ -1294,6 +1302,15 @@ namespace gbe
>      insn->index1 = uip.value();
It is hard to guess what "index1" means for. Could you change it to a meaningful name?
And also other occurrences of "index1".

>    }
> 
> +  void Selection::Opaque::BRANCH(Reg reg, ir::LabelIndex src, ir::LabelIndex dst,
> uint32_t pred_index, uint32_t jip) {
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_BRANCH, 0, 1);
> +    insn->src(0) = reg;
> +    insn->index = src.value();
> +    insn->index1 = dst.value();
> +    insn->jip = jip;
> +    insn->extra.pred_index = pred_index;
> +  }
> +
>    void Selection::Opaque::IF(Reg src, ir::LabelIndex jip, ir::LabelIndex uip) {
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_IF, 0, 1);
>      insn->src(0) = src;
> @@ -1306,7 +1323,7 @@ namespace gbe
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_ELSE, 0, 1);
>      insn->src(0) = src;
>      insn->index = jip.value();
> -    this->LABEL(elseLabel);
> +    this->LABEL(elseLabel, ir::LabelIndex(0));
>    }
> 
>    void Selection::Opaque::ENDIF(Reg src, ir::LabelIndex jip, ir::LabelIndex
> endifLabel) {
> @@ -1314,7 +1331,7 @@ namespace gbe
>        this->block->endifLabel = this->newAuxLabel();
>      else
>        this->block->endifLabel = endifLabel;
> -    this->LABEL(this->block->endifLabel);
> +    this->LABEL(this->block->endifLabel, ir::LabelIndex(0));
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_ENDIF, 0, 1);
>      insn->src(0) = src;
>      insn->index = this->block->endifLabel.value();
> @@ -2530,7 +2547,7 @@ namespace gbe
>      this->block->hasBranch = bb.getLastInstruction()->getOpcode() == OP_BRA ||
>                               bb.getLastInstruction()->getOpcode() == OP_RET;
>      if (!this->block->hasBranch)
> -      this->block->endifOffset = -1;
> +      this->block->needJump = false;
> 
>      // Build the DAG on the fly
>      uint32_t insnNum = 0;
> @@ -2604,7 +2621,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>      // Bottom up code generation
>      bool needEndif = this->block->hasBranch == false && !this->block->hasBarrier;
>      needEndif = needEndif && bb.needEndif;
> -    this->block->removeSimpleIfEndif = insnNum < 10 && isSimpleBlock(bb,
> insnNum);
> +    this->block->removeSimpleIfEndif = false;//insnNum < 10 &&
> isSimpleBlock(bb, insnNum);
>      if (needEndif && !this->block->removeSimpleIfEndif) {
>        if(!bb.needIf) // this basic block is the exit of a structure
>          this->ENDIF(GenRegister::immd(0), bb.endifLabel, bb.endifLabel);
> @@ -2750,7 +2767,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>    ///////////////////////////////////////////////////////////////////////////
>    // Code selection public implementation
>    ///////////////////////////////////////////////////////////////////////////
> -  const GenContext& Selection::getCtx()
> +  GenContext& Selection::getCtx()
>    {
>      return this->opaque->ctx;
>    }
> @@ -2904,6 +2921,12 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>      return this->opaque->isPartialWrite(reg);
>    }
> 
> +  GenRegister Selection::selReg(ir::Register reg, ir::Type type) const {
> +    return this->opaque->selReg(reg, type);
> +  }
> +
> +  ir::LabelIndex Selection::newAuxLabel() { return this->opaque-
> >newAuxLabel(); }
> +
>    SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t
> dstNum, uint32_t srcNum) {
>      return this->opaque->create(opcode, dstNum, srcNum);
>    }
> @@ -6602,18 +6625,18 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>      {
>        using namespace ir;
>        const LabelIndex label = insn.getLabelIndex();
> -      const GenRegister src0 = sel.getBlockIP();
> -      const GenRegister src1 = sel.getLabelImmReg(label);
> -      const uint32_t simdWidth = sel.ctx.getSimdWidth();
>        GBE_ASSERTM(label < sel.ctx.getMaxLabel(), "We reached the maximum
> label number which is reserved for barrier handling");
> -      sel.LABEL(label);
> 
> -      if(!insn.getParent()->needIf)
> +      if(!insn.getParent()->needIf) {
> +        sel.LABEL(label, LabelIndex(0));
>          return true;
> +      }
> 
>        // Do not emit any code for the "returning" block. There is no need for it
> -      if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock())
> +      if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock()) {
> +        sel.LABEL(label, LabelIndex(0));
>          return true;
> +      }
> 
>        LabelIndex jip;
>        const LabelIndex nextLabel = insn.getParent()->getNextBlock()-
> >getLabelIndex();
> @@ -6621,85 +6644,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>          jip = sel.ctx.getLabelIndex(&insn);
>        else
>          jip = nextLabel;
> -
> -      // Emit the mask computation at the head of each basic block
> -      sel.push();
> -        sel.curr.noMask = 1;
> -        sel.curr.predicate = GEN_PREDICATE_NONE;
> -        sel.curr.flag = 0;
> -        sel.curr.subFlag = 1;
> -        sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1);
> -      sel.pop();
> -
> -      if (sel.block->hasBarrier) {
> -        // If this block has barrier, we don't execute the block until all lanes
> -        // are 1s. Set each reached lane to 1, then check all lanes. If there is any
> -        // lane not reached, we jump to jip. And no need to issue if/endif for
> -        // this block, as it will always excute with all lanes activated.
> -        sel.push();
> -          sel.curr.predicate = GEN_PREDICATE_NORMAL;
> -          sel.curr.flag = 0;
> -          sel.curr.subFlag = 1;
> -          sel.setBlockIP(src0, sel.ctx.getMaxLabel());
> -          sel.curr.predicate = GEN_PREDICATE_NONE;
> -          sel.curr.noMask = 1;
> -          sel.cmpBlockIP(GEN_CONDITIONAL_EQ, src0, sel.ctx.getMaxLabel());
> -          if (simdWidth == 8)
> -            sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
> -          else if (simdWidth == 16)
> -            sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
> -          else
> -            NOT_IMPLEMENTED;
> -          sel.curr.noMask = 1;
> -          sel.curr.execWidth = 1;
> -          sel.curr.inversePredicate = 1;
> -          sel.JMPI(GenRegister::immd(0), jip, label);
> -        sel.pop();
> -        // FIXME, if the last BRA is unconditional jump, we don't need to update the
> label here.
> -        sel.push();
> -         sel.curr.predicate = GEN_PREDICATE_NORMAL;
> -         sel.curr.flag = 0;
> -         sel.curr.subFlag = 1;
> -         sel.setBlockIP(src0, label.value());
> -        sel.pop();
> -      }
> -      else {
> -        if (sel.ctx.hasJIP(&insn) &&
> -            // If jump to next label and the endif offset is -1, then
> -            // We don't need to add a jmpi here, as the following IF will do the same
> -            // thing if all channels are disabled.
> -            (jip != nextLabel || sel.block->endifOffset != -1)) {
> -          // If it is required, insert a JUMP to bypass the block
> -          sel.push();
> -            sel.curr.flag = 0;
> -            sel.curr.subFlag = 1;
> -            if (simdWidth == 8)
> -              sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
> -            else if (simdWidth == 16)
> -              sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
> -            else
> -              NOT_IMPLEMENTED;
> -            sel.curr.noMask = 1;
> -            sel.curr.execWidth = 1;
> -            sel.curr.inversePredicate = 1;
> -            sel.JMPI(GenRegister::immd(0), jip, label);
> -          sel.pop();
> -        }
> -        if(!sel.block->removeSimpleIfEndif){
> -          sel.push();
> -            sel.curr.flag = 0;
> -            sel.curr.subFlag = 1;
> -            sel.curr.predicate = GEN_PREDICATE_NORMAL;
> -            if(!insn.getParent()->needEndif && insn.getParent()->needIf) {
> -              ir::LabelIndex label = insn.getParent()->endifLabel;
> -              sel.IF(GenRegister::immd(0), label, label);
> -            }
> -            else
> -              sel.IF(GenRegister::immd(0), sel.block->endifLabel, sel.block-
> >endifLabel);
> -          sel.pop();
> -        }
> -      }
> -
> +      sel.LABEL(label, jip);
>        return true;
>      }
>      DECL_CTOR(LabelInstruction, 1, 1);
> @@ -7259,7 +7204,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>            sel.curr.predicate = GEN_PREDICATE_NONE;
>            if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
>              sel.ENDIF(GenRegister::immd(0), nextLabel);
> -          sel.block->endifOffset = -1;
> +          sel.block->needJump = false;
>          sel.pop();
>        } else {
>          // Update the PcIPs
> @@ -7275,7 +7220,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>            else if(insn.getParent()->needEndif)
>              sel.ENDIF(GenRegister::immd(0), nextLabel);
>          }
> -        sel.block->endifOffset = -1;
> +        sel.block->needJump = false;
>          if (nextLabel == jip) return;
>          // Branch to the jump target
>          sel.push();
> @@ -7283,7 +7228,8 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>            sel.curr.noMask = 1;
>            sel.curr.predicate = GEN_PREDICATE_NONE;
>            // Actually, the origin of this JMPI should be the beginning of next BB.
> -          sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip,
> ir::LabelIndex(curr->getLabelIndex().value() + 1));
> +          sel.JMPI(GenRegister::immd(0), jip, ir::LabelIndex(curr-
> >getLabelIndex().value() + 1));
> +          sel.block->needJump = true;
>          sel.pop();
>        }
>      }
> @@ -7317,7 +7263,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>            sel.curr.flagIndex = pred.value();
>            sel.curr.predicate = GEN_PREDICATE_NORMAL;
>            sel.setBlockIP(ip, dst.value());
> -          sel.block->endifOffset = -1;
> +          sel.block->needJump = false;
>            sel.curr.predicate = GEN_PREDICATE_NONE;
>            if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
>              sel.ENDIF(GenRegister::immd(0), next);
> @@ -7327,7 +7273,8 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>            else
>              sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
>            sel.curr.noMask = 1;
> -          sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, label);
> +          sel.JMPI(GenRegister::immd(0), jip, label);
> +          sel.block->needJump = true;
>          sel.pop();
>        } else {
>          const LabelIndex next = bb.getNextBlock()->getLabelIndex();
> @@ -7336,7 +7283,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>          sel.curr.subFlag = 1;
>          if(insn.getParent()->needEndif)
>          sel.setBlockIP(ip, dst.value());
> -        sel.block->endifOffset = -1;
> +        sel.block->needJump = false;
>          if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
>            if(insn.getParent()->needEndif && !insn.getParent()->needIf)
>              sel.ENDIF(GenRegister::immd(0), insn.getParent()->endifLabel,
> insn.getParent()->endifLabel);
> @@ -7348,7 +7295,8 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>            sel.curr.execWidth = 1;
>            sel.curr.noMask = 1;
>            sel.curr.predicate = GEN_PREDICATE_NONE;
> -          sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, label);
> +          sel.JMPI(GenRegister::immd(0), jip, label);
> +          sel.block->needJump = true;
>          sel.pop();
>        }
>      }
> @@ -7362,6 +7310,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>        else if (opcode == OP_BRA) {
>          const LabelIndex dst = insn.getLabelIndex();
>          const LabelIndex src = insn.getParent()->getLabelIndex();
> +        const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
> 
>          sel.push();
>          if (insn.isPredicated() == true) {
> @@ -7369,11 +7318,12 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>              sel.curr.externFlag = 1;
>          }
> 
> -        // We handle foward and backward branches differently
> -        if (uint32_t(dst) <= uint32_t(src))
> -          this->emitBackwardBranch(sel, insn, dst, src);
> -        else
> -          this->emitForwardBranch(sel, insn, dst, src);
> +        if (insn.isPredicated() == true) {
> +            const Register pred = insn.getPredicateIndex();
> +            sel.BRANCH(GenRegister::immd(0), dst, src, pred.value(), jip.value());
> +        } else {
> +            sel.BRANCH(GenRegister::immd(0), dst, src, 0, jip.value());
> +        }
>          sel.pop();
>        }
>        else if(opcode == OP_IF) {
> diff --git a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index 01999a2..fbbb826 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -68,6 +68,8 @@ namespace gbe
>    public:
>      /*! Owns the instruction */
>      SelectionBlock *parent;
> +    /*! Get the parent Selection block */
> +    SelectionBlock *getParent(void) { return parent; }
>      /*! Append an instruction before this one */
>      void prepend(SelectionInstruction &insn);
>      /*! Append an instruction after this one */
> @@ -160,6 +162,7 @@ namespace gbe
>          uint16_t printfSize;
>        };
>        uint32_t workgroupOp;
> +      uint32_t pred_index;
>      } extra;
>      /*! Gen opcode */
>      uint8_t opcode;
> @@ -171,6 +174,8 @@ namespace gbe
>      uint32_t index;
>      /*! For BRC/IF to store the UIP */
>      uint32_t index1;
> +    /*! for BRANCH to store jip */
> +    uint32_t jip;
>      /*! instruction ID used for vector allocation. */
>      uint32_t ID;
>      DebugInfo DBGInfo;
> @@ -258,8 +263,10 @@ namespace gbe
>      void append(SelectionInstruction *insn);
>      /*! Append a new selection instruction at the beginning of the block */
>      void prepend(SelectionInstruction *insn);
> +    /*! insert a new selection instruction after prevInsn */
> +    void insertAfter(SelectionInstruction *prevInsn, SelectionInstruction *insn);
>      ir::LabelIndex endifLabel;
> -    int endifOffset;
> +    bool needJump;
>      bool hasBarrier;
>      bool hasBranch;
>      bool removeSimpleIfEndif;
> @@ -305,6 +312,10 @@ namespace gbe
>      bool isScalarReg(const ir::Register &reg) const;
>      /*! is this register a partially written register.*/
>      bool isPartialWrite(const ir::Register &reg) const;
> +    /*! create GenRegister for ir Register.*/
> +    GenRegister selReg(ir::Register reg, ir::Type type) const;
> +
> +    ir::LabelIndex newAuxLabel();
>      /*! Create a new selection instruction */
>      SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t
> srcNum);
>      /*! List of emitted blocks */
> @@ -316,11 +327,13 @@ namespace gbe
> 
>      /* optimize at selection IR level */
>      void optimize(void);
> +    /* branch lower at selection IR level */
> +    void branchLowering(void);
>      uint32_t opt_features;
> 
>      /* Add insn ID for sel IR */
>      void addID(void);
> -    const GenContext &getCtx();
> +    GenContext &getCtx();
> 
>      /*! Use custom allocators */
>      GBE_CLASS(Selection);
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index 5d96e9e..9e4806b 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -90,6 +90,7 @@ DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
>  DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction)
>  DECL_SELECTION_IR(CONVF_TO_I64, FloatToI64Instruction)
>  DECL_SELECTION_IR(I64MADSAT, I64MADSATInstruction)
> +DECL_SELECTION_IR(BRANCH, UnaryInstruction)
>  DECL_SELECTION_IR(BRC, UnaryInstruction)
>  DECL_SELECTION_IR(BRD, UnaryInstruction)
>  DECL_SELECTION_IR(IF, UnaryInstruction)
> diff --git a/backend/src/backend/gen_insn_selection_branch_lowering.cpp
> b/backend/src/backend/gen_insn_selection_branch_lowering.cpp
> new file mode 100644
> index 0000000..92efbdb
> --- /dev/null
> +++ b/backend/src/backend/gen_insn_selection_branch_lowering.cpp
> @@ -0,0 +1,468 @@
> +
> +#include "backend/gen_insn_selection.hpp"
> +#include "backend/gen_insn_selection_passes.hpp"
> +#include "backend/gen_context.hpp"
> +#include "ir/function.hpp"
> +#include "ir/liveness.hpp"
> +#include "ir/profile.hpp"
> +#include "sys/cvar.hpp"
> +#include "sys/vector.hpp"
> +#include <algorithm>
> +#include <climits>
> +#include <map>
> +
> +namespace gbe
> +{
> +  class BranchLowering
> +  {
> +  public:
> +    /*! To make function prototypes more readable */
> +    typedef const GenRegister &Reg;
> +    typedef const GenInstructionState &State;
> +    BranchLowering(GenContext &ctx, SelectionBlock &selblock)
> +        : ctx(ctx), selBlock(selblock) {}
> +    void run();
> +    void insnLower();
> +    void lowerBRANCH(SelectionInstruction &insn);
> +    void lowerForwardBRANCH(SelectionInstruction &insn);
> +    void lowerBackwardBRANCH(SelectionInstruction &insn);
> +    void lowerLABEL(SelectionInstruction &insn);
> +
> +    /* Get current block IP register according to label width. */
> +    GenRegister getBlockIP() {
> +      return ctx.isDWLabel() ? ctx.sel->selReg(ir::ocl::dwblockip, ir::TYPE_U32) :
> ctx.sel->selReg(ir::ocl::blockip, ir::TYPE_U32);
> +    }
> +
> +    SelectionInstruction *setBlockIP(GenRegister blockip, uint32_t labelValue,
> +                                     State state,
> +                                     SelectionInstruction *prevInsn);
> +    SelectionInstruction *cmpBlockIP(uint32_t cond, GenRegister blockip,
> +                                     GenRegister labelReg, State state,
> +                                     SelectionInstruction *prevInsn);
> +    SelectionInstruction *cmpBlockIP(uint32_t cond, GenRegister blockip,
> +                                     uint32_t labelValue, State state,
> +                                     SelectionInstruction *prevInsn);
> +    SelectionInstruction *MOV(Reg dst, Reg src, State state,
> +                              SelectionInstruction *prevInsn);
> +    SelectionInstruction *CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst,
> +                              State state, SelectionInstruction *prevInsn);
> +    SelectionInstruction *JMPI(Reg src, ir::LabelIndex index,
> +                               ir::LabelIndex origin, State state,
> +                               SelectionInstruction *prevInsn);
> +    SelectionInstruction *IF(Reg src, ir::LabelIndex jip, ir::LabelIndex uip,
> +                             State state, SelectionInstruction *prevInsn);
> +    SelectionInstruction *ENDIF(Reg src, ir::LabelIndex jip,
> +                                ir::LabelIndex endifLabel, State state,
> +                                SelectionInstruction *prevInsn);
> +    SelectionInstruction *LABEL(ir::LabelIndex index, ir::LabelIndex jip,
> +                                State state, SelectionInstruction *prevInsn);
> +    SelectionInstruction *generateInsn(SelectionOpcode opcode, uint32_t
> dstNum,
> +                                       uint32_t srcNum, State state,
> +                                       SelectionInstruction *prevInsn);
> +    ~BranchLowering() {}
> +
> +  protected:
> +    GenContext &ctx;      //in case that we need it
> +    SelectionBlock &selBlock;
> +    bool lowered;
> +  };
> +
> +  void BranchLowering::insnLower()
> +  {
> +      //for (auto &insn : selBlock.insnList) {
> +      for (auto iter = selBlock.insnList.end() ; iter != selBlock.insnList.begin(); ) {
> +          iter--;
If you have specific reason to iterate the list in backward order, add an explicit comment here is better.
And pre-increment/decrement is preferred over post-inrement/decrement. So please use "--iter" here.
> +        SelectionInstruction &insn = *iter;
> +        if (insn.opcode == SEL_OP_BRANCH) {
> +          lowerBRANCH(insn);
> +        }
> +        else if (insn.opcode == SEL_OP_LABEL) {
> +          lowerLABEL(insn);
> +        }
> +      }
I think it is better to erase the branch instruction right after lowerBranch().
iterating the instructions a second pass just to erase the branch instruction seems a little waste of time I think.

> +      for (auto iter = selBlock.insnList.begin() ; iter != selBlock.insnList.end();
> iter++) {
> +        SelectionInstruction &insn = *iter;
> +        if (insn.opcode == SEL_OP_BRANCH) {
> +          iter = selBlock.insnList.erase(&insn);
> +        }
> +      }
> +
> +  }
> +
> +  void BranchLowering::lowerBackwardBRANCH(SelectionInstruction &insn)
> +  {
> +      using namespace ir;
> +      SelectionInstruction *prev_insn = &insn;
> +      const GenRegister ip = getBlockIP();
> +      const BasicBlock *currBB = insn.getParent()->bb;
> +      const BasicBlock *nextBB = currBB->getNextBlock();
> +      const LabelIndex nextLabel = nextBB->getLabelIndex();
> +      const uint32_t jip = insn.jip;
> +      const LabelIndex label = LabelIndex(insn.index1);
> +      uint32_t dst = insn.index;
> +      uint32_t predIndex = insn.extra.pred_index;
> +      const uint32_t simdWidth = ctx.getSimdWidth();
> +      GBE_ASSERT(nextBB != NULL);
> +
> +      if (predIndex != 0) {
> +
> +        // Update the PcIPs for all the branches. Just put the IPs of the next
> +        // block. Next instruction will properly update the IPs of the lanes
> +        // that actually take the branch
> +        {
> +          GenInstructionState curr = insn.state;
> +          prev_insn = setBlockIP(ip, nextLabel.value(), curr, prev_insn);
> +        }
> +        GBE_ASSERT(jip == dst);
> +        GenInstructionState curr;
> +        curr.execWidth = simdWidth;
> +        curr.physicalFlag = 0;
> +        curr.flagIndex = predIndex;
> +        curr.predicate = GEN_PREDICATE_NORMAL;
> +        prev_insn = setBlockIP(ip, dst, curr, prev_insn);
> +        curr.predicate = GEN_PREDICATE_NONE;
> +        if (!selBlock.hasBarrier)
> +          prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr,
> prev_insn);
> +        curr.execWidth = 1;
> +        if (simdWidth == 16)
> +          curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
> +        else
> +          curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
> +        curr.noMask = 1;
> +        prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jip), LabelIndex(label),
> curr, prev_insn);
> +        selBlock.needJump = true;
> +      } else {
> +        // Update the PcIPs
> +        GenInstructionState curr = insn.state;
> +        curr.flag = 0;
> +        curr.subFlag = 1;
> +        if(insn.getParent()->bb->needEndif)
> +        prev_insn = setBlockIP(ip, dst, curr, prev_insn);
> +        if (!selBlock.hasBarrier) {
> +          if(insn.getParent()->bb->needEndif && !insn.getParent()->bb->needIf)
> +            prev_insn = ENDIF(GenRegister::immd(0), insn.getParent()->endifLabel,
> insn.getParent()->endifLabel, curr, prev_insn);
> +          else if(insn.getParent()->bb->needEndif)
> +            prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr,
> prev_insn);
> +        }
> +        // Branch to the jump target
> +        {
> +          GenInstructionState curr;

No need to set execWidth twice.
> +          curr.execWidth = simdWidth;
> +          curr.execWidth = 1;
> +          curr.noMask = 1;
> +          curr.predicate = GEN_PREDICATE_NONE;
> +          prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jip), LabelIndex(label),
> curr, prev_insn);
> +          selBlock.needJump = true;
> +        }
> +      }
> +  }
> +
> +  void BranchLowering::lowerForwardBRANCH(SelectionInstruction &insn)
> +  {
> +    using namespace ir;
> +    SelectionInstruction *prev_insn = &insn;
> +    const uint32_t simdWidth = ctx.getSimdWidth();
> +    uint32_t dst = insn.index;
> +    uint32_t predIndex = insn.extra.pred_index;
> +    const uint32_t jipValue = insn.jip;
> +    const GenRegister ip = getBlockIP();
> +    // We will not emit any jump if we must go the next block anyway
> +    const BasicBlock *currBB = insn.getParent()->bb;
> +    const BasicBlock *nextBB = currBB->getNextBlock();
> +    const LabelIndex nextLabel = nextBB->getLabelIndex();
> +    if (predIndex != 0) {
> +        GenInstructionState curr = insn.state;
> +          // we don't need to set next label to the pcip
> +          // as if there is no backward jump latter, then obviously everything will
> work fine.
> +          // If there is backward jump latter, then all the pcip will be updated
> correctly there.
> +          curr.execWidth = simdWidth;
> +          curr.physicalFlag = 0;
> +          curr.flagIndex = predIndex;
> +          curr.predicate = GEN_PREDICATE_NORMAL;
> +          prev_insn = setBlockIP(ip, dst, curr, prev_insn);
> +          curr.predicate = GEN_PREDICATE_NONE;
> +          if (!selBlock.hasBarrier)
> +            prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr,
> prev_insn);
> +          selBlock.needJump = false;
> +    } else {
> +        // Update the PcIPs
> +        GenInstructionState curr = insn.state;
> +        curr.flag = 0;
> +        curr.subFlag = 1;
> +        if(insn.getParent()->bb->needEndif)
> +          prev_insn = setBlockIP(ip, dst, curr, prev_insn);
> +
> +        if (!selBlock.hasBarrier) {
> +          if(insn.getParent()->bb->needEndif && !insn.getParent()->bb->needIf)
> +            prev_insn = ENDIF(GenRegister::immd(0), insn.getParent()->bb-
> >endifLabel, insn.getParent()->bb->endifLabel, curr, prev_insn);
> +          else if(insn.getParent()->bb->needEndif)
> +            prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr,
> prev_insn);
> +        }
> +        selBlock.needJump = false;
> +        if (nextLabel == jipValue) return;
> +        // Branch to the jump target
> +        {
> +          GenInstructionState curr;
> +          curr.execWidth = simdWidth;
No need to set it twice.
> +          curr.execWidth = 1;
> +          curr.noMask = 1;
> +          curr.predicate = GEN_PREDICATE_NONE;
> +          // Actually, the origin of this JMPI should be the beginning of next
> +          // BB.
> +          prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue),
> +               ir::LabelIndex(currBB->getLabelIndex().value() + 1), curr,
> +               prev_insn);
> +          selBlock.needJump = true;
> +        }
> +    }
> +  }
> +
> +  void BranchLowering::lowerBRANCH(SelectionInstruction& insn)
> +  {
> +    uint32_t dst = insn.index;
> +    uint32_t src = insn.index1;
> +    if (dst <= src) {
> +        lowerBackwardBRANCH(insn);
> +    } else {
> +        lowerForwardBRANCH(insn);
> +    }
> +  }
> +
> +  void BranchLowering::lowerLABEL(SelectionInstruction& insn)
> +  {
> +    //src0, src1, jip, nextLabel, hasBarrier, sel
> +      using namespace ir;
> +      uint32_t labelValue = insn.index;
> +      const GenRegister src0 = getBlockIP();
> +      const GenRegister src1 = ctx.isDWLabel() ? GenRegister::immud(labelValue) :
> GenRegister::immuw(labelValue);
> +      const uint32_t simdWidth = ctx.getSimdWidth();
> +
> +      if (insn.getParent()->bb == &ctx.getFunction().getBottomBlock()) {
> +        return;
> +      }
> +
> +      const LabelIndex nextLabel = insn.getParent()->bb->getNextBlock()-
> >getLabelIndex();
> +      uint32_t jipValue = insn.index1;
> +      if(jipValue == 0)
> +          return;
> +
> +      SelectionInstruction* prev_insn = &insn;
> +      GenInstructionState curr;
> +      curr.execWidth = simdWidth;
> +      curr.noMask = 1;
> +      curr.predicate = GEN_PREDICATE_NONE;
> +      curr.flag = 0;
> +      curr.subFlag = 1;
> +      SelectionInstruction *insn_cmp = cmpBlockIP(GEN_CONDITIONAL_LE, src0,
> src1, curr, prev_insn);
> +      prev_insn = insn_cmp;
> +
> +      if (selBlock.hasBarrier) {
> +        GenInstructionState curr;
> +        curr.execWidth = simdWidth;
> +        curr.predicate = GEN_PREDICATE_NORMAL;
> +        curr.flag = 0;
> +        curr.subFlag = 1;
> +        prev_insn = setBlockIP(src0, ctx.getMaxLabel(), curr, prev_insn);
> +        curr.predicate = GEN_PREDICATE_NONE;
> +        curr.noMask = 1;
> +        prev_insn = cmpBlockIP(GEN_CONDITIONAL_EQ, src0, ctx.getMaxLabel(),
> curr, prev_insn);
> +
> +        if (simdWidth == 8)
> +          curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
> +        else if (simdWidth == 16)
> +          curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
> +        else
> +          NOT_IMPLEMENTED;
> +        curr.noMask = 1;
> +        curr.execWidth = 1;
> +        curr.inversePredicate = 1;
> +        prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue),
> +                         LabelIndex(labelValue), curr, prev_insn);
> +        {
> +          GenInstructionState curr;
> +          curr.execWidth = simdWidth;
> +          curr.predicate = GEN_PREDICATE_NORMAL;
> +          curr.flag = 0;
> +          curr.subFlag = 1;
> +          prev_insn = setBlockIP(src0, labelValue, curr, prev_insn);
> +        }
> +      } else {
> +        if (
> +            // If jump to next label and the endif offset is -1, then
> +            // We don't need to add a jmpi here, as the following IF will do the
> +            // same
> +            // thing if all channels are disabled.
> +            (jipValue != nextLabel.value() || selBlock.needJump != false)) {
> +          // If it is required, insert a JUMP to bypass the block
> +            GenInstructionState curr;
> +            curr.noMask = 1;
> +            curr.execWidth = 1;
> +            curr.inversePredicate = 1;
> +            curr.flag = 0;
> +            curr.subFlag = 1;
> +            if (simdWidth == 8)
> +                curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
> +            else if (simdWidth == 16)
> +                curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
> +            else
> +                NOT_IMPLEMENTED;
> +            prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue),
> +                             LabelIndex(labelValue), curr, prev_insn);
> +        }
> +        GenInstructionState curr;
> +        curr.execWidth = simdWidth;
> +        curr.predicate = GEN_PREDICATE_NORMAL;
> +        curr.flag = 0;
> +        curr.subFlag = 1;
> +        if(!insn.getParent()->bb->needEndif && insn.getParent()->bb->needIf) {
> +            ir::LabelIndex label = insn.getParent()->bb->endifLabel;
> +            prev_insn = IF(GenRegister::immd(0), label, label, curr, prev_insn);
> +        }
> +        else {
> +            prev_insn = IF(GenRegister::immd(0), selBlock.endifLabel,
> selBlock.endifLabel, curr, prev_insn);
> +        }
> +      }
> +  }
> +    /* Set current label register to a label value. */
> +  SelectionInstruction *
> +  BranchLowering::setBlockIP(GenRegister blockip, uint32_t labelValue,
> +                             State state, SelectionInstruction *prevInsn) {
> +    if (!ctx.isDWLabel())
> +      return MOV(GenRegister::retype(blockip, GEN_TYPE_UW),
> +                 GenRegister::immuw(labelValue), state, prevInsn);
> +    else
> +      return MOV(GenRegister::retype(blockip, GEN_TYPE_UD),
> +                 GenRegister::immud(labelValue), state, prevInsn);
> +  }
> +
> +  SelectionInstruction *
> +  BranchLowering::cmpBlockIP(uint32_t cond, GenRegister blockip,
> +                             GenRegister labelReg, State state,
> +                             SelectionInstruction *prevInsn) {
> +    if (!ctx.isDWLabel())
> +      return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UW), labelReg,
> +          GenRegister::retype(GenRegister::null(), GEN_TYPE_UW), state, prevInsn);
> +    else
> +      return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UD), labelReg,
> +          GenRegister::retype(GenRegister::null(), GEN_TYPE_UD), state, prevInsn);
> +  }
> +
> +  SelectionInstruction *
> +  BranchLowering::cmpBlockIP(uint32_t cond, GenRegister blockip,
> +                             uint32_t labelValue, State state,
> +                             SelectionInstruction *prevInsn) {
> +    if (!ctx.isDWLabel())
> +      return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UW),
> +                 GenRegister::immuw(labelValue),
> +                 GenRegister::retype(GenRegister::null(), GEN_TYPE_UW), state,
> +                 prevInsn);
> +    else
> +      return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UD),
> +                 GenRegister::immuw(labelValue),
> +                 GenRegister::retype(GenRegister::null(), GEN_TYPE_UD), state,
> +                 prevInsn);
> +    }
> +

Please move below functions into a separate file naming like SelectInstrBuilder.cpp
And instead of pass the prevInsn for every function, please define a separate function like
setInsertPos(pos) to assign the insert position. The newly created instruction will be inserted before the insertpos.

> +  SelectionInstruction *BranchLowering::MOV(Reg dst, Reg src, State state,
> +                                            SelectionInstruction *prevInsn) {
> +    SelectionInstruction *insn = this->generateInsn(SEL_OP_MOV, 1, 1, state,
> prevInsn);
> +    insn->dst(0) = dst;
> +    insn->src(0) = src;
> +    return insn;
> +  }
> +
> +  SelectionInstruction *BranchLowering::CMP(uint32_t conditional, Reg src0,
> +                                            Reg src1, Reg dst, State state,
> +                                            SelectionInstruction *prevInsn) {
> +    SelectionInstruction *insn = this->generateInsn(SEL_OP_CMP, 1, 2, state,
> prevInsn);
> +    insn->src(0) = src0;
> +    insn->src(1) = src1;
> +    insn->dst(0) = dst;
> +    insn->extra.function = conditional;
> +    return insn;
> +  }
> +
> +  SelectionInstruction *BranchLowering::JMPI(Reg src, ir::LabelIndex index,
> +                                             ir::LabelIndex origin, State state,
> +                                             SelectionInstruction *prevInsn) {
> +    SelectionInstruction *insn = this->generateInsn(SEL_OP_JMPI, 0, 1, state,
> prevInsn);
> +    insn->src(0) = src;
> +    insn->index = index.value();
> +    ir::LabelIndex start, end;
> +    if (origin.value() < index.value()) {
> +    // Forward Jump, need to exclude the target BB. Because we
> +    // need to jump to the beginning of it.
I don't understand what do you mean here, could you explain a little bit more?
What do you mean by exclude target BB? And why do we need to exclude it now that we need to jump to the beginning of it?
May be using some example instructions will help people understand.

And directly manipulating LabelIndex is a really really bad idea. Please try your best to avoid +1/-1 to a LabelIndex.
Instead please get the previous/next basicBlock and get the LabelIndex of that BB. There are also many other places manipulating LabelIndex in the patch.
Please also fix them.

> +      start = origin;
> +      end = ir::LabelIndex(index.value() - 1);
> +    } else {
> +      start = index;
> +      end = origin;
> +    }
> +    // FIXME, this longjmp check is too hacky. We need to support instruction
> +    // insertion at code emission stage in the future.
> +    insn->extra.longjmp = ctx.getFunction().getDistance(start, end) > 3000;
> +    return insn;
> +  }
> +
> +  SelectionInstruction *BranchLowering::IF(Reg src, ir::LabelIndex jip,
> +                                           ir::LabelIndex uip, State state,
> +                                           SelectionInstruction *prevInsn) {
> +    SelectionInstruction *insn = this->generateInsn(SEL_OP_IF, 0, 1, state,
> prevInsn);
> +    insn->src(0) = src;
> +    insn->index = jip.value();
> +    insn->index1 = uip.value();
> +    return insn;
> +  }
> +
> +  SelectionInstruction *BranchLowering::ENDIF(Reg src, ir::LabelIndex jip,
> +                                              ir::LabelIndex endifLabel, State state,
> +                                              SelectionInstruction *prevInsn) {
> +    if(endifLabel == 0)
> +      selBlock.endifLabel = ctx.sel->newAuxLabel();
> +    else
> +      selBlock.endifLabel = endifLabel;
> +    SelectionInstruction * insn_label = LABEL(selBlock.endifLabel,
> ir::LabelIndex(0), state, prevInsn);
> +    SelectionInstruction *insn = this->generateInsn(SEL_OP_ENDIF, 0, 1, state,
> insn_label);
> +    insn->src(0) = src;
> +    insn->index = selBlock.endifLabel.value();
> +    return insn;
> +  }
> +
> +  SelectionInstruction *BranchLowering::LABEL(ir::LabelIndex index,
> +                                              ir::LabelIndex jip, State state,
> +                                              SelectionInstruction *prevInsn) {
> +    SelectionInstruction *insn = this->generateInsn(SEL_OP_LABEL, 0, 0, state,
> prevInsn);
> +    insn->index = index.value();
> +    insn->index1 = jip.value();
> +    return insn;
> +  }
> +
> +  SelectionInstruction *
> +  BranchLowering::generateInsn(SelectionOpcode opcode, uint32_t dstNum,
> +                               uint32_t srcNum, State state,
> +                               SelectionInstruction *prevInsn) {
> +    GBE_ASSERT(dstNum <= SelectionInstruction::MAX_DST_NUM &&
> +               srcNum <= SelectionInstruction::MAX_SRC_NUM);
> +    SelectionInstruction *insn = ctx.sel->create(opcode, dstNum, srcNum);
> +    selBlock.insertAfter(prevInsn, insn);
> +    insn->state = state;
> +    return insn;
> +  }
> +



More information about the Beignet mailing list