[Beignet] [PATCH 2/2] GBE: optimize CMP instruction encoding.

Zhigang Gong zhigang.gong at linux.intel.com
Thu May 22 18:21:09 PDT 2014


Ping for review.

On Mon, May 19, 2014 at 12:33:06PM +0800, Zhigang Gong wrote:
> This patch fixes the following two things.
> 1. Use a temporary register as dst register for the CMP
> instruction in the middle of a block.
> 2. fix the switch flag for the CMP instruction at the begining
> of each block. As the compact instruction handling will handle
> the cmp instruction directly, and will ignore the switch
> flag which is incorrect.
> 
> This patch could get about 1-2% performance gain for luxmark.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  backend/src/backend/gen_encoder.cpp        |  9 +++-
>  backend/src/backend/gen_insn_selection.cpp | 84 +++++++++++++++++-------------
>  backend/src/backend/gen_insn_selection.hpp |  4 +-
>  backend/src/backend/gen_reg_allocation.cpp | 33 ++++++++----
>  4 files changed, 79 insertions(+), 51 deletions(-)
> 
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index aa30b05..ca587dd 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -1148,13 +1148,14 @@ namespace gbe
>  
>    void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
>      if (needToSplitCmp(this, src0, src1) == false) {
> -      if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
> +      if(!GenRegister::isNull(dst) && compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
>          return;
>        }
>        GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP);
>        this->setHeader(insn);
>        insn->header.destreg_or_condmod = conditional;
> -      insn->header.thread_control = GEN_THREAD_SWITCH;
> +      if (GenRegister::isNull(dst))
> +        insn->header.thread_control = GEN_THREAD_SWITCH;
>        this->setDst(insn, dst);
>        this->setSrc0(insn, src0);
>        this->setSrc1(insn, src1);
> @@ -1164,6 +1165,8 @@ namespace gbe
>        // Instruction for the first quarter
>        insnQ1 = this->next(GEN_OPCODE_CMP);
>        this->setHeader(insnQ1);
> +      if (GenRegister::isNull(dst))
> +        insnQ1->header.thread_control = GEN_THREAD_SWITCH;
>        insnQ1->header.quarter_control = GEN_COMPRESSION_Q1;
>        insnQ1->header.execution_size = GEN_WIDTH_8;
>        insnQ1->header.destreg_or_condmod = conditional;
> @@ -1174,6 +1177,8 @@ namespace gbe
>        // Instruction for the second quarter
>        insnQ2 = this->next(GEN_OPCODE_CMP);
>        this->setHeader(insnQ2);
> +      if (GenRegister::isNull(dst))
> +        insnQ2->header.thread_control = GEN_THREAD_SWITCH;
>        insnQ2->header.quarter_control = GEN_COMPRESSION_Q2;
>        insnQ2->header.execution_size = GEN_WIDTH_8;
>        insnQ2->header.destreg_or_condmod = conditional;
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 0cb633f..ed69bd0 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -311,9 +311,9 @@ namespace gbe
>      /*! Implement public class */
>      INLINE uint32_t getVectorNum(void) const { return this->vectorNum; }
>      /*! Implement public class */
> -    INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
> +    INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov);
>      /*! Implement public class */
> -    INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
> +    INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov);
>      /*! spill a register (insert spill/unspill instructions) */
>      INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
>      /*! indicate whether a register is a scalar/uniform register. */
> @@ -843,48 +843,56 @@ namespace gbe
>      return true;
>    }
>  
> -  ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
> +  ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
>      SelectionBlock *block = insn->parent;
>      const uint32_t simdWidth = insn->state.execWidth;
>      ir::Register tmp;
> +    GenRegister gr;
>  
>      // This will append the temporary register in the instruction block
>      this->block = block;
> -    tmp = this->reg(ir::FAMILY_DWORD);
> -
> -    // Generate the MOV instruction and replace the register in the instruction
> -    SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
> -    mov->src(0) = GenRegister::retype(insn->src(regID), GEN_TYPE_F);
> -    mov->state = GenInstructionState(simdWidth);
> -    if (this->isScalarReg(insn->src(regID).reg()))
> -      mov->state.noMask = 1;
> -    insn->src(regID) = mov->dst(0) = GenRegister::fxgrf(simdWidth, tmp);
> -    insn->prepend(*mov);
> +    tmp = this->reg(ir::getFamily(type), simdWidth == 1);
> +    gr =  this->selReg(tmp, type);
> +    if (needMov) {
> +      // Generate the MOV instruction and replace the register in the instruction
> +      SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
> +      mov->src(0) = GenRegister::retype(insn->src(regID), gr.type);
> +      mov->state = GenInstructionState(simdWidth);
> +      if (this->isScalarReg(insn->src(regID).reg()))
> +        mov->state.noMask = 1;
> +      mov->dst(0) = gr;
> +      insn->prepend(*mov);
> +    }
> +    insn->src(regID) = gr;
>  
>      return tmp;
>    }
>  
> -  ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID) {
> +  ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
>      SelectionBlock *block = insn->parent;
> -    uint32_t simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : insn->state.execWidth;
> +    uint32_t simdWidth;
> +    if (!GenRegister::isNull(insn->dst(regID)))
> +      simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : insn->state.execWidth;
> +    else {
> +      GBE_ASSERT(needMov == false);
> +      simdWidth = insn->state.execWidth;
> +    }
>      ir::Register tmp;
> -    ir::RegisterFamily f = file.get(insn->dst(regID).reg()).family;
> -    int genType = f == ir::FAMILY_QWORD ? GEN_TYPE_DF : GEN_TYPE_F;
>      GenRegister gr;
> -
> -    // This will append the temporary register in the instruction block
>      this->block = block;
> -    tmp = this->reg(f);
> -
> +    tmp = this->reg(ir::getFamily(type));
> +    gr = this->selReg(tmp, type);
> +    if (needMov) {
>      // Generate the MOV instruction and replace the register in the instruction
> -    SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
> -    mov->dst(0) = GenRegister::retype(insn->dst(regID), genType);
> -    mov->state = GenInstructionState(simdWidth);
> -    if (simdWidth == 1)
> -      mov->state.noMask = 1;
> -    gr = f == ir::FAMILY_QWORD ? GenRegister::dfxgrf(simdWidth, tmp) : GenRegister::fxgrf(simdWidth, tmp);
> -    insn->dst(regID) = mov->src(0) = gr;
> -    insn->append(*mov);
> +      SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
> +      mov->dst(0) = GenRegister::retype(insn->dst(regID), gr.type);
> +      mov->state = GenInstructionState(simdWidth);
> +      if (simdWidth == 1)
> +        mov->state.noMask = 1;
> +      mov->src(0) = gr;
> +      insn->append(*mov);
> +    }
> +    insn->dst(regID) = gr;
>      return tmp;
>    }
>  
> @@ -1625,12 +1633,12 @@ namespace gbe
>      return this->opaque->getRegisterData(reg);
>    }
>  
> -  ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
> -    return this->opaque->replaceSrc(insn, regID);
> +  ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
> +    return this->opaque->replaceSrc(insn, regID, type, needMov);
>    }
>  
> -  ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID) {
> -    return this->opaque->replaceDst(insn, regID);
> +  ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
> +    return this->opaque->replaceDst(insn, regID, type, needMov);
>    }
>    bool Selection::spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool) {
>      return this->opaque->spillRegs(spilledRegs, registerPool);
> @@ -2895,7 +2903,7 @@ namespace gbe
>           type == TYPE_DOUBLE || type == TYPE_FLOAT ||
>           type == TYPE_U32 ||  type == TYPE_S32 /*||
>           (!needStoreBool)*/)
> -        tmpDst = GenRegister::nullud();
> +        tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_F);
>        else
>          tmpDst = sel.selReg(dst, TYPE_BOOL);
>  
> @@ -2952,7 +2960,7 @@ namespace gbe
>              // the dst to null register. And let the flag reg allocation
>              // function to generate the flag grf on demand correctly latter.
>              sel.curr.flagGen = needStoreBool;
> -            tmpDst = GenRegister::nullud();
> +            tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
>            }
>            sel.CMP(getGenCompare(opcode), src0, src1, tmpDst);
>          }
> @@ -3280,7 +3288,8 @@ namespace gbe
>        sel.push();
>          sel.curr.noMask = 1;
>          sel.curr.predicate = GEN_PREDICATE_NONE;
> -        sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
> +        sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1,
> +                GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
>        sel.pop();
>  
>        if (sel.block->hasBarrier) {
> @@ -3293,7 +3302,8 @@ namespace gbe
>            sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL));
>            sel.curr.predicate = GEN_PREDICATE_NONE;
>            sel.curr.noMask = 1;
> -          sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL));
> +          sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL),
> +                  GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
>            if (simdWidth == 8)
>              sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
>            else if (simdWidth == 16)
> diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
> index df0a10e..1f48b23 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -220,9 +220,9 @@ namespace gbe
>      /*! Get the data for the given register */
>      ir::RegisterData getRegisterData(ir::Register reg) const;
>      /*! Replace a source by the returned temporary register */
> -    ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
> +    ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true);
>      /*! Replace a destination to the returned temporary register */
> -    ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
> +    ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true);
>      /*! spill a register (insert spill/unspill instructions) */
>      bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
>      /*! Indicate if a register is scalar or not */
> diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
> index 880a267..8349e9a 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -188,6 +188,21 @@ namespace gbe
>      INLINE bool spillReg(ir::Register reg, bool isAllocated = false);
>      INLINE bool vectorCanSpill(SelectionVector *vector);
>      INLINE void allocateScratchForSpilled();
> +
> +    /*! replace specified source/dst register with temporary register and update interval */
> +    INLINE ir::Register replaceReg(Selection &sel, SelectionInstruction *insn,
> +                                   uint32_t regID, bool isSrc,
> +                                   ir::Type type = ir::TYPE_FLOAT, bool needMov = true) {
> +      ir::Register reg;
> +      if (isSrc)
> +        reg = sel.replaceSrc(insn, regID, type, needMov);
> +      else
> +        reg = sel.replaceDst(insn, regID, type, needMov);
> +      intervals.push_back(reg);
> +      intervals[reg].minID = insn->ID;
> +      intervals[reg].maxID = insn->ID;
> +      return reg;
> +    }
>      /*! Use custom allocator */
>      GBE_CLASS(Opaque);
>    };
> @@ -301,15 +316,9 @@ namespace gbe
>        // the MOVs
>        else {
>          ir::Register tmp;
> -        if (vector->isSrc)
> -          tmp = selection.replaceSrc(vector->insn, regID);
> -        else
> -          tmp = selection.replaceDst(vector->insn, regID);
> +        tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc);
>          const VectorLocation location = std::make_pair(vector, regID);
>          this->vectorMap.insert(std::make_pair(tmp, location));
> -        intervals.push_back(tmp);
> -        intervals[tmp].minID = vector->insn->ID;
> -        intervals[tmp].maxID = vector->insn->ID;
>        }
>      }
>    }
> @@ -590,12 +599,16 @@ namespace gbe
>              if (insn.state.predicate != GEN_PREDICATE_NONE)
>                validateFlag(selection, insn);
>            }
> -
>            // This is a CMP for a pure flag booleans, we don't need to write result to
>            // the grf. And latter, we will not allocate grf for it.
>            if (insn.opcode == SEL_OP_CMP &&
> -              flagBooleans.contains((ir::Register)(insn.dst(0).value.reg)))
> -            insn.dst(0) = GenRegister::null();
> +              (flagBooleans.contains(insn.dst(0).reg()) ||
> +               GenRegister::isNull(insn.dst(0)))) {
> +            // set a temporary register to avoid switch in this block.
> +            bool isSrc = false;
> +            bool needMov = false;
> +            this->replaceReg(selection, &insn, 0, isSrc, ir::TYPE_FLOAT, needMov);
> +          }
>            // If the instruction requires to generate (CMP for long/int/float..)
>            // the flag value to the register, and it's not a pure flag boolean,
>            // we need to use SEL instruction to generate the flag value to the UW8
> -- 
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list