[Beignet] [PATCH 2/2] GBE: optimize CMP instruction encoding.
Zhigang Gong
zhigang.gong at linux.intel.com
Thu May 22 18:21:09 PDT 2014
Ping for review.
On Mon, May 19, 2014 at 12:33:06PM +0800, Zhigang Gong wrote:
> This patch fixes the following two things.
> 1. Use a temporary register as dst register for the CMP
> instruction in the middle of a block.
> 2. fix the switch flag for the CMP instruction at the begining
> of each block. As the compact instruction handling will handle
> the cmp instruction directly, and will ignore the switch
> flag which is incorrect.
>
> This patch could get about 1-2% performance gain for luxmark.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
> backend/src/backend/gen_encoder.cpp | 9 +++-
> backend/src/backend/gen_insn_selection.cpp | 84 +++++++++++++++++-------------
> backend/src/backend/gen_insn_selection.hpp | 4 +-
> backend/src/backend/gen_reg_allocation.cpp | 33 ++++++++----
> 4 files changed, 79 insertions(+), 51 deletions(-)
>
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index aa30b05..ca587dd 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -1148,13 +1148,14 @@ namespace gbe
>
> void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
> if (needToSplitCmp(this, src0, src1) == false) {
> - if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
> + if(!GenRegister::isNull(dst) && compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
> return;
> }
> GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP);
> this->setHeader(insn);
> insn->header.destreg_or_condmod = conditional;
> - insn->header.thread_control = GEN_THREAD_SWITCH;
> + if (GenRegister::isNull(dst))
> + insn->header.thread_control = GEN_THREAD_SWITCH;
> this->setDst(insn, dst);
> this->setSrc0(insn, src0);
> this->setSrc1(insn, src1);
> @@ -1164,6 +1165,8 @@ namespace gbe
> // Instruction for the first quarter
> insnQ1 = this->next(GEN_OPCODE_CMP);
> this->setHeader(insnQ1);
> + if (GenRegister::isNull(dst))
> + insnQ1->header.thread_control = GEN_THREAD_SWITCH;
> insnQ1->header.quarter_control = GEN_COMPRESSION_Q1;
> insnQ1->header.execution_size = GEN_WIDTH_8;
> insnQ1->header.destreg_or_condmod = conditional;
> @@ -1174,6 +1177,8 @@ namespace gbe
> // Instruction for the second quarter
> insnQ2 = this->next(GEN_OPCODE_CMP);
> this->setHeader(insnQ2);
> + if (GenRegister::isNull(dst))
> + insnQ2->header.thread_control = GEN_THREAD_SWITCH;
> insnQ2->header.quarter_control = GEN_COMPRESSION_Q2;
> insnQ2->header.execution_size = GEN_WIDTH_8;
> insnQ2->header.destreg_or_condmod = conditional;
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 0cb633f..ed69bd0 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -311,9 +311,9 @@ namespace gbe
> /*! Implement public class */
> INLINE uint32_t getVectorNum(void) const { return this->vectorNum; }
> /*! Implement public class */
> - INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
> + INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov);
> /*! Implement public class */
> - INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
> + INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov);
> /*! spill a register (insert spill/unspill instructions) */
> INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
> /*! indicate whether a register is a scalar/uniform register. */
> @@ -843,48 +843,56 @@ namespace gbe
> return true;
> }
>
> - ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
> + ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
> SelectionBlock *block = insn->parent;
> const uint32_t simdWidth = insn->state.execWidth;
> ir::Register tmp;
> + GenRegister gr;
>
> // This will append the temporary register in the instruction block
> this->block = block;
> - tmp = this->reg(ir::FAMILY_DWORD);
> -
> - // Generate the MOV instruction and replace the register in the instruction
> - SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
> - mov->src(0) = GenRegister::retype(insn->src(regID), GEN_TYPE_F);
> - mov->state = GenInstructionState(simdWidth);
> - if (this->isScalarReg(insn->src(regID).reg()))
> - mov->state.noMask = 1;
> - insn->src(regID) = mov->dst(0) = GenRegister::fxgrf(simdWidth, tmp);
> - insn->prepend(*mov);
> + tmp = this->reg(ir::getFamily(type), simdWidth == 1);
> + gr = this->selReg(tmp, type);
> + if (needMov) {
> + // Generate the MOV instruction and replace the register in the instruction
> + SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
> + mov->src(0) = GenRegister::retype(insn->src(regID), gr.type);
> + mov->state = GenInstructionState(simdWidth);
> + if (this->isScalarReg(insn->src(regID).reg()))
> + mov->state.noMask = 1;
> + mov->dst(0) = gr;
> + insn->prepend(*mov);
> + }
> + insn->src(regID) = gr;
>
> return tmp;
> }
>
> - ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID) {
> + ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
> SelectionBlock *block = insn->parent;
> - uint32_t simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : insn->state.execWidth;
> + uint32_t simdWidth;
> + if (!GenRegister::isNull(insn->dst(regID)))
> + simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : insn->state.execWidth;
> + else {
> + GBE_ASSERT(needMov == false);
> + simdWidth = insn->state.execWidth;
> + }
> ir::Register tmp;
> - ir::RegisterFamily f = file.get(insn->dst(regID).reg()).family;
> - int genType = f == ir::FAMILY_QWORD ? GEN_TYPE_DF : GEN_TYPE_F;
> GenRegister gr;
> -
> - // This will append the temporary register in the instruction block
> this->block = block;
> - tmp = this->reg(f);
> -
> + tmp = this->reg(ir::getFamily(type));
> + gr = this->selReg(tmp, type);
> + if (needMov) {
> // Generate the MOV instruction and replace the register in the instruction
> - SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
> - mov->dst(0) = GenRegister::retype(insn->dst(regID), genType);
> - mov->state = GenInstructionState(simdWidth);
> - if (simdWidth == 1)
> - mov->state.noMask = 1;
> - gr = f == ir::FAMILY_QWORD ? GenRegister::dfxgrf(simdWidth, tmp) : GenRegister::fxgrf(simdWidth, tmp);
> - insn->dst(regID) = mov->src(0) = gr;
> - insn->append(*mov);
> + SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
> + mov->dst(0) = GenRegister::retype(insn->dst(regID), gr.type);
> + mov->state = GenInstructionState(simdWidth);
> + if (simdWidth == 1)
> + mov->state.noMask = 1;
> + mov->src(0) = gr;
> + insn->append(*mov);
> + }
> + insn->dst(regID) = gr;
> return tmp;
> }
>
> @@ -1625,12 +1633,12 @@ namespace gbe
> return this->opaque->getRegisterData(reg);
> }
>
> - ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
> - return this->opaque->replaceSrc(insn, regID);
> + ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
> + return this->opaque->replaceSrc(insn, regID, type, needMov);
> }
>
> - ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID) {
> - return this->opaque->replaceDst(insn, regID);
> + ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
> + return this->opaque->replaceDst(insn, regID, type, needMov);
> }
> bool Selection::spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool) {
> return this->opaque->spillRegs(spilledRegs, registerPool);
> @@ -2895,7 +2903,7 @@ namespace gbe
> type == TYPE_DOUBLE || type == TYPE_FLOAT ||
> type == TYPE_U32 || type == TYPE_S32 /*||
> (!needStoreBool)*/)
> - tmpDst = GenRegister::nullud();
> + tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_F);
> else
> tmpDst = sel.selReg(dst, TYPE_BOOL);
>
> @@ -2952,7 +2960,7 @@ namespace gbe
> // the dst to null register. And let the flag reg allocation
> // function to generate the flag grf on demand correctly latter.
> sel.curr.flagGen = needStoreBool;
> - tmpDst = GenRegister::nullud();
> + tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
> }
> sel.CMP(getGenCompare(opcode), src0, src1, tmpDst);
> }
> @@ -3280,7 +3288,8 @@ namespace gbe
> sel.push();
> sel.curr.noMask = 1;
> sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
> + sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1,
> + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
> sel.pop();
>
> if (sel.block->hasBarrier) {
> @@ -3293,7 +3302,8 @@ namespace gbe
> sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL));
> sel.curr.predicate = GEN_PREDICATE_NONE;
> sel.curr.noMask = 1;
> - sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL));
> + sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL),
> + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
> if (simdWidth == 8)
> sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
> else if (simdWidth == 16)
> diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
> index df0a10e..1f48b23 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -220,9 +220,9 @@ namespace gbe
> /*! Get the data for the given register */
> ir::RegisterData getRegisterData(ir::Register reg) const;
> /*! Replace a source by the returned temporary register */
> - ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
> + ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true);
> /*! Replace a destination to the returned temporary register */
> - ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
> + ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true);
> /*! spill a register (insert spill/unspill instructions) */
> bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
> /*! Indicate if a register is scalar or not */
> diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
> index 880a267..8349e9a 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -188,6 +188,21 @@ namespace gbe
> INLINE bool spillReg(ir::Register reg, bool isAllocated = false);
> INLINE bool vectorCanSpill(SelectionVector *vector);
> INLINE void allocateScratchForSpilled();
> +
> + /*! replace specified source/dst register with temporary register and update interval */
> + INLINE ir::Register replaceReg(Selection &sel, SelectionInstruction *insn,
> + uint32_t regID, bool isSrc,
> + ir::Type type = ir::TYPE_FLOAT, bool needMov = true) {
> + ir::Register reg;
> + if (isSrc)
> + reg = sel.replaceSrc(insn, regID, type, needMov);
> + else
> + reg = sel.replaceDst(insn, regID, type, needMov);
> + intervals.push_back(reg);
> + intervals[reg].minID = insn->ID;
> + intervals[reg].maxID = insn->ID;
> + return reg;
> + }
> /*! Use custom allocator */
> GBE_CLASS(Opaque);
> };
> @@ -301,15 +316,9 @@ namespace gbe
> // the MOVs
> else {
> ir::Register tmp;
> - if (vector->isSrc)
> - tmp = selection.replaceSrc(vector->insn, regID);
> - else
> - tmp = selection.replaceDst(vector->insn, regID);
> + tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc);
> const VectorLocation location = std::make_pair(vector, regID);
> this->vectorMap.insert(std::make_pair(tmp, location));
> - intervals.push_back(tmp);
> - intervals[tmp].minID = vector->insn->ID;
> - intervals[tmp].maxID = vector->insn->ID;
> }
> }
> }
> @@ -590,12 +599,16 @@ namespace gbe
> if (insn.state.predicate != GEN_PREDICATE_NONE)
> validateFlag(selection, insn);
> }
> -
> // This is a CMP for a pure flag booleans, we don't need to write result to
> // the grf. And latter, we will not allocate grf for it.
> if (insn.opcode == SEL_OP_CMP &&
> - flagBooleans.contains((ir::Register)(insn.dst(0).value.reg)))
> - insn.dst(0) = GenRegister::null();
> + (flagBooleans.contains(insn.dst(0).reg()) ||
> + GenRegister::isNull(insn.dst(0)))) {
> + // set a temporary register to avoid switch in this block.
> + bool isSrc = false;
> + bool needMov = false;
> + this->replaceReg(selection, &insn, 0, isSrc, ir::TYPE_FLOAT, needMov);
> + }
> // If the instruction requires to generate (CMP for long/int/float..)
> // the flag value to the register, and it's not a pure flag boolean,
> // we need to use SEL instruction to generate the flag value to the UW8
> --
> 1.8.3.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list