[Beignet] [PATCH 2/2] GBE: optimize CMP instruction encoding.

Zhigang Gong zhigang.gong at intel.com
Sun May 18 21:33:06 PDT 2014


This patch fixes the following two things.
1. Use a temporary register as dst register for the CMP
instruction in the middle of a block.
2. fix the switch flag for the CMP instruction at the begining
of each block. As the compact instruction handling will handle
the cmp instruction directly, and will ignore the switch
flag which is incorrect.

This patch could get about 1-2% performance gain for luxmark.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen_encoder.cpp        |  9 +++-
 backend/src/backend/gen_insn_selection.cpp | 84 +++++++++++++++++-------------
 backend/src/backend/gen_insn_selection.hpp |  4 +-
 backend/src/backend/gen_reg_allocation.cpp | 33 ++++++++----
 4 files changed, 79 insertions(+), 51 deletions(-)

diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index aa30b05..ca587dd 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1148,13 +1148,14 @@ namespace gbe
 
   void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
     if (needToSplitCmp(this, src0, src1) == false) {
-      if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
+      if(!GenRegister::isNull(dst) && compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
         return;
       }
       GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP);
       this->setHeader(insn);
       insn->header.destreg_or_condmod = conditional;
-      insn->header.thread_control = GEN_THREAD_SWITCH;
+      if (GenRegister::isNull(dst))
+        insn->header.thread_control = GEN_THREAD_SWITCH;
       this->setDst(insn, dst);
       this->setSrc0(insn, src0);
       this->setSrc1(insn, src1);
@@ -1164,6 +1165,8 @@ namespace gbe
       // Instruction for the first quarter
       insnQ1 = this->next(GEN_OPCODE_CMP);
       this->setHeader(insnQ1);
+      if (GenRegister::isNull(dst))
+        insnQ1->header.thread_control = GEN_THREAD_SWITCH;
       insnQ1->header.quarter_control = GEN_COMPRESSION_Q1;
       insnQ1->header.execution_size = GEN_WIDTH_8;
       insnQ1->header.destreg_or_condmod = conditional;
@@ -1174,6 +1177,8 @@ namespace gbe
       // Instruction for the second quarter
       insnQ2 = this->next(GEN_OPCODE_CMP);
       this->setHeader(insnQ2);
+      if (GenRegister::isNull(dst))
+        insnQ2->header.thread_control = GEN_THREAD_SWITCH;
       insnQ2->header.quarter_control = GEN_COMPRESSION_Q2;
       insnQ2->header.execution_size = GEN_WIDTH_8;
       insnQ2->header.destreg_or_condmod = conditional;
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 0cb633f..ed69bd0 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -311,9 +311,9 @@ namespace gbe
     /*! Implement public class */
     INLINE uint32_t getVectorNum(void) const { return this->vectorNum; }
     /*! Implement public class */
-    INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
+    INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov);
     /*! Implement public class */
-    INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
+    INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov);
     /*! spill a register (insert spill/unspill instructions) */
     INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
     /*! indicate whether a register is a scalar/uniform register. */
@@ -843,48 +843,56 @@ namespace gbe
     return true;
   }
 
-  ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
+  ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
     SelectionBlock *block = insn->parent;
     const uint32_t simdWidth = insn->state.execWidth;
     ir::Register tmp;
+    GenRegister gr;
 
     // This will append the temporary register in the instruction block
     this->block = block;
-    tmp = this->reg(ir::FAMILY_DWORD);
-
-    // Generate the MOV instruction and replace the register in the instruction
-    SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
-    mov->src(0) = GenRegister::retype(insn->src(regID), GEN_TYPE_F);
-    mov->state = GenInstructionState(simdWidth);
-    if (this->isScalarReg(insn->src(regID).reg()))
-      mov->state.noMask = 1;
-    insn->src(regID) = mov->dst(0) = GenRegister::fxgrf(simdWidth, tmp);
-    insn->prepend(*mov);
+    tmp = this->reg(ir::getFamily(type), simdWidth == 1);
+    gr =  this->selReg(tmp, type);
+    if (needMov) {
+      // Generate the MOV instruction and replace the register in the instruction
+      SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
+      mov->src(0) = GenRegister::retype(insn->src(regID), gr.type);
+      mov->state = GenInstructionState(simdWidth);
+      if (this->isScalarReg(insn->src(regID).reg()))
+        mov->state.noMask = 1;
+      mov->dst(0) = gr;
+      insn->prepend(*mov);
+    }
+    insn->src(regID) = gr;
 
     return tmp;
   }
 
-  ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID) {
+  ir::Register Selection::Opaque::replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
     SelectionBlock *block = insn->parent;
-    uint32_t simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : insn->state.execWidth;
+    uint32_t simdWidth;
+    if (!GenRegister::isNull(insn->dst(regID)))
+      simdWidth = this->isScalarReg(insn->dst(regID).reg()) ? 1 : insn->state.execWidth;
+    else {
+      GBE_ASSERT(needMov == false);
+      simdWidth = insn->state.execWidth;
+    }
     ir::Register tmp;
-    ir::RegisterFamily f = file.get(insn->dst(regID).reg()).family;
-    int genType = f == ir::FAMILY_QWORD ? GEN_TYPE_DF : GEN_TYPE_F;
     GenRegister gr;
-
-    // This will append the temporary register in the instruction block
     this->block = block;
-    tmp = this->reg(f);
-
+    tmp = this->reg(ir::getFamily(type));
+    gr = this->selReg(tmp, type);
+    if (needMov) {
     // Generate the MOV instruction and replace the register in the instruction
-    SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
-    mov->dst(0) = GenRegister::retype(insn->dst(regID), genType);
-    mov->state = GenInstructionState(simdWidth);
-    if (simdWidth == 1)
-      mov->state.noMask = 1;
-    gr = f == ir::FAMILY_QWORD ? GenRegister::dfxgrf(simdWidth, tmp) : GenRegister::fxgrf(simdWidth, tmp);
-    insn->dst(regID) = mov->src(0) = gr;
-    insn->append(*mov);
+      SelectionInstruction *mov = this->create(SEL_OP_MOV, 1, 1);
+      mov->dst(0) = GenRegister::retype(insn->dst(regID), gr.type);
+      mov->state = GenInstructionState(simdWidth);
+      if (simdWidth == 1)
+        mov->state.noMask = 1;
+      mov->src(0) = gr;
+      insn->append(*mov);
+    }
+    insn->dst(regID) = gr;
     return tmp;
   }
 
@@ -1625,12 +1633,12 @@ namespace gbe
     return this->opaque->getRegisterData(reg);
   }
 
-  ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
-    return this->opaque->replaceSrc(insn, regID);
+  ir::Register Selection::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
+    return this->opaque->replaceSrc(insn, regID, type, needMov);
   }
 
-  ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID) {
-    return this->opaque->replaceDst(insn, regID);
+  ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
+    return this->opaque->replaceDst(insn, regID, type, needMov);
   }
   bool Selection::spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool) {
     return this->opaque->spillRegs(spilledRegs, registerPool);
@@ -2895,7 +2903,7 @@ namespace gbe
          type == TYPE_DOUBLE || type == TYPE_FLOAT ||
          type == TYPE_U32 ||  type == TYPE_S32 /*||
          (!needStoreBool)*/)
-        tmpDst = GenRegister::nullud();
+        tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_F);
       else
         tmpDst = sel.selReg(dst, TYPE_BOOL);
 
@@ -2952,7 +2960,7 @@ namespace gbe
             // the dst to null register. And let the flag reg allocation
             // function to generate the flag grf on demand correctly latter.
             sel.curr.flagGen = needStoreBool;
-            tmpDst = GenRegister::nullud();
+            tmpDst = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
           }
           sel.CMP(getGenCompare(opcode), src0, src1, tmpDst);
         }
@@ -3280,7 +3288,8 @@ namespace gbe
       sel.push();
         sel.curr.noMask = 1;
         sel.curr.predicate = GEN_PREDICATE_NONE;
-        sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+        sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1,
+                GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
       sel.pop();
 
       if (sel.block->hasBarrier) {
@@ -3293,7 +3302,8 @@ namespace gbe
           sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL));
           sel.curr.predicate = GEN_PREDICATE_NONE;
           sel.curr.noMask = 1;
-          sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL));
+          sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL),
+                  GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
           if (simdWidth == 8)
             sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
           else if (simdWidth == 16)
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index df0a10e..1f48b23 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -220,9 +220,9 @@ namespace gbe
     /*! Get the data for the given register */
     ir::RegisterData getRegisterData(ir::Register reg) const;
     /*! Replace a source by the returned temporary register */
-    ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
+    ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true);
     /*! Replace a destination to the returned temporary register */
-    ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
+    ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID, ir::Type type = ir::TYPE_FLOAT, bool needMov = true);
     /*! spill a register (insert spill/unspill instructions) */
     bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
     /*! Indicate if a register is scalar or not */
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 880a267..8349e9a 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -188,6 +188,21 @@ namespace gbe
     INLINE bool spillReg(ir::Register reg, bool isAllocated = false);
     INLINE bool vectorCanSpill(SelectionVector *vector);
     INLINE void allocateScratchForSpilled();
+
+    /*! replace specified source/dst register with temporary register and update interval */
+    INLINE ir::Register replaceReg(Selection &sel, SelectionInstruction *insn,
+                                   uint32_t regID, bool isSrc,
+                                   ir::Type type = ir::TYPE_FLOAT, bool needMov = true) {
+      ir::Register reg;
+      if (isSrc)
+        reg = sel.replaceSrc(insn, regID, type, needMov);
+      else
+        reg = sel.replaceDst(insn, regID, type, needMov);
+      intervals.push_back(reg);
+      intervals[reg].minID = insn->ID;
+      intervals[reg].maxID = insn->ID;
+      return reg;
+    }
     /*! Use custom allocator */
     GBE_CLASS(Opaque);
   };
@@ -301,15 +316,9 @@ namespace gbe
       // the MOVs
       else {
         ir::Register tmp;
-        if (vector->isSrc)
-          tmp = selection.replaceSrc(vector->insn, regID);
-        else
-          tmp = selection.replaceDst(vector->insn, regID);
+        tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc);
         const VectorLocation location = std::make_pair(vector, regID);
         this->vectorMap.insert(std::make_pair(tmp, location));
-        intervals.push_back(tmp);
-        intervals[tmp].minID = vector->insn->ID;
-        intervals[tmp].maxID = vector->insn->ID;
       }
     }
   }
@@ -590,12 +599,16 @@ namespace gbe
             if (insn.state.predicate != GEN_PREDICATE_NONE)
               validateFlag(selection, insn);
           }
-
           // This is a CMP for a pure flag booleans, we don't need to write result to
           // the grf. And latter, we will not allocate grf for it.
           if (insn.opcode == SEL_OP_CMP &&
-              flagBooleans.contains((ir::Register)(insn.dst(0).value.reg)))
-            insn.dst(0) = GenRegister::null();
+              (flagBooleans.contains(insn.dst(0).reg()) ||
+               GenRegister::isNull(insn.dst(0)))) {
+            // set a temporary register to avoid switch in this block.
+            bool isSrc = false;
+            bool needMov = false;
+            this->replaceReg(selection, &insn, 0, isSrc, ir::TYPE_FLOAT, needMov);
+          }
           // If the instruction requires to generate (CMP for long/int/float..)
           // the flag value to the register, and it's not a pure flag boolean,
           // we need to use SEL instruction to generate the flag value to the UW8
-- 
1.8.3.2



More information about the Beignet mailing list