[Beignet] [PATCH 1/2] Backend: Refine bool register patch and allocation

Xiuli Pan xiuli.pan at intel.com
Wed May 3 08:24:06 UTC 2017


From: Pan Xiuli <xiuli.pan at intel.com>

Bool values can simply live in flag registers, but some operations need a GRF
register to be involved. So we add two kinds of helper registers, BOOL_BIT and
BOOL_UW, to handle live-out bool values and bool operations.

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp         |  43 +++-
 .../src/backend/gen_insn_selection_optimize.cpp    |   4 +
 backend/src/backend/gen_reg_allocation.cpp         | 151 +++++-------
 backend/src/backend/gen_register.hpp               |   2 +-
 backend/src/ir/context.hpp                         |   2 +-
 backend/src/ir/function.hpp                        |   3 +
 backend/src/ir/instruction.cpp                     |  11 +-
 backend/src/ir/instruction.hpp                     |   2 +
 backend/src/ir/register.cpp                        |   2 +
 backend/src/ir/register.hpp                        |   6 +-
 backend/src/llvm/llvm_gen_backend.cpp              | 257 +++++++++++++++++++++
 11 files changed, 370 insertions(+), 113 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 41ef7b8..aefae5e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1256,6 +1256,8 @@ namespace gbe
           SEL_REG(ul16grf, ul8grf, ul1grf);
         }
         break;
+      case FAMILY_BOOL_BIT: SEL_REG(uw1grf, uw1grf, uw1grf); break;
+      case FAMILY_BOOL_UW: SEL_REG(uw16grf, uw8grf, uw1grf); break;
       default: NOT_SUPPORTED;
     }
     GBE_ASSERT(false);
@@ -3105,13 +3107,25 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
             {
               sel.push();
                 auto dag = sel.regDAG[insn.getDst(0)];
-                if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL &&
-                    dag->isUsed) {
-                sel.curr.physicalFlag = 0;
-                sel.curr.flagIndex = insn.getDst(0).value();
-                sel.curr.modFlag = 1;
-              }
-              sel.MOV(dst, src);
+                // BOOL is now a flag register, so we need to handle these situations carefully
+                if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL) {
+                  sel.curr.execWidth = 1;
+                  sel.curr.noMask = 1;
+                  sel.curr.physicalFlag = 0;
+                  sel.curr.flagIndex = insn.getDst(0).value();
+                  sel.curr.predicate = GEN_PREDICATE_NONE;
+                  sel.curr.modFlag = 1;
+                }
+                else if (sel.getRegisterFamily(insn.getSrc(0)) == ir::FAMILY_BOOL ||
+                         sel.getRegisterFamily(insn.getSrc(0)) == ir::FAMILY_BOOL_BIT)
+                {
+                  sel.curr.noMask = 1;
+                  sel.curr.execWidth = 1;
+                  sel.curr.predicate = GEN_PREDICATE_NONE;
+                  sel.curr.modFlag = 1;
+                }
+
+                sel.MOV(dst, src);
               sel.pop();
             }
             break;
@@ -3384,8 +3398,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
       bool inverse = false;
       sel.getSrcGenRegImm(dag, src0, src1, type, inverse);
       // Output the binary instruction
-      if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL &&
-          dag.isUsed) {
+      if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL ) {
         GBE_ASSERT(insn.getOpcode() == OP_AND ||
                    insn.getOpcode() == OP_OR ||
                    insn.getOpcode() == OP_XOR);
@@ -3394,6 +3407,16 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
         sel.curr.modFlag = 1;
       }
 
+      if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL_BIT ) {
+        GBE_ASSERT(insn.getOpcode() == OP_AND ||
+                   insn.getOpcode() == OP_OR ||
+                   insn.getOpcode() == OP_XOR);
+        sel.curr.execWidth = 1;
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.curr.noMask = 1;
+        sel.curr.modFlag = 1;
+      }
+
       switch (opcode) {
         case OP_ADD:
           if ((type == Type::TYPE_U64 || type == Type::TYPE_S64) && !sel.hasLongType()) {
@@ -5393,8 +5416,6 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
       const ir::Liveness &liveness = sel.ctx.getLiveness();
       const ir::Liveness::LiveOut &liveOut = liveness.getLiveOut(curr);
       bool needStoreBool = false;
-      if (liveOut.contains(dst) || dag.computeBool)
-        needStoreBool = true;
 
       // why we set the tmpDst to null?
       // because for the listed type compare instruction could not
diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
index d2e0fb9..d60ed41 100644
--- a/backend/src/backend/gen_insn_selection_optimize.cpp
+++ b/backend/src/backend/gen_insn_selection_optimize.cpp
@@ -162,6 +162,10 @@ namespace gbe
     assert(insn.opcode == SEL_OP_MOV);
     const GenRegister& src = insn.src(0);
     const GenRegister& dst = insn.dst(0);
+
+    if ( dst.file == GEN_GENERAL_REGISTER_FILE && ctx.sel->getRegisterFamily(dst.reg()) == ir::FAMILY_BOOL)
+      return;
+
     if (src.type != dst.type || src.file != dst.file)
       return;
 
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 9183a24..193e75c 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -87,14 +87,16 @@ namespace gbe
     INLINE void getRegAttrib(ir::Register reg, uint32_t &regSize, ir::RegisterFamily *regFamily = NULL) const {
       // Note that byte vector registers use two bytes per byte (and can be
       // interleaved)
-      static const size_t familyVectorSize[] = {2,2,2,4,8,16,32};
-      static const size_t familyScalarSize[] = {2,2,2,4,8,16,32};
+      static const size_t familyVectorSize[] = {2,2,2,4,8,16,32,4,2,2};
+      static const size_t familyScalarSize[] = {2,2,2,4,8,16,32,4,2,2};
       using namespace ir;
       const bool isScalar = ctx.sel->isScalarReg(reg);
       const RegisterData regData = ctx.sel->getRegisterData(reg);
       const RegisterFamily family = regData.family;
       if (family == ir::FAMILY_REG)
         regSize = 32;
+      else if (family == ir::FAMILY_BOOL_BIT || family == ir::FAMILY_BOOL)
+        regSize = 2;
       else {
         const uint32_t typeSize = isScalar ? familyScalarSize[family] : familyVectorSize[family];
         regSize = isScalar ? typeSize : ctx.getSimdWidth() * typeSize;
@@ -480,8 +482,8 @@ namespace gbe
                                           insn.opcode == SEL_OP_XOR))
 
   #define IS_SCALAR_FLAG(insn) selection.isScalarReg(ir::Register(insn.state.flagIndex))
-  #define GET_FLAG_REG(insn) GenRegister::uwxgrf(IS_SCALAR_FLAG(insn) ? 1 : 8,\
-                                                 ir::Register(insn.state.flagIndex));
+
+  #define GET_FLAG_REG(insn) GenRegister::uw1grf(ir::Register(insn.state.flagIndex));
   #define IS_TEMP_FLAG(insn) (insn.state.flag == 0 && insn.state.subFlag == 1)
   #define NEED_DST_GRF_TYPE_FIX(ty) \
           (ty == GEN_TYPE_F ||      \
@@ -489,39 +491,6 @@ namespace gbe
            ty == GEN_TYPE_DF ||     \
            ty == GEN_TYPE_UL ||     \
            ty == GEN_TYPE_L)
-  // Flag is a virtual flag, this function is to validate the virtual flag
-  // to a physical flag. It is used to validate both temporary flag and the
-  // non-temporary flag registers.
-  // We track the last temporary validate register, if it's the same as
-  // current, we can avoid the revalidation.
-  void GenRegAllocator::Opaque::validateFlag(Selection &selection,
-                                             SelectionInstruction &insn) {
-    GBE_ASSERT(insn.state.physicalFlag == 1);
-    if (!IS_TEMP_FLAG(insn) && validatedFlags.find(insn.state.flagIndex) != validatedFlags.end())
-      return;
-    else if (IS_TEMP_FLAG(insn) && validTempFlagReg == insn.state.flagIndex)
-      return;
-    SelectionInstruction *cmp0 = selection.create(SEL_OP_CMP, 1, 2);
-    cmp0->state = GenInstructionState(ctx.getSimdWidth());
-    cmp0->state.flag = insn.state.flag;
-    cmp0->state.subFlag = insn.state.subFlag;
-    if (IS_SCALAR_FLAG(insn))
-      cmp0->state.noMask = 1;
-    cmp0->src(0) = GET_FLAG_REG(insn);
-    cmp0->src(1) = GenRegister::immuw(0);
-    cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
-    cmp0->extra.function = GEN_CONDITIONAL_NEQ;
-    insn.prepend(*cmp0);
-    if (!IS_TEMP_FLAG(insn))
-      validatedFlags.insert(insn.state.flagIndex);
-    else {
-      if (insn.state.modFlag == 0)
-        validTempFlagReg = insn.state.flagIndex;
-      else
-        validTempFlagReg = 0;
-    }
-  }
-
   
   void GenRegAllocator::Opaque::allocateFlags(Selection &selection) {
     // Previously, we have a global flag allocation implemntation.
@@ -653,53 +622,61 @@ namespace gbe
         // Patch the predicate now. Note that only compares actually modify it (it
         // is called a "conditional modifier"). The other instructions just read
         // it
-        if (insn.state.physicalFlag == 0) {
-          // SEL.bool instruction, the dst register should be stored in GRF
-          // the pred flag is used by flag register
-          if (insn.opcode == SEL_OP_SEL) {
-            ir::Register dst = insn.dst(0).reg();
-            if (ctx.sel->getRegisterFamily(dst) == ir::FAMILY_BOOL &&
-                allocatedFlags.find(dst) != allocatedFlags.end())
-              allocatedFlags.erase(dst);
+        if (IS_IMPLICITLY_MOD_FLAG(insn))
+        {
+          //outputSelectionInst(insn);
+          const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
+          for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+            const GenRegister &selReg = insn.src(srcID);
+            const ir::Register reg = selReg.reg();
+            if (selReg.file != GEN_GENERAL_REGISTER_FILE)
+              continue;
+            if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL);
+            {
+              auto it = allocatedFlags.find(reg);
+              if (it != allocatedFlags.end())
+                insn.src(srcID) = GenRegister::flag(it->second / 2 ,it->second & 1);
+            }
           }
+          for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+            const GenRegister &selReg = insn.dst(dstID);
+            const ir::Register reg = selReg.reg();
+            if (selReg.file != GEN_GENERAL_REGISTER_FILE)
+              continue;
+            if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL);
+            {
+              auto it = allocatedFlags.find(reg);
+              if (it != allocatedFlags.end())
+                insn.dst(dstID) = GenRegister::flag(it->second / 2 ,it->second & 1);
+            }
+          }
+        }
+
+        if (insn.state.physicalFlag == 0) {
           auto it = allocatedFlags.find(ir::Register(insn.state.flagIndex));
           if (it != allocatedFlags.end()) {
             insn.state.physicalFlag = 1;
             insn.state.flag = it->second / 2;
             insn.state.subFlag = it->second & 1;
 
-            // modFlag is for the LOADI/MOV/AND/OR/XOR instructions which will modify a
-            // flag register. We set the condition for them to save one instruction if possible.
-            if (IS_IMPLICITLY_MOD_FLAG(insn)) {
-              // If this is a modFlag on a scalar bool, we need to remove it
-              // from the allocated flags map. Then latter, the user could
-              // validate the flag from the scalar value correctly.
-              // The reason is we can not predicate the active channel when we
-              // need to use this flag.
-              if (IS_SCALAR_FLAG(insn)) {
-                allocatedFlags.erase(ir::Register(insn.state.flagIndex));
-                continue;
-              }
-              insn.extra.function = GEN_CONDITIONAL_NEQ;
-            }
-            // If this is an external bool, we need to validate it if it is not validated yet.
-            if ((insn.state.externFlag &&
-                 insn.state.predicate != GEN_PREDICATE_NONE))
-              validateFlag(selection, insn);
           } else {
             insn.state.physicalFlag = 1;
             insn.state.flag = 0;
             insn.state.subFlag = 1;
 
-            // If this is for MOV/AND/OR/... we don't need to waste an extra instruction
-            // to generate the flag here, just continue to next instruction. And the validTempFlagReg
-            // will not be destroyed.
-            if (IS_IMPLICITLY_MOD_FLAG(insn))
-              continue;
             // This bool doesn't have a deadicated flag, we use temporary flag here.
             // each time we need to validate it from the grf register.
             if (insn.state.predicate != GEN_PREDICATE_NONE)
-              validateFlag(selection, insn);
+            {
+              SelectionInstruction *movf = selection.create(SEL_OP_MOV, 1, 1);
+              movf->state = GenInstructionState(1);
+              movf->state.noMask = 1;
+              movf->state.predicate = GEN_PREDICATE_NONE;
+              movf->state.execWidth = 1;
+              movf->src(0) = GenRegister::uw1grf(ir::Register(insn.state.flagIndex));
+              movf->dst(0) = GenRegister::flag(insn.state.flag,insn.state.subFlag);
+              insn.prepend(*movf);
+            }
           }
           if (insn.opcode == SEL_OP_CMP &&
               (flagBooleans.contains(insn.dst(0).reg()) ||
@@ -729,35 +706,15 @@ namespace gbe
           // register.
           if (insn.state.flagGen == 1 &&
               !flagBooleans.contains((ir::Register)(insn.state.flagIndex))) {
-            SelectionInstruction *sel0 = selection.create(SEL_OP_SEL, 1, 2);
-            uint32_t simdWidth;
-            simdWidth = IS_SCALAR_FLAG(insn) ? 1 : ctx.getSimdWidth();
-
-            sel0->state = GenInstructionState(simdWidth);
-            if (IS_SCALAR_FLAG(insn))
-              sel0->state.noMask = 1;
-            sel0->state.flag = insn.state.flag;
-            sel0->state.subFlag = insn.state.subFlag;
-            sel0->state.predicate = GEN_PREDICATE_NORMAL;
-            sel0->src(0) = GenRegister::uw1grf(ir::ocl::one);
-            sel0->src(1) = GenRegister::uw1grf(ir::ocl::zero);
-            sel0->dst(0) = GET_FLAG_REG(insn);
-            liveInSet01.insert(insn.parent->bb);
-            insn.append(*sel0);
-            // We use the zero one after the liveness analysis, we have to update
-            // the liveness data manually here.
-            GenRegInterval &interval0 = intervals[ir::ocl::zero];
-            GenRegInterval &interval1 = intervals[ir::ocl::one];
-            interval0.minID = std::min(interval0.minID, (int32_t)insn.ID);
-            interval0.maxID = std::max(interval0.maxID, (int32_t)insn.ID);
-            interval1.minID = std::min(interval1.minID, (int32_t)insn.ID);
-            interval1.maxID = std::max(interval1.maxID, (int32_t)insn.ID);
+
+            SelectionInstruction *movg = selection.create(SEL_OP_MOV, 1, 1);
+            movg->state = GenInstructionState(1);
+            movg->state.predicate = GEN_PREDICATE_NONE;
+            movg->state.noMask = 1;
+            movg->src(0) = GenRegister::flag(insn.state.flag, insn.state.subFlag);
+            movg->dst(0) = GenRegister::uw1grf(ir::Register(insn.state.flagIndex));
+            insn.append(*movg);
           }
-        } else {
-          // If the instruction use the temporary flag register manually,
-          // we should invalidate the temp flag reg here.
-          if (insn.state.flag == 0 && insn.state.subFlag == 1)
-            validTempFlagReg = 0;
         }
       }
     }
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 6c73f5e..da58805 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -120,7 +120,7 @@ namespace gbe
       this->noMask = 0;
       this->flag = 0;
       this->subFlag = 0;
-      this->grfFlag = 1;
+      this->grfFlag = 0;
       this->externFlag = 0;
       this->modFlag = 0;
       this->flagGen = 0;
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index 877d639..6945bfe 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -142,7 +142,7 @@ namespace ir {
     /*! Append a new tuple */
     template <typename... Args> INLINE Tuple tuple(Args...args) {
       GBE_ASSERTM(fn != NULL, "No function currently defined");
-      return fn->file.appendTuple(args...);
+      return fn->makeTuple(args...);
     }
     /*! Make a tuple from an array of register */
     INLINE Tuple arrayTuple(const Register *reg, uint32_t regNum) {
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 64d9727..ce7412b 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -563,6 +563,9 @@ namespace ir {
     INLINE bool setUseDeviceEnqueue(bool useDeviceEnqueue) {
       return this->useDeviceEnqueue = useDeviceEnqueue;
     }
+    /*! Make a tuple from args by appending it to this function's file member */
+    template <typename... Args>
+    INLINE Tuple makeTuple(Args...args) { return file.appendTuple(args...); }
   private:
     friend class Context;           //!< Can freely modify a function
     std::string name;               //!< Function name
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index a9156ff..7525138 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -2290,7 +2290,7 @@ END_FUNCTION(Instruction, Register)
 #if GBE_DEBUG
     const RegisterData oldData = this->getDstData(dstID);
     const RegisterData newData = fn.getRegisterData(reg);
-    GBE_ASSERT(oldData.family == newData.family);
+    GBE_ASSERT(oldData.family == newData.family || oldData.family == FAMILY_BOOL || oldData.family == FAMILY_BOOL_BIT);
 #endif /* GBE_DEBUG */
     const Opcode op = this->getOpcode();
     switch (op) {
@@ -2339,6 +2339,15 @@ END_FUNCTION(Instruction, Register)
       *new_ins = insn;
   }
 
+  void Instruction::insertbefore(Instruction *next, Instruction ** new_ins) {
+    // Build a new instruction from *this in next's function, give it next's
+    // parent, and prepend it to next; report it through new_ins when asked.
+    Instruction *copy = next->getFunction().newInstruction(*this);
+    copy->parent = next->parent;
+    prepend(copy, next);
+    if (new_ins != NULL) *new_ins = copy;
+  }
+
   bool Instruction::hasSideEffect(void) const {
     return opcode == OP_STORE ||
            opcode == OP_TYPED_WRITE ||
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 8685dd4..9dd775b 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -209,6 +209,8 @@ namespace ir {
     void remove(void);
     /* Insert the instruction after the previous one. */
     void insert(Instruction *prev, Instruction ** new_ins = NULL);
+    /* Insert the instruction before the next one. */
+    void insertbefore(Instruction *next, Instruction ** new_ins = NULL);
     void setDBGInfo(DebugInfo in) { DBGInfo = in; }
     /*! Indicates if the instruction belongs to instruction type T. Typically, T
      *  can be BinaryInstruction, UnaryInstruction, LoadInstruction and so on
diff --git a/backend/src/ir/register.cpp b/backend/src/ir/register.cpp
index 1e78722..ec59e61 100644
--- a/backend/src/ir/register.cpp
+++ b/backend/src/ir/register.cpp
@@ -38,6 +38,8 @@ namespace ir {
       case FAMILY_OWORD: return out << "oword";
       case FAMILY_HWORD: return out << "hword";
       case FAMILY_REG: return out << "reg";
+      case FAMILY_BOOL_BIT: return out << "boolbit";
+      case FAMILY_BOOL_UW: return out << "booluw";
     };
     return out;
   }
diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
index 09af24e..e2194dd 100644
--- a/backend/src/ir/register.hpp
+++ b/backend/src/ir/register.hpp
@@ -48,11 +48,13 @@ namespace ir {
     FAMILY_QWORD = 4,
     FAMILY_OWORD = 5,
     FAMILY_HWORD = 6,
-    FAMILY_REG   = 7
+    FAMILY_REG   = 7,
+    FAMILY_BOOL_BIT = 8,
+    FAMILY_BOOL_UW  = 9
   };
 
   INLINE char getFamilyName(RegisterFamily family) {
-    static char registerFamilyName[] = {'b', 'B', 'W', 'D', 'Q', 'O', 'H', 'R'};
+    static char registerFamilyName[] = {'b', 'B', 'W', 'D', 'Q', 'O', 'H', 'R', 'T', 'U'};
     return registerFamilyName[family];
   }
 
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 9954021..c8e29c5 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -705,6 +705,8 @@ namespace gbe
                                  map <ir::Register, ir::Register> &redundantPhiCopyMap);
     /*! Will try to remove redundants LOADI in basic blocks */
     void removeLOADIs(const ir::Liveness &liveness, ir::Function &fn);
+    /*! Will fix bool values live out basic blocks */
+    void fixBools(const ir::Liveness &liveness, ir::Function &fn);
     /*! To avoid lost copy, we need two values for PHI. This function create a
      * fake value for the copy (basically ptr+1)
      */
@@ -2919,6 +2921,260 @@ namespace gbe
     });
   }
 
+  /*! Return the helper register mapped to reg, creating one of the given family on a miss */
+  INLINE ir::Register getRegFromMap(ir::Function &fn, map<ir::Register, ir::Register> &map, ir::Register reg, ir::RegisterFamily family = ir::FAMILY_BOOL_BIT)
+  {
+    const auto known = map.find(reg);
+    if (known != map.end())
+      return known->second;  // reg already has a helper register: reuse it
+
+    // First request for reg: allocate a register of the requested family
+    // from fn and remember the association for later lookups.
+    const ir::Register fresh = fn.newRegister(family);
+    map.insert(std::make_pair(reg, fresh));
+    return fresh;
+  }
+
+  void GenWriter::fixBools(const ir::Liveness &liveness, ir::Function &fn)
+  {
+    // Patch fn so live-out BOOL compares and BOOL AND/OR/XOR/MOV/LOADI go through helper registers.
+    // We have two kinds of helper registers, BOOL_BIT and BOOL_UW, for BOOL registers.
+    // BOOL_BIT uses one bit per channel's flag, thus the same size as a flag register.
+    // BOOL_UW uses a UW per channel's flag, thus the same as a UINT16 register.
+    // We keep the bool -> helper register maps and the set of PHI bool registers.
+    set<ir::Register> boolPHIs;
+    map<ir::Register, ir::Register> boolBits;
+    map<ir::Register, ir::Register> boolUWs;
+
+    // Traverse all blocks and insert helper registers to help handle bool operations.
+    fn.foreachBlock([&](ir::BasicBlock &bb)
+    {
+      // The live info tells us whether a bool value outlives the block
+      const ir::Liveness::BlockInfo &info = liveness.getBlockInfo(&bb);
+
+      // Top to bottom traversal to handle all of the bool registers
+      bb.foreach([&](ir::Instruction &insn)
+      {
+        bool isChanged = false;
+        ir::Opcode op = insn.getOpcode();
+
+        // Handle compares that generate bool values which are live out of the block
+        if (insn.isMemberOf<ir::CompareInstruction>()) {
+          ir::Register reg = insn.getDst(0);
+          if (info.inLiveOut(reg))
+          {
+            GBE_ASSERT(fn.getRegisterFamily(reg) == ir::FAMILY_BOOL);
+            ir::Register boolBit = fn.newRegister(ir::FAMILY_BOOL_BIT);
+            ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit, reg);
+            mov.insert(&insn);
+            boolBits.insert(std::make_pair(reg, boolBit));
+          }
+        }
+
+        // Replace the bool calculation with one on helper registers.
+        if (op == ir::OP_OR || op == ir::OP_AND ||
+            op == ir::OP_XOR)
+        {
+          ir::Register dst = insn.getDst(0);
+          ir::Register src0 = insn.getSrc(0);
+          ir::Register src1 = insn.getSrc(1);
+          if(fn.getRegisterFamily(dst) == ir::FAMILY_BOOL)
+          {
+            // In the simple case, we could use BOOL_BIT to calculate bool values,
+            // but if the bool value is a PHI value it will be changed in
+            // different blocks and not all channels can be changed, thus we need a
+            // UW register to enable the channel mask.
+            // TODO: Now we use BOOL_UW to handle all cases for there will be
+            // some error when there are too many layers of IF branch.
+            if (boolPHIs.find(dst) != boolPHIs.end()  || 1 ||
+                boolPHIs.find(src0) != boolPHIs.end() ||
+                boolPHIs.find(src1) != boolPHIs.end())
+            {
+              auto it0 = boolUWs.find(src0);
+              ir::Register boolUW0;
+              if (it0 != boolUWs.end())
+                boolUW0 = it0->second;
+              else
+              {
+                boolUW0 = fn.newRegister(ir::FAMILY_BOOL_UW);
+                boolUWs.insert(std::make_pair(src0, boolUW0));
+                ir::Register flag = src0;
+                auto it = boolBits.find(src0);
+                if (it != boolBits.end())
+                {
+                  ir::Register boolBit = it->second;
+                  ir::Instruction mov = ir::MOV(ir::TYPE_U16, flag , boolBit);
+                  mov.insertbefore(&insn);
+                }
+                ir::Register zeroReg = ir::Register(ir::ocl::zero);
+                ir::Register oneReg = ir::Register(ir::ocl::one);
+                const ir::Tuple index = fn.makeTuple(flag, oneReg ,zeroReg);
+                ir::Instruction sel = ir::SEL(ir::TYPE_U16, boolUW0 ,index);
+                sel.insertbefore(&insn);
+              }
+
+              auto it1 = boolUWs.find(src1);
+              ir::Register boolUW1;
+              if (it1 != boolUWs.end())
+                boolUW1 = it1->second;
+              else
+              {
+                boolUW1 = fn.newRegister(ir::FAMILY_BOOL_UW);
+                boolUWs.insert(std::make_pair(src1, boolUW1));
+                ir::Register flag = src1;
+                auto it = boolBits.find(src1);
+                if (it != boolBits.end())
+                {
+                  ir::Register boolBit = it->second;
+                  ir::Instruction mov = ir::MOV(ir::TYPE_U16, flag , boolBit);
+                  mov.insertbefore(&insn);
+                }
+                ir::Register zeroReg = ir::Register(ir::ocl::zero);
+                ir::Register oneReg = ir::Register(ir::ocl::one);
+                const ir::Tuple index = fn.makeTuple(flag, oneReg ,zeroReg);
+                ir::Instruction sel = ir::SEL(ir::TYPE_U16, boolUW1 ,index);
+                sel.insertbefore(&insn);
+              }
+
+              auto it = boolUWs.find(dst);
+              ir::Register boolUW;
+              if (it != boolUWs.end())
+                boolUW = it->second;
+              else
+              {
+                boolUW = fn.newRegister(ir::FAMILY_BOOL_UW);
+                boolUWs.insert(std::make_pair(dst, boolUW));
+              }
+
+              ir::Register boolBit = getRegFromMap(fn, boolBits, dst);
+              ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit, dst);
+              mov.insert(&insn);
+              // Use a CMP to turn the UW result back into the flag (UW ==> flag)
+              ir::Register zeroReg = ir::Register(ir::ocl::zero);
+              ir::Instruction cmp = ir::NE(ir::TYPE_U16, dst, boolUW ,zeroReg);
+              cmp.insert(&insn);
+
+              if (op == ir::OP_OR)
+              {
+                ir::Instruction newinsn = ir::OR(ir::TYPE_U16, boolUW, boolUW0, boolUW1);
+                newinsn.replace(&insn);
+              }
+              else if (op == ir::OP_AND)
+              {
+                ir::Instruction newinsn = ir::AND(ir::TYPE_U16, boolUW, boolUW0, boolUW1);
+                newinsn.replace(&insn);
+              }
+              else if (op == ir::OP_XOR)
+              {
+                ir::Instruction newinsn = ir::XOR(ir::TYPE_U16, boolUW, boolUW0, boolUW1);
+                newinsn.replace(&insn);
+              }
+              else
+                GBE_ASSERT(0 && "UNSOPPORTED");
+
+            }
+            else
+            {
+              auto it = boolBits.find(dst);
+              ir::Register boolBit;
+              if (it != boolBits.end())
+                boolBit = it->second;
+              else
+                boolBit = fn.newRegister(ir::FAMILY_BOOL_BIT);
+              boolBits.insert(std::make_pair(dst, boolBit));
+
+              auto it0 = boolBits.find(src0);
+              ir::Register boolBit0;
+              if (it0 != boolBits.end()) // test src0's own lookup, not dst's
+                boolBit0 = it0->second;
+              else
+              {
+                boolBit0 = fn.newRegister(ir::FAMILY_BOOL_BIT);
+                ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit0, src0);
+                mov.insertbefore(&insn);
+                boolBits.insert(std::make_pair(src0, boolBit0));
+              }
+
+              auto it1 = boolBits.find(src1);
+              ir::Register boolBit1;
+              if (it1 != boolBits.end()) // test src1's own lookup, not dst's
+                boolBit1 = it1->second;
+              else
+              {
+                boolBit1 = fn.newRegister(ir::FAMILY_BOOL_BIT);
+                ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit1, src1);
+                mov.insertbefore(&insn);
+                boolBits.insert(std::make_pair(src1, boolBit1));
+              }
+
+              if (op == ir::OP_OR)
+              {
+                ir::Instruction newinsn = ir::OR(ir::TYPE_U16, boolBit, boolBit0, boolBit1);
+                newinsn.replace(&insn);
+              }
+              else if (op == ir::OP_AND)
+              {
+                ir::Instruction newinsn = ir::AND(ir::TYPE_U16, boolBit, boolBit0, boolBit1);
+                newinsn.replace(&insn);
+              }
+              else if (op == ir::OP_XOR)
+              {
+                ir::Instruction newinsn = ir::XOR(ir::TYPE_U16, boolBit, boolBit0, boolBit1);
+                newinsn.replace(&insn);
+              }
+              else
+                GBE_ASSERT(0 && "UNSOPPORTED");
+            }
+            isChanged = true;
+          }
+        }
+
+        if (insn.getOpcode() == ir::OP_MOV)
+        {
+          ir::Register dst = insn.getDst(0);
+          ir::Register src = insn.getSrc(0);
+          if(fn.getRegisterFamily(dst) == ir::FAMILY_BOOL &&
+             fn.getRegisterFamily(src) == ir::FAMILY_BOOL)
+          {
+            boolPHIs.insert(dst);
+            ir::Register boolBit = getRegFromMap(fn, boolBits, dst);
+            ir::Register boolBit0 = getRegFromMap(fn, boolBits, src);
+            ir::Instruction newinsn = ir::MOV(ir::TYPE_U16, boolBit, boolBit0);
+            newinsn.replace(&insn);
+            isChanged = true;
+          }
+        }
+
+        if (insn.getOpcode() == ir::OP_LOADI)
+        {
+          ir::Register reg = insn.getDst(0);
+          if(fn.getRegisterFamily(reg) == ir::FAMILY_BOOL) {
+            ir::Register boolBit = getRegFromMap(fn, boolBits, reg);
+            replaceDst(&insn, reg, boolBit);
+          }
+        }
+
+        // Convert BOOL_BIT back into BOOL ahead of BRA/SEL instructions that read the bool
+        for (uint32_t i = 0; i < insn.getSrcNum(); ++i)
+        {
+          if (isChanged) break;
+          ir::Register reg = insn.getSrc(i);
+          if (fn.getRegisterFamily(reg) != ir::FAMILY_BOOL)
+            continue;
+          if (!(insn.getOpcode() == ir::OP_BRA || insn.getOpcode() == ir::OP_SEL))
+            continue;
+          auto it = boolBits.find(reg);
+          if (it == boolBits.end())
+            break;
+          ir::Register boolBit = it->second;
+          ir::Instruction mov = ir::MOV(ir::TYPE_U16, reg, boolBit);
+          mov.insertbefore(&insn);
+        }
+      });
+    });
+  }
+
+
   BVAR(OCL_OPTIMIZE_PHI_MOVES, true);
   BVAR(OCL_OPTIMIZE_LOADI, true);
 
@@ -3221,6 +3477,7 @@ namespace gbe
       this->postPhiCopyOptimization(liveness, fn, replaceMap, redundantPhiCopyMap);
       this->removeMOVs(liveness, fn);
     }
+    this->fixBools(liveness, fn);
   }
 
   void GenWriter::regAllocateReturnInst(ReturnInst &I) {}
-- 
2.7.4



More information about the Beignet mailing list