[Beignet] [PATCH 3/5] GBE: don't treat btiUtil as a curbe payload register.

Zhigang Gong zhigang.gong at intel.com
Sun Sep 13 23:19:34 PDT 2015


btiUtil should be just a normal temporary register, alive only
for those specific load/store instructions that use a mixed
BTI.

Although btiUtil only takes one DW of register space, in
practice it may waste an entire 32-byte register because
it has a very long live range.

This patch fixes this issue completely.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen8_context.cpp       |  10 +-
 backend/src/backend/gen_context.cpp        |  47 +++++----
 backend/src/backend/gen_context.hpp        |   4 +-
 backend/src/backend/gen_insn_selection.cpp | 156 +++++++++++++++++------------
 backend/src/backend/gen_reg_allocation.cpp |   2 -
 backend/src/backend/program.h              |   1 -
 backend/src/ir/profile.cpp                 |   4 +-
 backend/src/ir/profile.hpp                 |   3 +-
 8 files changed, 128 insertions(+), 99 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index b497ee5..7e51963 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -854,9 +854,10 @@ namespace gbe
       p->UNTYPED_READ(dst, src, bti, 2*elemNum);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(2*elemNum));
+      const GenRegister btiTmp = ra->genReg(insn.dst(2*elemNum + 1));
       unsigned desc = p->generateUntypedReadMessageDesc(0, 2*elemNum);
 
-      unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+      unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
       //predicated load
       p->push();
@@ -864,7 +865,7 @@ namespace gbe
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
         p->UNTYPED_READ(dst, src, GenRegister::retype(GenRegister::addr1(0), GEN_TYPE_UD), 2*elemNum);
       p->pop();
-      afterMessage(insn, bti, tmp, jip0);
+      afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
 
     for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
@@ -893,9 +894,10 @@ namespace gbe
       p->UNTYPED_WRITE(addr, bti, elemNum*2);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(elemNum));
+      const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
       unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
 
-      unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+      unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
       //predicated load
       p->push();
@@ -903,7 +905,7 @@ namespace gbe
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
         p->UNTYPED_WRITE(addr, GenRegister::addr1(0), elemNum*2);
       p->pop();
-      afterMessage(insn, bti, tmp, jip0);
+      afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
   }
   void Gen8Context::emitPackLongInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index ae02fbe..5980db2 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1769,16 +1769,17 @@ namespace gbe
       p->ATOMIC(dst, function, src, bti, srcNum);
     } else {
       GenRegister flagTemp = ra->genReg(insn.dst(1));
+      GenRegister btiTmp = ra->genReg(insn.dst(2));
 
       unsigned desc = p->generateAtomicMessageDesc(function, 0, srcNum);
 
-      unsigned jip0 = beforeMessage(insn, bti, flagTemp, desc);
+      unsigned jip0 = beforeMessage(insn, bti, flagTemp, btiTmp, desc);
       p->push();
         p->curr.predicate = GEN_PREDICATE_NORMAL;
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
         p->ATOMIC(dst, function, src, GenRegister::addr1(0), srcNum);
       p->pop();
-      afterMessage(insn, bti, flagTemp, jip0);
+      afterMessage(insn, bti, flagTemp, btiTmp, jip0);
     }
   }
 
@@ -1920,9 +1921,10 @@ namespace gbe
       p->UNTYPED_READ(dst, src, bti, elemNum);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(elemNum));
+      const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
       unsigned desc = p->generateUntypedReadMessageDesc(0, elemNum);
 
-      unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+      unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
       //predicated load
       p->push();
@@ -1930,17 +1932,17 @@ namespace gbe
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
         p->UNTYPED_READ(dst, src, GenRegister::retype(GenRegister::addr1(0), GEN_TYPE_UD), elemNum);
       p->pop();
-      afterMessage(insn, bti, tmp, jip0);
+      afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
   }
-  unsigned GenContext::beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister tmp, unsigned desc) {
+  unsigned GenContext::beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister tmp, GenRegister btiTmp, unsigned desc) {
       const GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
       setFlag(flagReg, GenRegister::immuw(0));
       p->CMP(GEN_CONDITIONAL_NZ, flagReg, GenRegister::immuw(1));
 
-      GenRegister btiUD = ra->genReg(GenRegister::ud1grf(ir::ocl::btiUtil));
-      GenRegister btiUW = ra->genReg(GenRegister::uw1grf(ir::ocl::btiUtil));
-      GenRegister btiUB = ra->genReg(GenRegister::ub1grf(ir::ocl::btiUtil));
+      GenRegister btiUD = GenRegister::retype(btiTmp, GEN_TYPE_UD);
+      GenRegister btiUW = GenRegister::retype(btiTmp, GEN_TYPE_UW);
+      GenRegister btiUB = GenRegister::retype(btiTmp, GEN_TYPE_UB);
       unsigned jip0 = p->n_instruction();
       p->push();
         p->curr.execWidth = 1;
@@ -1963,8 +1965,8 @@ namespace gbe
       p->pop();
       return jip0;
   }
-  void GenContext::afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister tmp, unsigned jip0) {
-    const GenRegister btiUD = ra->genReg(GenRegister::ud1grf(ir::ocl::btiUtil));
+  void GenContext::afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister tmp, GenRegister btiTmp, unsigned jip0) {
+    const GenRegister btiUD = GenRegister::retype(btiTmp, GEN_TYPE_UD);
       //restore flag
       setFlag(GenRegister::flag(insn.state.flag, insn.state.subFlag), tmp);
       // get active channel
@@ -1988,9 +1990,10 @@ namespace gbe
       p->UNTYPED_READ(dst, src, bti, elemNum);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(elemNum));
+      const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
       unsigned desc = p->generateUntypedReadMessageDesc(0, elemNum);
 
-      unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+      unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
       //predicated load
       p->push();
@@ -1998,7 +2001,7 @@ namespace gbe
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
         p->UNTYPED_READ(dst, src, GenRegister::retype(GenRegister::addr1(0), GEN_TYPE_UD), elemNum);
       p->pop();
-      afterMessage(insn, bti, tmp, jip0);
+      afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
   }
 
@@ -2011,9 +2014,10 @@ namespace gbe
       p->UNTYPED_WRITE(src, bti, elemNum*2);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(0));
+      const GenRegister btiTmp = ra->genReg(insn.dst(1));
       unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
 
-      unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+      unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
       //predicated load
       p->push();
@@ -2021,7 +2025,7 @@ namespace gbe
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
         p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum*2);
       p->pop();
-      afterMessage(insn, bti, tmp, jip0);
+      afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
   }
 
@@ -2033,9 +2037,10 @@ namespace gbe
       p->UNTYPED_WRITE(src, bti, elemNum);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(0));
+      const GenRegister btiTmp = ra->genReg(insn.dst(1));
       unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum);
 
-      unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+      unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
       //predicated load
       p->push();
@@ -2043,7 +2048,7 @@ namespace gbe
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
         p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum);
       p->pop();
-      afterMessage(insn, bti, tmp, jip0);
+      afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
   }
 
@@ -2057,9 +2062,10 @@ namespace gbe
       p->BYTE_GATHER(dst, src, bti, elemSize);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(1));
+      const GenRegister btiTmp = ra->genReg(insn.dst(2));
       unsigned desc = p->generateByteGatherMessageDesc(0, elemSize);
 
-      unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+      unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
       //predicated load
       p->push();
@@ -2067,7 +2073,7 @@ namespace gbe
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
         p->BYTE_GATHER(dst, src, GenRegister::addr1(0), elemSize);
       p->pop();
-      afterMessage(insn, bti, tmp, jip0);
+      afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
   }
 
@@ -2080,9 +2086,10 @@ namespace gbe
       p->BYTE_SCATTER(src, bti, elemSize);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(0));
+      const GenRegister btiTmp = ra->genReg(insn.dst(1));
       unsigned desc = p->generateByteScatterMessageDesc(0, elemSize);
 
-      unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+      unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
       //predicated load
       p->push();
@@ -2090,7 +2097,7 @@ namespace gbe
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
         p->BYTE_SCATTER(src, GenRegister::addr1(0), elemSize);
       p->pop();
-      afterMessage(insn, bti, tmp, jip0);
+      afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
 
   }
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index b03097e..155b68e 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -175,8 +175,8 @@ namespace gbe
     virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
     void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
     void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
-    unsigned beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, unsigned desc);
-    void afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, unsigned jip0);
+    unsigned beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, GenRegister btiTmp, unsigned desc);
+    void afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, GenRegister btiTmp, unsigned jip0);
 
     /*! Implements base class */
     virtual Kernel *allocateKernel(void);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 57dbec9..d258beb 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -620,19 +620,19 @@ namespace gbe
     /*! Wait instruction (used for the barrier) */
     void WAIT(void);
     /*! Atomic instruction */
-    void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, GenRegister *flagTemp);
+    void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, vector<GenRegister> temps);
     /*! Read 64 bits float/int array */
-    void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, const GenRegister bti, bool native_long, GenRegister *flagTemp);
+    void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, const GenRegister bti, bool native_long, vector<GenRegister> temps);
     /*! Write 64 bits float/int array */
-    void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, GenRegister bti, bool native_long, GenRegister *flagTemp);
+    void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, GenRegister bti, bool native_long, vector<GenRegister> temps);
     /*! Untyped read (up to 4 elements) */
-    void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, GenRegister bti, GenRegister *flagTemp);
+    void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, GenRegister bti, vector<GenRegister> temps);
     /*! Untyped write (up to 4 elements) */
-    void UNTYPED_WRITE(Reg addr, const GenRegister *src, uint32_t elemNum, GenRegister bti, GenRegister *flagTemp);
+    void UNTYPED_WRITE(Reg addr, const GenRegister *src, uint32_t elemNum, GenRegister bti, vector<GenRegister> temps);
     /*! Byte gather (for unaligned bytes, shorts and ints) */
-    void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, GenRegister bti, GenRegister *flagTemp);
+    void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, GenRegister bti, vector<GenRegister> temps);
     /*! Byte scatter (for unaligned bytes, shorts and ints) */
-    void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, GenRegister bti, GenRegister *flagTemp);
+    void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, GenRegister bti, vector <GenRegister> temps);
     /*! DWord scatter (for constant cache read) */
     void DWORD_GATHER(Reg dst, Reg addr, uint32_t bti);
     /*! Unpack the uint to charN */
@@ -736,6 +736,15 @@ namespace gbe
             GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
     }
 
+    INLINE vector<GenRegister> getBTITemps(const ir::BTI &bti) {
+      vector<GenRegister> temps;
+      if (!bti.isConst) {
+        temps.push_back(selReg(reg(ir::FAMILY_WORD, true), ir::TYPE_U16));
+        temps.push_back(selReg(reg(ir::FAMILY_DWORD, true), ir::TYPE_U32));
+      }
+      return temps;
+    }
+
     /*! Use custom allocators */
     GBE_CLASS(Opaque);
     friend class SelectionBlock;
@@ -1228,9 +1237,10 @@ namespace gbe
   }
 
   void Selection::Opaque::ATOMIC(Reg dst, uint32_t function,
-                                     uint32_t srcNum, Reg src0,
-                                     Reg src1, Reg src2, GenRegister bti, GenRegister *flagTemp) {
-    unsigned dstNum = flagTemp == NULL ? 1 : 2;
+                                 uint32_t srcNum, Reg src0,
+                                 Reg src1, Reg src2, GenRegister bti,
+                                 vector<GenRegister> temps) {
+    unsigned dstNum = 1 + temps.size();
     SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum, srcNum + 1);
 
     if (bti.file != GEN_IMMEDIATE_VALUE) {
@@ -1239,7 +1249,10 @@ namespace gbe
     }
 
     insn->dst(0) = dst;
-    if(flagTemp) insn->dst(1) = *flagTemp;
+    if(temps.size()) {
+      insn->dst(1) = temps[0];
+      insn->dst(2) = temps[1];
+    }
 
     insn->src(0) = src0;
     if(srcNum > 1) insn->src(1) = src1;
@@ -1265,14 +1278,14 @@ namespace gbe
                                  uint32_t elemNum,
                                  const GenRegister bti,
                                  bool native_long,
-                                 GenRegister *flagTemp)
+                                 vector<GenRegister> temps)
   {
     SelectionInstruction *insn = NULL;
     SelectionVector *srcVector = NULL;
     SelectionVector *dstVector = NULL;
 
     if (!native_long) {
-      unsigned dstNum = flagTemp == NULL ? elemNum : elemNum+1;
+      unsigned dstNum = elemNum + temps.size();
       insn = this->appendInsn(SEL_OP_READ64, dstNum, 2);
       srcVector = this->appendVector();
       dstVector = this->appendVector();
@@ -1281,10 +1294,12 @@ namespace gbe
         insn->dst(elemID) = dst[elemID];
 
       // flagTemp don't need to be put in SelectionVector
-      if (flagTemp)
-        insn->dst(elemNum) = *flagTemp;
+      if (temps.size()) {
+        insn->dst(elemNum) = temps[0];
+        insn->dst(elemNum + 1) = temps[1];
+      }
     } else {
-      unsigned dstNum = flagTemp == NULL ? elemNum*2 : elemNum*2+1;
+      unsigned dstNum = elemNum*2 + temps.size();
       insn = this->appendInsn(SEL_OP_READ64, dstNum, 2);
       srcVector = this->appendVector();
       dstVector = this->appendVector();
@@ -1296,8 +1311,10 @@ namespace gbe
         insn->dst(elemID + elemNum) = dst[elemID];
 
       // flagTemp don't need to be put in SelectionVector
-      if (flagTemp)
-        insn->dst(2*elemNum) = *flagTemp;
+      if (temps.size()) {
+        insn->dst(2*elemNum) = temps[0];
+        insn->dst(2*elemNum + 1) = temps[1];
+      }
     }
 
     if (bti.file != GEN_IMMEDIATE_VALUE) {
@@ -1325,9 +1342,9 @@ namespace gbe
                                        const GenRegister *dst,
                                        uint32_t elemNum,
                                        GenRegister bti,
-                                       GenRegister *flagTemp)
+                                       vector<GenRegister> temps)
   {
-    unsigned dstNum = flagTemp == NULL ? elemNum : elemNum+1;
+    unsigned dstNum = elemNum + temps.size();
     SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_READ, dstNum, 2);
     SelectionVector *srcVector = this->appendVector();
     SelectionVector *dstVector = this->appendVector();
@@ -1336,8 +1353,10 @@ namespace gbe
     // Regular instruction to encode
     for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
       insn->dst(elemID) = dst[elemID];
-    if (flagTemp)
-      insn->dst(elemNum) = *flagTemp;
+    if (temps.size()) {
+      insn->dst(elemNum) = temps[0];
+      insn->dst(elemNum + 1) = temps[1];
+    }
 
     insn->src(0) = addr;
     insn->src(1) = bti;
@@ -1366,13 +1385,13 @@ namespace gbe
                                   uint32_t srcNum,
                                   GenRegister bti,
                                   bool native_long,
-                                  GenRegister *flagTemp)
+                                  vector<GenRegister> temps)
   {
     SelectionVector *vector = NULL;
     SelectionInstruction *insn = NULL;
 
     if (!native_long) {
-      unsigned dstNum = flagTemp == NULL ? 0 : 1;
+      unsigned dstNum = temps.size();
       insn = this->appendInsn(SEL_OP_WRITE64, dstNum, srcNum + 2);
       vector = this->appendVector();
       // Register layout:
@@ -1383,8 +1402,10 @@ namespace gbe
         insn->src(elemID + 1) = src[elemID];
 
       insn->src(srcNum+1) = bti;
-      if (flagTemp)
-        insn->dst(0) = *flagTemp;
+      if (temps.size()) {
+        insn->dst(0) = temps[0];
+        insn->dst(1) = temps[1];
+      }
       insn->extra.elem = srcNum;
 
       vector->regNum = srcNum + 1;
@@ -1392,7 +1413,7 @@ namespace gbe
       vector->reg = &insn->src(0);
       vector->isSrc = 1;
     } else { // handle the native long case
-      unsigned dstNum = flagTemp == NULL ? srcNum : srcNum+1;
+      unsigned dstNum = srcNum + temps.size();
       // Register layout:
       // dst: srcNum, (flagTemp)
       // src: srcNum, addr, srcNum, bti.
@@ -1412,8 +1433,10 @@ namespace gbe
       for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
         insn->dst(elemID) = tmp[0];
 
-      if (flagTemp)
-        insn->dst(srcNum) = *flagTemp;
+      if (temps.size()) {
+        insn->dst(srcNum) = temps[0];
+        insn->dst(srcNum + 1) = temps[1];
+      }
       insn->extra.elem = srcNum;
 
       vector->regNum = srcNum + 1;
@@ -1432,10 +1455,11 @@ namespace gbe
                                         const GenRegister *src,
                                         uint32_t elemNum,
                                         GenRegister bti,
-                                        GenRegister *flagTemp)
+                                        vector<GenRegister> temps)
   {
-    unsigned dstNum = flagTemp == NULL ? 0 : 1;
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, dstNum, elemNum+2);
+    unsigned dstNum = temps.size();
+    unsigned srcNum = elemNum + 2 + temps.size();
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, dstNum, srcNum);
     SelectionVector *vector = this->appendVector();
 
     if (bti.file != GEN_IMMEDIATE_VALUE) {
@@ -1443,14 +1467,17 @@ namespace gbe
       insn->state.subFlag = 1;
     }
 
-    if (flagTemp) insn->dst(0) = *flagTemp;
     // Regular instruction to encode
     insn->src(0) = addr;
     for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
       insn->src(elemID+1) = src[elemID];
     insn->src(elemNum+1) = bti;
-    if (flagTemp)
-      insn->src(elemNum+2) = *flagTemp;
+    if (temps.size()) {
+      insn->dst(0) = temps[0];
+      insn->dst(1) = temps[1];
+      insn->src(elemNum + 2) = temps[0];
+      insn->src(elemNum + 3) = temps[1];
+    }
     insn->extra.elem = elemNum;
 
     // Sends require contiguous allocation for the sources
@@ -1460,8 +1487,11 @@ namespace gbe
     vector->isSrc = 1;
   }
 
-  void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, GenRegister bti, GenRegister *flagTemp) {
-    unsigned dstNum = flagTemp == NULL ? 1 : 2;
+  void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr,
+                                      uint32_t elemSize,
+                                      GenRegister bti,
+                                      vector<GenRegister> temps) {
+    unsigned dstNum = 1 + temps.size();
     SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_GATHER, dstNum, 2);
     SelectionVector *srcVector = this->appendVector();
     SelectionVector *dstVector = this->appendVector();
@@ -1477,8 +1507,10 @@ namespace gbe
     insn->src(0) = addr;
     insn->src(1) = bti;
     insn->dst(0) = dst;
-    if (flagTemp)
-      insn->dst(1) = *flagTemp;
+    if (temps.size()) {
+      insn->dst(1) = temps[0];
+      insn->dst(2) = temps[1];
+    }
 
     insn->extra.elem = elemSize;
 
@@ -1494,8 +1526,9 @@ namespace gbe
     srcVector->reg = &insn->src(0);
   }
 
-  void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, GenRegister bti, GenRegister *flagTemp) {
-    unsigned dstNum = flagTemp == NULL ? 0 : 1;
+  void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize,
+                                       GenRegister bti, vector<GenRegister> temps) {
+    unsigned dstNum = temps.size();
     SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_SCATTER, dstNum, 3);
     SelectionVector *vector = this->appendVector();
 
@@ -1504,8 +1537,10 @@ namespace gbe
       insn->state.subFlag = 1;
     }
 
-    if (flagTemp)
-      insn->dst(0) = *flagTemp;
+    if (temps.size()) {
+      insn->dst(0) = temps[0];
+      insn->dst(1) = temps[1];
+    }
     // Instruction to encode
     insn->src(0) = addr;
     insn->src(1) = src;
@@ -3321,8 +3356,7 @@ namespace gbe
         //GenRegister temp = getRelativeAddress(sel, addr, sel.selReg(bti.base, ir::TYPE_U32));
 
         GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
-        GenRegister tmp = sel.selReg(sel.reg(ir::FAMILY_WORD, true), ir::TYPE_U16);
-        sel.UNTYPED_READ(addr, dst.data(), valueNum, b, bti.isConst ? NULL : &tmp);
+        sel.UNTYPED_READ(addr, dst.data(), valueNum, b, sel.getBTITemps(bti));
     }
 
     void emitUntypedRead(Selection::Opaque &sel,
@@ -3383,7 +3417,6 @@ namespace gbe
       GBE_ASSERT(bti.isConst == 1);
       vector<GenRegister> dst(valueNum);
       GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
-      GenRegister tmpFlag = sel.selReg(sel.reg(ir::FAMILY_WORD, true), ir::TYPE_U16);
       for ( uint32_t dstID = 0; dstID < valueNum; ++dstID)
         dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
 
@@ -3393,9 +3426,9 @@ namespace gbe
           tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
         }
 
-        sel.READ64(addr, dst.data(), tmp.data(), valueNum, b, true, bti.isConst ? NULL : &tmpFlag);
+        sel.READ64(addr, dst.data(), tmp.data(), valueNum, b, true, sel.getBTITemps(bti));
       } else {
-        sel.READ64(addr, dst.data(), NULL, valueNum, b, false, bti.isConst ? NULL : &tmpFlag);
+        sel.READ64(addr, dst.data(), NULL, valueNum, b, false, sel.getBTITemps(bti));
       }
     }
 
@@ -3412,7 +3445,6 @@ namespace gbe
         GenRegister tmpData = sel.selReg(tmpReg, ir::TYPE_U32);
 
         GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
-        GenRegister tmpFlag = sel.selReg(sel.reg(ir::FAMILY_WORD, true), ir::TYPE_U16);
 
         // Get dword aligned addr
         sel.push();
@@ -3425,7 +3457,7 @@ namespace gbe
         sel.push();
           if (isUniform)
             sel.curr.noMask = 1;
-          sel.UNTYPED_READ(tmpAddr, &tmpData, 1, b, bti.isConst ? NULL : &tmpFlag);
+          sel.UNTYPED_READ(tmpAddr, &tmpData, 1, b, sel.getBTITemps(bti));
 
           if (isUniform)
             sel.curr.execWidth = 1;
@@ -3593,14 +3625,13 @@ namespace gbe
           readByteAsDWord(sel, elemSize, address, value, isUniform, bti);
         else {
           GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
-          GenRegister tmpFlag = sel.selReg(sel.reg(ir::FAMILY_WORD, true), ir::TYPE_U16);
 
           // We need a temporary register if we read bytes or words
           Register dst = sel.reg(FAMILY_DWORD, isUniform);
           sel.push();
             if (isUniform)
               sel.curr.noMask = 1;
-            sel.BYTE_GATHER(sel.selReg(dst, ir::TYPE_U32), address, elemSize, b, bti.isConst ? NULL : & tmpFlag);
+            sel.BYTE_GATHER(sel.selReg(dst, ir::TYPE_U32), address, elemSize, b, sel.getBTITemps(bti));
           sel.pop();
 
           sel.push();
@@ -3691,6 +3722,7 @@ namespace gbe
       return true;
     }
   };
+
   class StoreInstructionPattern : public SelectionPattern
   {
   public:
@@ -3705,13 +3737,12 @@ namespace gbe
     {
       using namespace ir;
       const uint32_t valueNum = insn.getValueNum();
-      vector<GenRegister> value(valueNum);
+      vector<GenRegister> value(valueNum), tmps;
       GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
 
       for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
         value[valueID] = GenRegister::retype(sel.selReg(insn.getValue(valueID)), GEN_TYPE_UD);
-      GenRegister tmp = sel.selReg(sel.reg(FAMILY_WORD, true), ir::TYPE_U16);
-      sel.UNTYPED_WRITE(address, value.data(), valueNum, b, bti.isConst? NULL : &tmp);
+      sel.UNTYPED_WRITE(address, value.data(), valueNum, b, sel.getBTITemps(bti));
     }
 
     void emitWrite64(Selection::Opaque &sel,
@@ -3729,16 +3760,14 @@ namespace gbe
       for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
         src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
 
-      GenRegister tmpFlag = sel.selReg(sel.reg(FAMILY_WORD, true), ir::TYPE_U16);
-
       if (sel.hasLongType()) {
         vector<GenRegister> tmp(valueNum);
         for (uint32_t valueID = 0; valueID < valueNum; ++valueID) {
           tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
         }
-        sel.WRITE64(address, src.data(), tmp.data(), valueNum, b, true, bti.isConst? NULL : &tmpFlag);
+        sel.WRITE64(address, src.data(), tmp.data(), valueNum, b, true, sel.getBTITemps(bti));
       } else {
-        sel.WRITE64(address, src.data(), NULL, valueNum, b, false, bti.isConst? NULL : &tmpFlag);
+        sel.WRITE64(address, src.data(), NULL, valueNum, b, false, sel.getBTITemps(bti));
       }
     }
 
@@ -3753,7 +3782,6 @@ namespace gbe
       uint32_t valueNum = insn.getValueNum();
 
       GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
-      GenRegister tmpFlag = sel.selReg(sel.reg(FAMILY_WORD, true), ir::TYPE_U16);
       if(valueNum > 1) {
         const uint32_t typeSize = getFamilySize(getFamily(insn.getValueType()));
         vector<GenRegister> value(valueNum);
@@ -3773,7 +3801,7 @@ namespace gbe
           sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, typeSize, 4/typeSize);
         }
 
-        sel.UNTYPED_WRITE(address, tmp.data(), tmpRegNum, b, bti.isConst ? NULL : &tmpFlag);
+        sel.UNTYPED_WRITE(address, tmp.data(), tmpRegNum, b, sel.getBTITemps(bti));
       } else {
         const GenRegister value = sel.selReg(insn.getValue(0));
         GBE_ASSERT(insn.getValueNum() == 1);
@@ -3790,7 +3818,7 @@ namespace gbe
           else if (elemSize == GEN_BYTE_SCATTER_BYTE)
             sel.MOV(tmp, GenRegister::retype(value, GEN_TYPE_UB));
         sel.pop();
-        sel.BYTE_SCATTER(address, tmp, elemSize, b, bti.isConst ? NULL : &tmpFlag);
+        sel.BYTE_SCATTER(address, tmp, elemSize, b, sel.getBTITemps(bti));
       }
     }
 
@@ -4558,11 +4586,9 @@ namespace gbe
       if(srcNum > 2) src1 = sel.selReg(insn.getSrc(2), TYPE_U32);
       if(srcNum > 3) src2 = sel.selReg(insn.getSrc(3), TYPE_U32);
 
-      GenRegister flagTemp = sel.selReg(sel.reg(FAMILY_WORD, true), TYPE_U16);
-
       GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
 
-      sel.ATOMIC(dst, genAtomicOp, opNum, src0, src1, src2, bti, b.isConst ? NULL : &flagTemp);
+      sel.ATOMIC(dst, genAtomicOp, opNum, src0, src1, src2, bti, sel.getBTITemps(b));
 
       // for fixed bti, don't generate the useless loadi
       if (insn.isFixedBTI())
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index c3d5e29..4430ca5 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -1039,8 +1039,6 @@ namespace gbe
         // FIXME stack buffer is not used, we may need to remove it in the furture.
         if (curbeType == GBE_CURBE_EXTRA_ARGUMENT && subType == GBE_STACK_BUFFER)
           intervals[regID].maxID = 1;
-        if (curbeType == GBE_CURBE_BTI_UTIL)
-          intervals[regID].maxID = INT_MAX;
       }
     }
 
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 0ba0bd5..0758820 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -100,7 +100,6 @@ enum gbe_curbe_type {
   GBE_CURBE_THREAD_NUM,
   GBE_CURBE_ZERO,
   GBE_CURBE_ONE,
-  GBE_CURBE_BTI_UTIL,
   GBE_GEN_REG,
 };
 
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 75522eb..484e82d 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -43,8 +43,7 @@ namespace ir {
         "zero", "one",
         "retVal",
         "printf_buffer_pointer", "printf_index_buffer_pointer",
-        "dwblockip",
-        "bti_utility"
+        "dwblockip"
     };
 
 #if GBE_DEBUG
@@ -87,7 +86,6 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1, GBE_CURBE_PRINTF_BUF_POINTER);
       DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1, GBE_CURBE_PRINTF_INDEX_POINTER);
       DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0, GBE_CURBE_DW_BLOCK_IP);
-      DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1, GBE_CURBE_BTI_UTIL);
     }
 #undef DECL_NEW_REG
 
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index b3f2a21..a8445c4 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -71,8 +71,7 @@ namespace ir {
     static const Register printfbptr = Register(27); // printf buffer address .
     static const Register printfiptr = Register(28); // printf index buffer address.
     static const Register dwblockip = Register(29);  // blockip
-    static const Register btiUtil = Register(30);  // used for mixed pointer as bti utility.
-    static const uint32_t regNum = 31;             // number of special registers
+    static const uint32_t regNum = 30;             // number of special registers
     extern const char *specialRegMean[];           // special register name.
   } /* namespace ocl */
 
-- 
1.9.1



More information about the Beignet mailing list