[Beignet] [PATCH 6/8] GBE: Extend front label ip to 32 bit on demand.

Zhigang Gong zhigang.gong at intel.com
Tue Mar 31 19:05:41 PDT 2015


If the number of front-end labels exceeds 0xffff, the backend uses a
full DW (32 bits) instead of a 16-bit word to represent each block's IP
address. The width is selected dynamically, according to the actual
number of labels generated by the front end.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/context.cpp            |  2 +
 backend/src/backend/context.hpp            |  8 +++
 backend/src/backend/gen_context.cpp        | 29 ++++++++--
 backend/src/backend/gen_insn_selection.cpp | 89 ++++++++++++++++++++++++------
 backend/src/backend/gen_insn_selection.hpp |  1 -
 backend/src/backend/program.h              |  1 +
 backend/src/ir/function.cpp                |  4 +-
 backend/src/ir/profile.cpp                 |  2 +
 backend/src/ir/profile.hpp                 |  5 +-
 src/cl_command_queue_gen7.c                | 16 ++++--
 10 files changed, 128 insertions(+), 29 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 5e33ddd..59ccc79 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -354,6 +354,8 @@ namespace gbe
     this->kernel = this->allocateKernel();
     this->kernel->simdWidth = this->simdWidth;
     this->buildArgList();
+    // Assign unconditionally so the flag is well-defined when labels fit in 16 bits.
+    this->useDWLabel = fn.labelNum() > 0xffff;
     if (usedLabels.size() == 0)
       this->buildUsedLabels();
     if (JIPs.size() == 0)
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 1b3744b..faa7c8a 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -100,6 +100,13 @@ namespace gbe
     /*! Preallocated curbe register set including special registers. */
     map<ir::Register, uint32_t> curbeRegs;
     ir::Register getSurfaceBaseReg(unsigned char bti);
+    /*! True when the backend uses 32-bit (DW) labels, false when it uses 16-bit (W) labels. */
+    bool isDWLabel(void) const {
+      return useDWLabel;
+    }
+    uint32_t getMaxLabel(void) const { // all-ones value of the label width in use
+      return this->isDWLabel() ? 0xffffffff : 0xffff;
+    }
   protected:
     /*! Build the instruction stream. Return false if failed */
     virtual bool emitCode(void) = 0;
@@ -140,6 +147,7 @@ namespace gbe
     set<ir::LabelIndex> usedLabels;       //!< Set of all used labels
     JIPMap JIPs;                          //!< Where to jump all labels/branches
     uint32_t simdWidth;                   //!< Number of lanes per HW threads
+    bool useDWLabel;                      //!< false means using u16 label, true means using u32 label.
     map<unsigned char, ir::Register> btiRegMap;
     GBE_CLASS(Context);                   //!< Use custom allocators
   };
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 3fab9c8..13c7664 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -133,18 +133,36 @@ namespace gbe
     return true;
   }
 
+  /* Return the block IP register for the label width in use: uw8grf(blockip) for 16-bit labels, ud8grf(dwblockip) for 32-bit ones. */
+  static GenRegister getBlockIP(GenContext &ctx) {
+    GenRegister blockip;
+    if (!ctx.isDWLabel())
+      blockip = ctx.ra->genReg(GenRegister::uw8grf(ir::ocl::blockip));
+    else
+      blockip = ctx.ra->genReg(GenRegister::ud8grf(ir::ocl::dwblockip));
+    return blockip;
+  }
+
+  /* Emit a MOV of the constant label into blockip, encoded as a UW or UD immediate according to the label width. */
+  static void setBlockIP(GenContext &ctx, GenRegister blockip, uint32_t label) {
+    if (!ctx.isDWLabel())
+      ctx.p->MOV(blockip, GenRegister::immuw(label));
+    else
+      ctx.p->MOV(blockip, GenRegister::immud(label));
+  }
+
   void GenContext::clearFlagRegister(void) {
     // when group size not aligned to simdWidth, flag register need clear to
     // make prediction(any8/16h) work correctly
-    const GenRegister blockip = ra->genReg(GenRegister::uw8grf(ir::ocl::blockip));
+    const GenRegister blockip = getBlockIP(*this);
     const GenRegister zero = ra->genReg(GenRegister::uw1grf(ir::ocl::zero));
     const GenRegister one = ra->genReg(GenRegister::uw1grf(ir::ocl::one));
     p->push();
       p->curr.noMask = 1;
       p->curr.predicate = GEN_PREDICATE_NONE;
-      p->MOV(blockip, GenRegister::immuw(GEN_MAX_LABEL));
+      setBlockIP(*this, blockip, getMaxLabel());
       p->curr.noMask = 0;
-      p->MOV(blockip, GenRegister::immuw(0));
+      setBlockIP(*this, blockip, 0);
       p->curr.execWidth = 1;
       // FIXME, need to get the final use set of zero/one, if there is no user,
       // no need to generate the following two instructions.
@@ -1808,7 +1826,10 @@ namespace gbe
 
     // We insert the block IP mask first
     using namespace ir::ocl;
-    allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP);
+    if (!isDWLabel())
+      allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP);
+    else
+      allocCurbeReg(dwblockip, GBE_CURBE_DW_BLOCK_IP);
     allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X);
     allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y);
     allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 27ed11b..e025698 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -633,6 +633,64 @@ namespace gbe
                       SelectionDAG *dag0, SelectionDAG *dag1,
                       GenRegister &src0, GenRegister &src1,
                       ir::Type type, bool &inverse);
+
+    /* Select the special register that holds block IPs: dwblockip for DW labels, blockip otherwise. */
+    GenRegister getBlockIP() {
+      return ctx.isDWLabel() ? selReg(ir::ocl::dwblockip) : selReg(ir::ocl::blockip);
+    }
+
+    /* Build an immediate operand from a raw label value: UD when DW labels are in use, UW otherwise. */
+    GenRegister getLabelImmReg(uint32_t labelValue) {
+      return ctx.isDWLabel() ? GenRegister::immud(labelValue) : GenRegister::immuw(labelValue);
+    }
+
+    /* Build the label immediate for an ir::LabelIndex by forwarding its value() to the uint32_t overload. */
+    GenRegister getLabelImmReg(ir::LabelIndex label) {
+      return getLabelImmReg(label.value());
+    }
+
+    /* MOV labelValue into blockip, retyping the register to UW or UD so it matches the label width in use. */
+    void setBlockIP(GenRegister blockip, uint32_t labelValue) {
+      if (!ctx.isDWLabel())
+        MOV(GenRegister::retype(blockip, GEN_TYPE_UW), GenRegister::immuw(labelValue));
+      else
+        MOV(GenRegister::retype(blockip, GEN_TYPE_UD), GenRegister::immud(labelValue));
+    }
+
+    /* Emit CMP(cond) of blockip against labelReg; blockip and the null destination are retyped to UW or UD per the label width. */
+    void cmpBlockIP(uint32_t cond,
+                    GenRegister blockip,
+                    GenRegister labelReg) {
+      if (!ctx.isDWLabel())
+        CMP(cond,
+            GenRegister::retype(blockip, GEN_TYPE_UW),
+            labelReg,
+            GenRegister::retype(GenRegister::null(),
+            GEN_TYPE_UW));
+      else
+        CMP(cond,
+            GenRegister::retype(blockip, GEN_TYPE_UD),
+            labelReg,
+            GenRegister::retype(GenRegister::null(),
+            GEN_TYPE_UD));
+    }
+
+    void cmpBlockIP(uint32_t cond, // overload comparing blockip against a UW/UD immediate built from labelValue
+                    GenRegister blockip,
+                    uint32_t labelValue) {
+      if (!ctx.isDWLabel())
+        CMP(cond,
+            GenRegister::retype(blockip, GEN_TYPE_UW),
+            GenRegister::immuw(labelValue),
+            GenRegister::retype(GenRegister::null(),
+            GEN_TYPE_UW));
+      else
+        CMP(cond,
+            GenRegister::retype(blockip, GEN_TYPE_UD),
+            GenRegister::immud(labelValue),
+            GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+    }
+
     /*! Use custom allocators */
     GBE_CLASS(Opaque);
     friend class SelectionBlock;
@@ -3860,10 +3918,10 @@ namespace gbe
     {
       using namespace ir;
       const LabelIndex label = insn.getLabelIndex();
-      const GenRegister src0 = sel.selReg(ocl::blockip);
-      const GenRegister src1 = GenRegister::immuw(label);
+      const GenRegister src0 = sel.getBlockIP();
+      const GenRegister src1 = sel.getLabelImmReg(label);
       const uint32_t simdWidth = sel.ctx.getSimdWidth();
-      GBE_ASSERTM(label < GEN_MAX_LABEL, "We reached the maximum label number which is reserved for barrier handling");
+      GBE_ASSERTM(label < sel.ctx.getMaxLabel(), "We reached the maximum label number which is reserved for barrier handling");
       sel.LABEL(label);
 
       if(!insn.getParent()->needIf)
@@ -3884,8 +3942,7 @@ namespace gbe
       sel.push();
         sel.curr.noMask = 1;
         sel.curr.predicate = GEN_PREDICATE_NONE;
-        sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1,
-                GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+        sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1);
       sel.pop();
 
       if (sel.block->hasBarrier) {
@@ -3895,11 +3952,10 @@ namespace gbe
         // this block, as it will always excute with all lanes activated.
         sel.push();
           sel.curr.predicate = GEN_PREDICATE_NORMAL;
-          sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL));
+          sel.setBlockIP(src0, sel.ctx.getMaxLabel());
           sel.curr.predicate = GEN_PREDICATE_NONE;
           sel.curr.noMask = 1;
-          sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL),
-                  GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+          sel.cmpBlockIP(GEN_CONDITIONAL_EQ, src0, sel.ctx.getMaxLabel());
           if (simdWidth == 8)
             sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
           else if (simdWidth == 16)
@@ -3914,7 +3970,7 @@ namespace gbe
         // FIXME, if the last BRA is unconditional jump, we don't need to update the label here.
         sel.push();
          sel.curr.predicate = GEN_PREDICATE_NORMAL;
-         sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(label.value()));
+         sel.setBlockIP(src0, label.value());
         sel.pop();
       }
       else {
@@ -4191,7 +4247,7 @@ namespace gbe
                            ir::LabelIndex src) const
     {
       using namespace ir;
-      const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+      const GenRegister ip = sel.getBlockIP();
 
       // We will not emit any jump if we must go the next block anyway
       const BasicBlock *curr = insn.getParent();
@@ -4206,7 +4262,7 @@ namespace gbe
           sel.curr.physicalFlag = 0;
           sel.curr.flagIndex = pred.value();
           sel.curr.predicate = GEN_PREDICATE_NORMAL;
-          sel.MOV(ip, GenRegister::immuw(dst.value()));
+          sel.setBlockIP(ip, dst.value());
           sel.curr.predicate = GEN_PREDICATE_NONE;
           if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
             sel.ENDIF(GenRegister::immd(0), nextLabel);
@@ -4216,7 +4272,7 @@ namespace gbe
         // Update the PcIPs
         const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
         if(insn.getParent()->needEndif)
-          sel.MOV(ip, GenRegister::immuw(dst.value()));
+          sel.setBlockIP(ip, dst.value());
 
         if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
           if(insn.getParent()->needEndif && !insn.getParent()->needIf)
@@ -4242,7 +4298,8 @@ namespace gbe
                             ir::LabelIndex src) const
     {
       using namespace ir;
-      const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+      // Block IP register for the label width in use (blockip or dwblockip).
+      const GenRegister ip = sel.getBlockIP();
       const Function &fn = sel.ctx.getFunction();
       const BasicBlock &bb = fn.getBlock(src);
       const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
@@ -4257,13 +4314,13 @@ namespace gbe
         // block. Next instruction will properly update the IPs of the lanes
         // that actually take the branch
         const LabelIndex next = bb.getNextBlock()->getLabelIndex();
-        sel.MOV(ip, GenRegister::immuw(next.value()));
+        sel.setBlockIP(ip, next.value());
         GBE_ASSERT(jip == dst);
         sel.push();
           sel.curr.physicalFlag = 0;
           sel.curr.flagIndex = pred.value();
           sel.curr.predicate = GEN_PREDICATE_NORMAL;
-          sel.MOV(ip, GenRegister::immuw(dst.value()));
+          sel.setBlockIP(ip, dst.value());
           sel.block->endifOffset = -1;
           sel.curr.predicate = GEN_PREDICATE_NONE;
           if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
@@ -4280,7 +4337,7 @@ namespace gbe
         const LabelIndex next = bb.getNextBlock()->getLabelIndex();
         // Update the PcIPs
         if(insn.getParent()->needEndif)
-          sel.MOV(ip, GenRegister::immuw(dst.value()));
+          sel.setBlockIP(ip, dst.value());
         sel.block->endifOffset = -1;
         if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
           if(insn.getParent()->needEndif && !insn.getParent()->needIf)
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index c2c4dae..d3f7363 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -44,7 +44,6 @@ namespace gbe
   /*! Translate IR compare to Gen compare */
   uint32_t getGenCompare(ir::Opcode opcode);
 
-  #define GEN_MAX_LABEL 0xFFFF
 
   /*! Selection opcodes properly encoded from 0 to n for fast jump tables
    *  generations
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index dc5662f..4065a17 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -96,6 +96,7 @@ enum gbe_curbe_type {
   GBE_CURBE_KERNEL_ARGUMENT,
   GBE_CURBE_EXTRA_ARGUMENT,
   GBE_CURBE_BLOCK_IP,
+  GBE_CURBE_DW_BLOCK_IP,
   GBE_CURBE_THREAD_NUM,
   GBE_CURBE_ZERO,
   GBE_CURBE_ONE,
diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
index 7983778..79dc997 100644
--- a/backend/src/ir/function.cpp
+++ b/backend/src/ir/function.cpp
@@ -136,8 +136,8 @@ namespace ir {
   }
 
   LabelIndex Function::newLabel(void) {
-    GBE_ASSERTM(labels.size() < 0xffff,
-                "Too many labels are defined (65536 only are supported)");
+    GBE_ASSERTM(labels.size() < 0xffffffffull,
+                "Too many labels are defined (4G only are supported)");
     const LabelIndex index(labels.size());
     labels.push_back(NULL);
     return index;
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 4c272bd..ec7ab94 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -43,6 +43,7 @@ namespace ir {
         "zero", "one",
         "retVal", "slm_offset",
         "printf_buffer_pointer", "printf_index_buffer_pointer",
+        "dwblockip",
         "invalid"
     };
 
@@ -86,6 +87,7 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, slmoffset, 1);
       DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
       DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
+      DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
       DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
     }
 #undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 7259d9f..8f69320 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -71,8 +71,9 @@ namespace ir {
     static const Register slmoffset = Register(27);  // Group's SLM offset in total 64K SLM
     static const Register printfbptr = Register(28); // printf buffer address .
     static const Register printfiptr = Register(29); // printf index buffer address.
-    static const Register invalid = Register(30);  // used for valid comparation.
-    static const uint32_t regNum = 31;             // number of special registers
+    static const Register dwblockip = Register(30);  // 32-bit (DW) blockip, used when labels need more than 16 bits
+    static const Register invalid = Register(31);  // used for valid comparison.
+    static const uint32_t regNum = 32;             // number of special registers
     extern const char *specialRegMean[];           // special register name.
   } /* namespace ocl */
 
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 253c4f2..4adbd2b 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -49,23 +49,27 @@ cl_set_varying_payload(const cl_kernel ker,
   size_t i, j, k, curr = 0;
   int32_t id_offset[3], ip_offset;
   cl_int err = CL_SUCCESS;
+  int32_t dw_ip_offset = -1;
 
   id_offset[0] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 0);
   id_offset[1] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0);
   id_offset[2] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0);
   ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0);
+  if (ip_offset < 0)
+    dw_ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_DW_BLOCK_IP, 0);
+  assert(ip_offset < 0 || dw_ip_offset < 0);
   assert(id_offset[0] >= 0 &&
          id_offset[1] >= 0 &&
          id_offset[2] >= 0 &&
-         ip_offset >= 0);
+         (ip_offset >= 0 || dw_ip_offset >= 0));
 
   TRY_ALLOC(ids[0], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
   TRY_ALLOC(ids[1], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
   TRY_ALLOC(ids[2], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
   TRY_ALLOC(block_ips, (uint16_t*) alloca(sizeof(uint16_t)*thread_n*simd_sz));
-
   /* 0xffff means that the lane is inactivated */
-  memset(block_ips, 0xff, sizeof(uint16_t)*thread_n*simd_sz);
+  memset(block_ips, 0xff, sizeof(uint16_t)*thread_n*simd_sz);
+
 
   /* Compute the IDs and the block IPs */
   for (k = 0; k < local_wk_sz[2]; ++k)
@@ -84,11 +88,15 @@ cl_set_varying_payload(const cl_kernel ker,
     uint32_t *ids1 = (uint32_t *) (data + id_offset[1]);
     uint32_t *ids2 = (uint32_t *) (data + id_offset[2]);
     uint16_t *ips  = (uint16_t *) (data + ip_offset);
+    uint32_t *dw_ips  = (uint32_t *) (data + dw_ip_offset);
     for (j = 0; j < simd_sz; ++j, ++curr) {
       ids0[j] = ids[0][curr];
       ids1[j] = ids[1][curr];
       ids2[j] = ids[2][curr];
-      ips[j] = block_ips[curr];
+      if (ip_offset >= 0)
+        ips[j] = block_ips[curr];
+      if (dw_ip_offset >= 0) /* widen the 16-bit inactive marker 0xffff to the DW one */
+        dw_ips[j] = (block_ips[curr] == 0xffff) ? 0xffffffffu : block_ips[curr];
     }
   }
 
-- 
1.9.1



More information about the Beignet mailing list