[Beignet] [PATCH V2] generate sub_group_id inside kernel instead of payload

Guo Yejun yejun.guo at intel.com
Sun Aug 9 17:32:37 PDT 2015


get_sub_group_id returns values in [0, 7] for SIMD8 and [0, 15] for SIMD16.
Previously these values were set up in the kernel payload; now they are
generated inside the kernel with a packed integer vector.

v2: encapsulate into a function so that others can get the lane id easily.
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen_context.cpp        |  8 --------
 backend/src/backend/gen_insn_selection.cpp | 28 ++++++++++++++++++++++++++--
 backend/src/backend/program.h              |  1 -
 backend/src/ir/profile.cpp                 |  1 -
 backend/src/ir/profile.hpp                 |  7 +++----
 src/cl_command_queue_gen7.c                |  8 --------
 6 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index e16b0a9..29b58df 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2217,13 +2217,8 @@ namespace gbe
       allocCurbeReg(reg, GBE_CURBE_##PATCH); \
     } else
 
-    bool needLaneID = false;
     fn.foreachInstruction([&](ir::Instruction &insn) {
       const uint32_t srcNum = insn.getSrcNum();
-      if (insn.getOpcode() == ir::OP_SIMD_ID) {
-        GBE_ASSERT(srcNum == 0);
-        needLaneID = true;
-      }
       for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
         const ir::Register reg = insn.getSrc(srcID);
         if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
@@ -2262,9 +2257,6 @@ namespace gbe
     });
 #undef INSERT_REG
 
-    if (needLaneID)
-      allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
-
     // After this point the vector is immutable. Sorting it will make
     // research faster
     std::sort(kernel->patches.begin(), kernel->patches.end());
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index b0ba9e3..598238d 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -401,6 +401,7 @@ namespace gbe
       return GenRegister::offset(reg, nr, subnr);
     }
 
+    GenRegister getLaneIDReg();
     /*! Implement public class */
     INLINE uint32_t getRegNum(void) const { return file.regNum(); }
     /*! Implements public interface */
@@ -1661,6 +1662,29 @@ namespace gbe
     insn->src(1) = src1;
   }
 
+  GenRegister Selection::Opaque::getLaneIDReg()
+  {
+    const GenRegister laneID = GenRegister::immv(0x76543210); // packed half-byte immediate holding lane ids 0..7
+    ir::Register r = reg(ir::RegisterFamily::FAMILY_WORD);
+    const GenRegister dst = selReg(r, ir::TYPE_U16);
+
+    uint32_t execWidth = curr.execWidth;
+    if (execWidth == 8)
+      MOV(dst, laneID);
+    else {
+      push();
+      curr.execWidth = 8;
+      curr.noMask = 1;
+      MOV(dst, laneID);
+      //The Packed Unsigned Half-Byte Integer Vector does not work, so the upper
+      //lane ids are emulated by adding 8 to the signed vector written just above.
+      const GenRegister eight = GenRegister::immuw(8);
+      ADD(GenRegister::offset(dst, 0, 16), dst, eight);
+      pop();
+    }
+    return dst;
+  }
+
   void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);
     insn->src(0) = src0;
@@ -2299,8 +2323,8 @@ namespace gbe
           break;
         case ir::OP_SIMD_ID:
           {
-            const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);
-            sel.MOV(dst, selLaneID);
+            GenRegister laneID = sel.getLaneIDReg();
+            sel.MOV(dst, laneID);
           }
           break;
         default: NOT_SUPPORTED;
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 3637ebb..56db1a1 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -101,7 +101,6 @@ enum gbe_curbe_type {
   GBE_CURBE_THREAD_NUM,
   GBE_CURBE_ZERO,
   GBE_CURBE_ONE,
-  GBE_CURBE_LANE_ID,
   GBE_CURBE_SLM_OFFSET,
   GBE_CURBE_BTI_UTIL,
 };
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index af9f698..37f2d3d 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -90,7 +90,6 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
       DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
       DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
-      DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
       DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
       DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);
     }
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 9323824..bf909be 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -72,10 +72,9 @@ namespace ir {
     static const Register printfbptr = Register(28); // printf buffer address .
     static const Register printfiptr = Register(29); // printf index buffer address.
     static const Register dwblockip = Register(30);  // blockip
-    static const Register laneid = Register(31);  // lane id.
-    static const Register invalid = Register(32);  // used for valid comparation.
-    static const Register btiUtil = Register(33);  // used for mixed pointer as bti utility.
-    static const uint32_t regNum = 34;             // number of special registers
+    static const Register invalid = Register(31);  // used for valid comparation.
+    static const Register btiUtil = Register(32);  // used for mixed pointer as bti utility.
+    static const uint32_t regNum = 33;             // number of special registers
     extern const char *specialRegMean[];           // special register name.
   } /* namespace ocl */
 
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 89f39b3..4adbd2b 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker,
   UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
 #undef UPLOAD
 
-  /* get_sub_group_id needs it */
-  if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) {
-    const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
-    uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
-    int32_t i;
-    for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
-  }
-
   /* Write identity for the stack pointer. This is required by the stack pointer
    * computation in the kernel
    */
-- 
1.9.1



More information about the Beignet mailing list