[Beignet] [PATCH 1/2] add simd level function __gen_ocl_get_simd_id

Guo Yejun yejun.guo at intel.com
Wed Apr 15 20:14:58 PDT 2015


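Add the SIMD-level built-in function
    uint __gen_ocl_get_simd_id();
It returns the index of the calling lane within its SIMD thread; the
return value ranges from 0 to simdsize - 1.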

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen_context.cpp        | 9 ++++++++-
 backend/src/backend/gen_insn_selection.cpp | 8 ++++++++
 backend/src/backend/program.h              | 1 +
 backend/src/ir/instruction.cpp             | 1 +
 backend/src/ir/instruction.hpp             | 2 ++
 backend/src/ir/instruction.hxx             | 1 +
 backend/src/ir/liveness.cpp                | 5 +++++
 backend/src/ir/profile.cpp                 | 2 ++
 backend/src/ir/profile.hpp                 | 5 +++--
 backend/src/libocl/tmpl/ocl_simd.tmpl.h    | 1 +
 backend/src/llvm/llvm_gen_backend.cpp      | 7 +++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx | 1 +
 src/cl_command_queue_gen7.c                | 8 ++++++++
 13 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 684ecaf..62fd596 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2013,9 +2013,14 @@ namespace gbe
       if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
       allocCurbeReg(reg, GBE_CURBE_##PATCH); \
     } else
-  
+
+    bool needLaneID = false;
     fn.foreachInstruction([&](ir::Instruction &insn) {
       const uint32_t srcNum = insn.getSrcNum();
+      if (insn.getOpcode() == ir::OP_SIMD_ID) {
+        GBE_ASSERT(srcNum == 0);
+        needLaneID = true;
+      }
       for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
         const ir::Register reg = insn.getSrc(srcID);
         if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
@@ -2054,6 +2059,8 @@ namespace gbe
     });
 #undef INSERT_REG
 
+    if (needLaneID)
+      allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
 
     // After this point the vector is immutable. Sorting it will make
     // research faster
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 026a858..fa08ade 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2137,6 +2137,14 @@ namespace gbe
             sel.MOV(dst, src);
           }
           break;
+        case ir::OP_SIMD_ID:
+          {
+            const GenRegister selLaneID = sel.curr.execWidth == 8 ?
+                                GenRegister::ud8grf(ir::ocl::laneid) :
+                                GenRegister::ud16grf(ir::ocl::laneid);
+            sel.MOV(dst, selLaneID);
+          }
+          break;
         default: NOT_SUPPORTED;
       }
       sel.pop();
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 554fb16..8c171f5 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -101,6 +101,7 @@ enum gbe_curbe_type {
   GBE_CURBE_THREAD_NUM,
   GBE_CURBE_ZERO,
   GBE_CURBE_ONE,
+  GBE_CURBE_LANE_ID,
   GBE_CURBE_SLM_OFFSET,
 };
 
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 86148bc..7723b90 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1614,6 +1614,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
   }
 
   DECL_EMIT_FUNCTION(SIMD_SIZE)
+  DECL_EMIT_FUNCTION(SIMD_ID)
 
 #undef DECL_EMIT_FUNCTION
 
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index c603d9e..436bfd2 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -572,6 +572,8 @@ namespace ir {
   Instruction ALU0(Opcode opcode, Type type, Register dst);
   /*! simd_size.type dst */
   Instruction SIMD_SIZE(Type type, Register dst);
+  /*! simd_id.type dst */
+  Instruction SIMD_ID(Type type, Register dst);
   /*! alu1.type dst src */
   Instruction ALU1(Opcode opcode, Type type, Register dst, Register src);
   /*! mov.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index f86cfbb..3f08a92 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -26,6 +26,7 @@
  * \author Benjamin Segovia <benjamin.segovia at intel.com>
  */
 DECL_INSN(SIMD_SIZE, NullaryInstruction)
+DECL_INSN(SIMD_ID, NullaryInstruction)
 DECL_INSN(MOV, UnaryInstruction)
 DECL_INSN(COS, UnaryInstruction)
 DECL_INSN(SIN, UnaryInstruction)
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
index 2b1ffdb..26c4129 100644
--- a/backend/src/ir/liveness.cpp
+++ b/backend/src/ir/liveness.cpp
@@ -66,6 +66,11 @@ namespace ir {
         const uint32_t srcNum = insn.getSrcNum();
         const uint32_t dstNum = insn.getDstNum();
         bool uniform = true;
+
+        // With no source operands there is nothing to derive uniformity from, so treat dst as non-uniform.
+        if (srcNum == 0)
+          uniform = false;
+
         for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
           const Register reg = insn.getSrc(srcID);
           if (!fn.isUniformRegister(reg))
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index ec7ab94..2f6539a 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -44,6 +44,7 @@ namespace ir {
         "retVal", "slm_offset",
         "printf_buffer_pointer", "printf_index_buffer_pointer",
         "dwblockip",
+        "lane_id",
         "invalid"
     };
 
@@ -88,6 +89,7 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
       DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
       DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
+      DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
       DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
     }
 #undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 8f69320..4de6fe0 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -72,8 +72,9 @@ namespace ir {
     static const Register printfbptr = Register(28); // printf buffer address .
     static const Register printfiptr = Register(29); // printf index buffer address.
     static const Register dwblockip = Register(30);  // blockip
-    static const Register invalid = Register(31);  // used for valid comparation.
-    static const uint32_t regNum = 32;             // number of special registers
+    static const Register laneid = Register(31);  // lane id.
+    static const Register invalid = Register(32);  // used for valid comparation.
+    static const uint32_t regNum = 33;             // number of special registers
     extern const char *specialRegMean[];           // special register name.
   } /* namespace ocl */
 
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index b992902..620e329 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -25,3 +25,4 @@
 /////////////////////////////////////////////////////////////////////////////
 
 uint __gen_ocl_get_simd_size(void);
+uint __gen_ocl_get_simd_id(void);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index ac67add..f46bc79 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2806,6 +2806,7 @@ namespace gbe
       case GEN_OCL_SIMD_SIZE:
       case GEN_OCL_READ_TM:
       case GEN_OCL_REGION:
+      case GEN_OCL_SIMD_ID:
         this->newRegister(&I);
         break;
       case GEN_OCL_PRINTF:
@@ -3461,6 +3462,12 @@ namespace gbe
             ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst);
             break;
           }
+          case GEN_OCL_SIMD_ID:
+          {
+            const ir::Register dst = this->getRegister(&I);
+            ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst);
+            break;
+          }
           default: break;
         }
       }
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 2b151f2..e2bffde 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -155,6 +155,7 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
 DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
 DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
 DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size)
+DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id)
 
 DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
 DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 4adbd2b..e27a211 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -210,6 +210,14 @@ cl_curbe_fill(cl_kernel ker,
   UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
 #undef UPLOAD
 
+  /* __gen_ocl_get_simd_id needs it */
+  if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) {
+    const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
+    uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
+    int32_t i;
+    for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
+  }
+
   /* Write identity for the stack pointer. This is required by the stack pointer
    * computation in the kernel
    */
-- 
1.9.1



More information about the Beignet mailing list