[Beignet] [PATCH V3 1/2] add simd level function __gen_ocl_get_simd_id
Guo Yejun
yejun.guo at intel.com
Mon Apr 20 20:41:07 PDT 2015
Add the SIMD-level built-in function
uint __gen_ocl_get_simd_id();
Its return value ranges from 0 to simdsize - 1.
V2: use sel.selReg() to simplify the code.
V3: correct the uniform condition in liveness.cpp.
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/src/backend/gen_context.cpp | 9 ++++++++-
backend/src/backend/gen_insn_selection.cpp | 6 ++++++
backend/src/backend/program.h | 1 +
backend/src/ir/instruction.cpp | 1 +
backend/src/ir/instruction.hpp | 2 ++
backend/src/ir/instruction.hxx | 1 +
backend/src/ir/liveness.cpp | 5 +++++
backend/src/ir/profile.cpp | 2 ++
backend/src/ir/profile.hpp | 5 +++--
backend/src/libocl/tmpl/ocl_simd.tmpl.h | 1 +
backend/src/llvm/llvm_gen_backend.cpp | 7 +++++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 1 +
src/cl_command_queue_gen7.c | 8 ++++++++
13 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 684ecaf..62fd596 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2013,9 +2013,14 @@ namespace gbe
if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
allocCurbeReg(reg, GBE_CURBE_##PATCH); \
} else
-
+
+ bool needLaneID = false;
fn.foreachInstruction([&](ir::Instruction &insn) {
const uint32_t srcNum = insn.getSrcNum();
+ if (insn.getOpcode() == ir::OP_SIMD_ID) {
+ GBE_ASSERT(srcNum == 0);
+ needLaneID = true;
+ }
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const ir::Register reg = insn.getSrc(srcID);
if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
@@ -2054,6 +2059,8 @@ namespace gbe
});
#undef INSERT_REG
+ if (needLaneID)
+ allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
// After this point the vector is immutable. Sorting it will make
// research faster
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 026a858..19a3c24 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2137,6 +2137,12 @@ namespace gbe
sel.MOV(dst, src);
}
break;
+ case ir::OP_SIMD_ID:
+ {
+ const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);
+ sel.MOV(dst, selLaneID);
+ }
+ break;
default: NOT_SUPPORTED;
}
sel.pop();
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 554fb16..8c171f5 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -101,6 +101,7 @@ enum gbe_curbe_type {
GBE_CURBE_THREAD_NUM,
GBE_CURBE_ZERO,
GBE_CURBE_ONE,
+ GBE_CURBE_LANE_ID,
GBE_CURBE_SLM_OFFSET,
};
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 86148bc..7723b90 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1614,6 +1614,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
}
DECL_EMIT_FUNCTION(SIMD_SIZE)
+ DECL_EMIT_FUNCTION(SIMD_ID)
#undef DECL_EMIT_FUNCTION
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index c603d9e..436bfd2 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -572,6 +572,8 @@ namespace ir {
Instruction ALU0(Opcode opcode, Type type, Register dst);
/*! simd_size.type dst */
Instruction SIMD_SIZE(Type type, Register dst);
+ /*! simd_id.type dst */
+ Instruction SIMD_ID(Type type, Register dst);
/*! alu1.type dst src */
Instruction ALU1(Opcode opcode, Type type, Register dst, Register src);
/*! mov.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index f86cfbb..3f08a92 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -26,6 +26,7 @@
* \author Benjamin Segovia <benjamin.segovia at intel.com>
*/
DECL_INSN(SIMD_SIZE, NullaryInstruction)
+DECL_INSN(SIMD_ID, NullaryInstruction)
DECL_INSN(MOV, UnaryInstruction)
DECL_INSN(COS, UnaryInstruction)
DECL_INSN(SIN, UnaryInstruction)
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
index 2b1ffdb..9fa7ac3 100644
--- a/backend/src/ir/liveness.cpp
+++ b/backend/src/ir/liveness.cpp
@@ -66,6 +66,11 @@ namespace ir {
const uint32_t srcNum = insn.getSrcNum();
const uint32_t dstNum = insn.getDstNum();
bool uniform = true;
+
+ //do not change dst uniform for simd id
+ if (insn.getOpcode() == ir::OP_SIMD_ID)
+ uniform = false;
+
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const Register reg = insn.getSrc(srcID);
if (!fn.isUniformRegister(reg))
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index ec7ab94..2f6539a 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -44,6 +44,7 @@ namespace ir {
"retVal", "slm_offset",
"printf_buffer_pointer", "printf_index_buffer_pointer",
"dwblockip",
+ "lane_id",
"invalid"
};
@@ -88,6 +89,7 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
+ DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
}
#undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 8f69320..4de6fe0 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -72,8 +72,9 @@ namespace ir {
static const Register printfbptr = Register(28); // printf buffer address .
static const Register printfiptr = Register(29); // printf index buffer address.
static const Register dwblockip = Register(30); // blockip
- static const Register invalid = Register(31); // used for valid comparation.
- static const uint32_t regNum = 32; // number of special registers
+ static const Register laneid = Register(31); // lane id.
+ static const Register invalid = Register(32); // used for valid comparation.
+ static const uint32_t regNum = 33; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index b992902..620e329 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -25,3 +25,4 @@
/////////////////////////////////////////////////////////////////////////////
uint __gen_ocl_get_simd_size(void);
+uint __gen_ocl_get_simd_id(void);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index ac67add..f46bc79 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2806,6 +2806,7 @@ namespace gbe
case GEN_OCL_SIMD_SIZE:
case GEN_OCL_READ_TM:
case GEN_OCL_REGION:
+ case GEN_OCL_SIMD_ID:
this->newRegister(&I);
break;
case GEN_OCL_PRINTF:
@@ -3461,6 +3462,12 @@ namespace gbe
ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst);
break;
}
+ case GEN_OCL_SIMD_ID:
+ {
+ const ir::Register dst = this->getRegister(&I);
+ ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst);
+ break;
+ }
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 2b151f2..e2bffde 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -155,6 +155,7 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size)
+DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id)
DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 4adbd2b..e27a211 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -210,6 +210,14 @@ cl_curbe_fill(cl_kernel ker,
UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
#undef UPLOAD
+ /* __gen_ocl_get_simd_id needs it */
+ if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) {
+ const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
+ uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
+ int32_t i;
+ for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
+ }
+
/* Write identity for the stack pointer. This is required by the stack pointer
* computation in the kernel
*/
--
1.9.1
More information about the Beignet
mailing list