[Beignet] [PATCH] generate sub_group_id inside kernel instead of payload
Guo Yejun
yejun.guo at intel.com
Thu Aug 6 17:51:27 PDT 2015
get_sub_group_id ranges at [0, 7] for SIMD8 and [0, 15] for SIMD16,
previously we set up the values in kernel payload, now change it
to generate the values inside kernel with packed integer vector.
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/src/backend/gen_context.cpp | 8 --------
backend/src/backend/gen_insn_selection.cpp | 18 ++++++++++++++++--
backend/src/backend/program.h | 1 -
backend/src/ir/profile.cpp | 1 -
backend/src/ir/profile.hpp | 7 +++----
src/cl_command_queue_gen7.c | 8 --------
6 files changed, 19 insertions(+), 24 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index e16b0a9..29b58df 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2217,13 +2217,8 @@ namespace gbe
allocCurbeReg(reg, GBE_CURBE_##PATCH); \
} else
- bool needLaneID = false;
fn.foreachInstruction([&](ir::Instruction &insn) {
const uint32_t srcNum = insn.getSrcNum();
- if (insn.getOpcode() == ir::OP_SIMD_ID) {
- GBE_ASSERT(srcNum == 0);
- needLaneID = true;
- }
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const ir::Register reg = insn.getSrc(srcID);
if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
@@ -2262,9 +2257,6 @@ namespace gbe
});
#undef INSERT_REG
- if (needLaneID)
- allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
-
// After this point the vector is immutable. Sorting it will make
// research faster
std::sort(kernel->patches.begin(), kernel->patches.end());
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index b0ba9e3..9772ad1 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2299,8 +2299,22 @@ namespace gbe
break;
case ir::OP_SIMD_ID:
{
- const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);
- sel.MOV(dst, selLaneID);
+ const GenRegister landID = GenRegister::immv(0x76543210);
+ ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_WORD);
+ const GenRegister dst_ = sel.selReg(r, ir::TYPE_U16);
+
+ uint32_t execWidth = sel.curr.execWidth;
+ sel.curr.execWidth = 8;
+ sel.MOV(dst_, landID);
+
+ if (execWidth == 16) {
+ //Packed Unsigned Half-Byte Integer Vector does not work
+ //have to mock by adding 8 to the singed vector
+ const GenRegister eight = GenRegister::immuw(8);
+ sel.ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);
+ sel.curr.execWidth = 16;
+ }
+ sel.MOV(dst, dst_);
}
break;
default: NOT_SUPPORTED;
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 3637ebb..56db1a1 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -101,7 +101,6 @@ enum gbe_curbe_type {
GBE_CURBE_THREAD_NUM,
GBE_CURBE_ZERO,
GBE_CURBE_ONE,
- GBE_CURBE_LANE_ID,
GBE_CURBE_SLM_OFFSET,
GBE_CURBE_BTI_UTIL,
};
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index af9f698..37f2d3d 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -90,7 +90,6 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
- DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);
}
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 9323824..bf909be 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -72,10 +72,9 @@ namespace ir {
static const Register printfbptr = Register(28); // printf buffer address .
static const Register printfiptr = Register(29); // printf index buffer address.
static const Register dwblockip = Register(30); // blockip
- static const Register laneid = Register(31); // lane id.
- static const Register invalid = Register(32); // used for valid comparation.
- static const Register btiUtil = Register(33); // used for mixed pointer as bti utility.
- static const uint32_t regNum = 34; // number of special registers
+ static const Register invalid = Register(31); // used for valid comparation.
+ static const Register btiUtil = Register(32); // used for mixed pointer as bti utility.
+ static const uint32_t regNum = 33; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 89f39b3..4adbd2b 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker,
UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
#undef UPLOAD
- /* get_sub_group_id needs it */
- if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) {
- const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
- uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
- int32_t i;
- for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
- }
-
/* Write identity for the stack pointer. This is required by the stack pointer
* computation in the kernel
*/
--
1.9.1
More information about the Beignet
mailing list