[Beignet] [PATCH V2] generate sub_group_id inside kernel instead of payload
Yang, Rong R
rong.r.yang at intel.com
Wed Aug 12 20:08:38 PDT 2015
LGTM, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Monday, August 10, 2015 08:33
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH V2] generate sub_group_id inside kernel instead
> of payload
>
> get_sub_group_id ranges over [0, 7] for SIMD8 and [0, 15] for SIMD16.
> Previously we set up these values in the kernel payload; now we generate
> them inside the kernel with a packed integer vector.
>
> v2: encapsulate into a function so that others can get the lane id easily.
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 8 --------
> backend/src/backend/gen_insn_selection.cpp | 28
> ++++++++++++++++++++++++++--
> backend/src/backend/program.h | 1 -
> backend/src/ir/profile.cpp | 1 -
> backend/src/ir/profile.hpp | 7 +++----
> src/cl_command_queue_gen7.c | 8 --------
> 6 files changed, 29 insertions(+), 24 deletions(-)
>
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index e16b0a9..29b58df 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2217,13 +2217,8 @@ namespace gbe
> allocCurbeReg(reg, GBE_CURBE_##PATCH); \
> } else
>
> - bool needLaneID = false;
> fn.foreachInstruction([&](ir::Instruction &insn) {
> const uint32_t srcNum = insn.getSrcNum();
> - if (insn.getOpcode() == ir::OP_SIMD_ID) {
> - GBE_ASSERT(srcNum == 0);
> - needLaneID = true;
> - }
> for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
> const ir::Register reg = insn.getSrc(srcID);
> if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
> @@ -2262,9 +2257,6 @@ namespace gbe
> });
> #undef INSERT_REG
>
> - if (needLaneID)
> - allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
> -
> // After this point the vector is immutable. Sorting it will make
> // research faster
> std::sort(kernel->patches.begin(), kernel->patches.end());
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index b0ba9e3..598238d 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -401,6 +401,7 @@ namespace gbe
> return GenRegister::offset(reg, nr, subnr);
> }
>
> + GenRegister getLaneIDReg();
> /*! Implement public class */
> INLINE uint32_t getRegNum(void) const { return file.regNum(); }
> /*! Implements public interface */
> @@ -1661,6 +1662,29 @@ namespace gbe
> insn->src(1) = src1;
> }
>
> + GenRegister Selection::Opaque::getLaneIDReg() {
> + const GenRegister laneID = GenRegister::immv(0x76543210);
> + ir::Register r = reg(ir::RegisterFamily::FAMILY_WORD);
> + const GenRegister dst = selReg(r, ir::TYPE_U16);
> +
> + uint32_t execWidth = curr.execWidth;
> + if (execWidth == 8)
> + MOV(dst, laneID);
> + else {
> + push();
> + curr.execWidth = 8;
> + curr.noMask = 1;
> + MOV(dst, laneID);
> + //Packed Unsigned Half-Byte Integer Vector does not work
> + //have to mock by adding 8 to the signed vector
> + const GenRegister eight = GenRegister::immuw(8);
> + ADD(GenRegister::offset(dst, 0, 16), dst, eight);
> + pop();
> + }
> + return dst;
> + }
> +
> void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1,
> GenRegister tmp[3]) {
> SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);
> insn->src(0) = src0;
> @@ -2299,8 +2323,8 @@ namespace gbe
> break;
> case ir::OP_SIMD_ID:
> {
> - const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);
> - sel.MOV(dst, selLaneID);
> + GenRegister laneID = sel.getLaneIDReg();
> + sel.MOV(dst, laneID);
> }
> break;
> default: NOT_SUPPORTED;
> diff --git a/backend/src/backend/program.h
> b/backend/src/backend/program.h
> index 3637ebb..56db1a1 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -101,7 +101,6 @@ enum gbe_curbe_type {
> GBE_CURBE_THREAD_NUM,
> GBE_CURBE_ZERO,
> GBE_CURBE_ONE,
> - GBE_CURBE_LANE_ID,
> GBE_CURBE_SLM_OFFSET,
> GBE_CURBE_BTI_UTIL,
> };
> diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index
> af9f698..37f2d3d 100644
> --- a/backend/src/ir/profile.cpp
> +++ b/backend/src/ir/profile.cpp
> @@ -90,7 +90,6 @@ namespace ir {
> DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
> DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
> DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
> - DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
> DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
> DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);
> }
> diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index
> 9323824..bf909be 100644
> --- a/backend/src/ir/profile.hpp
> +++ b/backend/src/ir/profile.hpp
> @@ -72,10 +72,9 @@ namespace ir {
> static const Register printfbptr = Register(28); // printf buffer address .
> static const Register printfiptr = Register(29); // printf index buffer address.
> static const Register dwblockip = Register(30); // blockip
> - static const Register laneid = Register(31); // lane id.
> - static const Register invalid = Register(32); // used for valid comparation.
> - static const Register btiUtil = Register(33); // used for mixed pointer as bti utility.
> - static const uint32_t regNum = 34; // number of special registers
> + static const Register invalid = Register(31); // used for valid comparation.
> + static const Register btiUtil = Register(32); // used for mixed pointer as bti utility.
> + static const uint32_t regNum = 33; // number of special registers
> extern const char *specialRegMean[]; // special register name.
> } /* namespace ocl */
>
> diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c index 89f39b3..4adbd2b 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker,
> UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
> #undef UPLOAD
>
> - /* get_sub_group_id needs it */
> - if ((offset = interp_kernel_get_curbe_offset(ker->opaque,
> GBE_CURBE_LANE_ID, 0)) >= 0) {
> - const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
> - uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
> - int32_t i;
> - for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
> - }
> -
> /* Write identity for the stack pointer. This is required by the stack pointer
> * computation in the kernel
> */
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list