[Beignet] [PATCH] generate sub_group_id inside kernel instead of payload
Yang, Rong R
rong.r.yang at intel.com
Sun Aug 9 22:46:45 PDT 2015
Ok, pop and push don't need here.
But you should set the correct p->curr.quarterControl when doing the half of 16 lanes, otherwise the flag mask is not correct.
> -----Original Message-----
> From: Guo, Yejun
> Sent: Monday, August 10, 2015 12:06
> To: Yang, Rong R; beignet at lists.freedesktop.org
> Subject: RE: [Beignet] [PATCH] generate sub_group_id inside kernel instead
> of payload
>
> The sel.push()/pop() is already at the beginning/end of the function, shall I
> add it again?
>
> Btw, I plan to encapsulate into a function so that others can get the lane id
> easily.
>
> -----Original Message-----
> From: Yang, Rong R
> Sent: Monday, August 10, 2015 11:37 AM
> To: Guo, Yejun; beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: RE: [Beignet] [PATCH] generate sub_group_id inside kernel instead
> of payload
>
> One comment.
>
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf
> > Of Guo Yejun
> > Sent: Friday, August 7, 2015 08:51
> > To: beignet at lists.freedesktop.org
> > Cc: Guo, Yejun
> > Subject: [Beignet] [PATCH] generate sub_group_id inside kernel instead
> > of payload
> >
> > get_sub_group_id ranges at [0, 7] for SIMD8 and [0, 15] for SIMD16,
> > previously we set up the values in kernel payload, now change it to
> > generate the values inside kernel with packed integer vector.
> >
> > Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> > ---
> > backend/src/backend/gen_context.cpp | 8 --------
> > backend/src/backend/gen_insn_selection.cpp | 18 ++++++++++++++++--
> > backend/src/backend/program.h | 1 -
> > backend/src/ir/profile.cpp | 1 -
> > backend/src/ir/profile.hpp | 7 +++----
> > src/cl_command_queue_gen7.c | 8 --------
> > 6 files changed, 19 insertions(+), 24 deletions(-)
> >
> > diff --git a/backend/src/backend/gen_context.cpp
> > b/backend/src/backend/gen_context.cpp
> > index e16b0a9..29b58df 100644
> > --- a/backend/src/backend/gen_context.cpp
> > +++ b/backend/src/backend/gen_context.cpp
> > @@ -2217,13 +2217,8 @@ namespace gbe
> > allocCurbeReg(reg, GBE_CURBE_##PATCH); \
> > } else
> >
> > - bool needLaneID = false;
> > fn.foreachInstruction([&](ir::Instruction &insn) {
> > const uint32_t srcNum = insn.getSrcNum();
> > - if (insn.getOpcode() == ir::OP_SIMD_ID) {
> > - GBE_ASSERT(srcNum == 0);
> > - needLaneID = true;
> > - }
> > for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
> > const ir::Register reg = insn.getSrc(srcID);
> > if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2262,9
> > +2257,6 @@ namespace gbe
> > });
> > #undef INSERT_REG
> >
> > - if (needLaneID)
> > - allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
> > -
> > // After this point the vector is immutable. Sorting it will make
> > // research faster
> > std::sort(kernel->patches.begin(), kernel->patches.end()); diff
> > --git a/backend/src/backend/gen_insn_selection.cpp
> > b/backend/src/backend/gen_insn_selection.cpp
> > index b0ba9e3..9772ad1 100644
> > --- a/backend/src/backend/gen_insn_selection.cpp
> > +++ b/backend/src/backend/gen_insn_selection.cpp
> > @@ -2299,8 +2299,22 @@ namespace gbe
> > break;
> > case ir::OP_SIMD_ID:
> > {
> > - const GenRegister selLaneID = sel.selReg(ir::ocl::laneid,
> ir::TYPE_U32);
> > - sel.MOV(dst, selLaneID);
> > + const GenRegister landID = GenRegister::immv(0x76543210);
> > + ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_WORD);
> > + const GenRegister dst_ = sel.selReg(r, ir::TYPE_U16);
> > +
> > + uint32_t execWidth = sel.curr.execWidth;
> > + sel.curr.execWidth = 8;
> > + sel.MOV(dst_, landID);
> > +
> > + if (execWidth == 16) {
> > + //Packed Unsigned Half-Byte Integer Vector does not work
> > + //have to mock by adding 8 to the singed vector
> > + const GenRegister eight = GenRegister::immuw(8);
> > + sel.ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);
> > + sel.curr.execWidth = 16;
> > + }
>
> Because you have changed the state, I think it is better to use sel.push(),
> sel.pop().
>
> > + sel.MOV(dst, dst_);
> > }
> > break;
> > default: NOT_SUPPORTED;
> > diff --git a/backend/src/backend/program.h
> > b/backend/src/backend/program.h index 3637ebb..56db1a1 100644
> > --- a/backend/src/backend/program.h
> > +++ b/backend/src/backend/program.h
> > @@ -101,7 +101,6 @@ enum gbe_curbe_type {
> > GBE_CURBE_THREAD_NUM,
> > GBE_CURBE_ZERO,
> > GBE_CURBE_ONE,
> > - GBE_CURBE_LANE_ID,
> > GBE_CURBE_SLM_OFFSET,
> > GBE_CURBE_BTI_UTIL,
> > };
> > diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
> > index af9f698..37f2d3d 100644
> > --- a/backend/src/ir/profile.cpp
> > +++ b/backend/src/ir/profile.cpp
> > @@ -90,7 +90,6 @@ namespace ir {
> > DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
> > DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
> > DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
> > - DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
> > DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
> > DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);
> > }
> > diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
> > index 9323824..bf909be 100644
> > --- a/backend/src/ir/profile.hpp
> > +++ b/backend/src/ir/profile.hpp
> > @@ -72,10 +72,9 @@ namespace ir {
> > static const Register printfbptr = Register(28); // printf buffer address .
> > static const Register printfiptr = Register(29); // printf index buffer
> address.
> > static const Register dwblockip = Register(30); // blockip
> > - static const Register laneid = Register(31); // lane id.
> > - static const Register invalid = Register(32); // used for valid comparation.
> > - static const Register btiUtil = Register(33); // used for mixed pointer as
> bti
> > utility.
> > - static const uint32_t regNum = 34; // number of special registers
> > + static const Register invalid = Register(31); // used for valid comparation.
> > + static const Register btiUtil = Register(32); // used for mixed
> > + pointer as bti
> > utility.
> > + static const uint32_t regNum = 33; // number of special registers
> > extern const char *specialRegMean[]; // special register name.
> > } /* namespace ocl */
> >
> > diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c
> > index 89f39b3..4adbd2b 100644
> > --- a/src/cl_command_queue_gen7.c
> > +++ b/src/cl_command_queue_gen7.c
> > @@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker,
> > UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD
> >
> > - /* get_sub_group_id needs it */
> > - if ((offset = interp_kernel_get_curbe_offset(ker->opaque,
> > GBE_CURBE_LANE_ID, 0)) >= 0) {
> > - const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
> > - uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
> > - int32_t i;
> > - for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
> > - }
> > -
> > /* Write identity for the stack pointer. This is required by the stack
> pointer
> > * computation in the kernel
> > */
> > --
> > 1.9.1
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list