[Beignet] [PATCH V2] remove GBE_CURBE_STACK_POINTER in payload
Yang, Rong R
rong.r.yang at intel.com
Wed Aug 26 23:17:51 PDT 2015
LGTM, pushed, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Tuesday, August 25, 2015 04:04
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH V2] remove GBE_CURBE_STACK_POINTER in
> payload
>
> Initialize the per-lane stack pointer data inside the kernel with a packed
> integer vector instead of uploading it through the curbe payload.
>
> V2: call functions from ctx, instead of ctx.registerAllocator
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> backend/src/backend/context.cpp | 10 ++++-----
> backend/src/backend/context.hpp | 2 +-
> backend/src/backend/gen75_context.cpp | 4 +++-
> backend/src/backend/gen_context.cpp | 33
> ++++++++++++++++++++++++++++--
> backend/src/backend/gen_context.hpp | 2 ++
> backend/src/backend/gen_reg_allocation.cpp | 27 +++++++++++++++-------
> --
> backend/src/backend/program.h | 1 -
> backend/src/backend/program.hpp | 2 +-
> src/cl_command_queue_gen7.c | 9 --------
> 9 files changed, 60 insertions(+), 30 deletions(-)
>
> diff --git a/backend/src/backend/context.cpp
> b/backend/src/backend/context.cpp index b8dfa8c..33b2409 100644
> --- a/backend/src/backend/context.cpp
> +++ b/backend/src/backend/context.cpp
> @@ -373,8 +373,8 @@ namespace gbe
> return this->kernel;
> }
>
> - int16_t Context::allocate(int16_t size, int16_t alignment) {
> - return registerAllocator->allocate(size, alignment);
> + int16_t Context::allocate(int16_t size, int16_t alignment, bool bFwd) {
> + return registerAllocator->allocate(size, alignment, bFwd);
> }
>
> void Context::deallocate(int16_t offset) { registerAllocator-
> >deallocate(offset); } @@ -396,10 +396,10 @@ namespace gbe
>
> void Context::buildStack(void) {
> const auto &stackUse = dag->getUse(ir::ocl::stackptr);
> - if (stackUse.size() == 0) // no stack is used if stackptr is unused
> + if (stackUse.size() == 0) { // no stack is used if stackptr is unused
> + this->kernel->stackSize = 0;
> return;
> - // Be sure that the stack pointer is set
> - // GBE_ASSERT(this->kernel-
> >getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
> + }
> uint32_t stackSize = 128;
> while (stackSize < fn.getStackSize()) {
> stackSize *= 3;
> diff --git a/backend/src/backend/context.hpp
> b/backend/src/backend/context.hpp index faa7c8a..079967d 100644
> --- a/backend/src/backend/context.hpp
> +++ b/backend/src/backend/context.hpp
> @@ -85,7 +85,7 @@ namespace gbe
> return JIPs.find(insn) != JIPs.end();
> }
> /*! Allocate some memory in the register file */
> - int16_t allocate(int16_t size, int16_t alignment);
> + int16_t allocate(int16_t size, int16_t alignment, bool bFwd=0);
> /*! Deallocate previously allocated memory */
> void deallocate(int16_t offset);
> /*! Spilt a block into 2 blocks, for some registers allocate together but
> deallocate seperate */ diff --git a/backend/src/backend/gen75_context.cpp
> b/backend/src/backend/gen75_context.cpp
> index b9dfb18..7d407c3 100644
> --- a/backend/src/backend/gen75_context.cpp
> +++ b/backend/src/backend/gen75_context.cpp
> @@ -67,7 +67,7 @@ namespace gbe
> using namespace ir;
>
> // Only emit stack pointer computation if we use a stack
> - if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
> + if (kernel->getStackSize() == 0)
> return;
>
> // Check that everything is consistent in the kernel code @@ -80,6 +80,8
> @@ namespace gbe
> GenRegister::ud16grf(ir::ocl::stackptr);
> const GenRegister stackptr = ra->genReg(selStatckPtr);
>
> + loadLaneID(stackptr);
> +
> // We compute the per-lane stack pointer here
> // private address start from zero
> p->push();
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 0c301dd..25fdf08 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -176,11 +176,39 @@ namespace gbe
> p->pop();
> }
>
> + void GenContext::loadLaneID(GenRegister dst) {
> + const GenRegister laneID = GenRegister::immv(0x76543210);
> + GenRegister dst_;
> + if (dst.type == GEN_TYPE_UW)
> + dst_ = dst;
> + else
> + dst_ = GenRegister::uw16grf(126,0);
> +
> + p->push();
> + uint32_t execWidth = p->curr.execWidth;
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + if (execWidth == 8)
> + p->MOV(dst_, laneID);
> + else {
> + p->curr.execWidth = 8;
> + p->MOV(dst_, laneID);
> + //Packed Unsigned Half-Byte Integer Vector does not work
> + //have to mock by adding 8 to the signed vector
> + const GenRegister eight = GenRegister::immuw(8);
> + p->ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);
> + p->curr.execWidth = 16;
> + }
> + if (dst.type != GEN_TYPE_UW)
> + p->MOV(dst, dst_);
> + p->pop();
> + }
> +
> void GenContext::emitStackPointer(void) {
> using namespace ir;
>
> // Only emit stack pointer computation if we use a stack
> - if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
> + if (kernel->getStackSize() == 0)
> return;
>
> // Check that everything is consistent in the kernel code @@ -193,6 +221,8
> @@ namespace gbe
> GenRegister::ud16grf(ir::ocl::stackptr);
> const GenRegister stackptr = ra->genReg(selStatckPtr);
>
> + loadLaneID(stackptr);
> +
> // We compute the per-lane stack pointer here
> // threadId * perThreadSize + laneId*perLaneSize
> // let private address start from zero @@ -2254,7 +2284,6 @@ namespace
> gbe
> INSERT_REG(numgroup0, GROUP_NUM_X)
> INSERT_REG(numgroup1, GROUP_NUM_Y)
> INSERT_REG(numgroup2, GROUP_NUM_Z)
> - INSERT_REG(stackptr, STACK_POINTER)
> INSERT_REG(printfbptr, PRINTF_BUF_POINTER)
> INSERT_REG(printfiptr, PRINTF_INDEX_POINTER)
> do {} while(0);
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 8ef725f..34f9293 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -107,6 +107,8 @@ namespace gbe
> return this->liveness->getLiveIn(bb);
> }
>
> + void loadLaneID(GenRegister dst);
> +
> void collectShifter(GenRegister dest, GenRegister src);
> void loadTopHalf(GenRegister dest, GenRegister src);
> void storeTopHalf(GenRegister dest, GenRegister src); diff --git
> a/backend/src/backend/gen_reg_allocation.cpp
> b/backend/src/backend/gen_reg_allocation.cpp
> index 4cb88e9..39f1934 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -133,8 +133,8 @@ namespace gbe
> void validateFlag(Selection &selection, SelectionInstruction &insn);
> /*! Allocate the GRF registers */
> bool allocateGRFs(Selection &selection);
> - /*! Create gen registers for all preallocated curbe registers. */
> - void allocatePayloadRegs(void);
> + /*! Create gen registers for all preallocated special registers. */
> + void allocateSpecialRegs(void);
> /*! Create a Gen register from a register set in the payload */
> void allocatePayloadReg(ir::Register, uint32_t offset, uint32_t subOffset =
> 0);
> /*! Create the intervals for each register */ @@ -228,7 +228,7 @@
> namespace gbe
> this->intervals[reg].maxID = 0;
> }
>
> - INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) {
> + INLINE void GenRegAllocator::Opaque::allocateSpecialRegs(void) {
> using namespace ir;
> for(auto &it : this->ctx.curbeRegs)
> allocatePayloadReg(it.first, it.second); @@ -248,6 +248,19 @@
> namespace gbe
> allocatePayloadReg(reg, it->second, subOffset);
> ctx.splitBlock(it->second, subOffset);
> }
> +
> + if (RA.contains(ocl::stackbuffer)) {
> + uint32_t regSize = 0;
> + this->getRegAttrib(ocl::stackptr, regSize);
> + uint32_t offset = this->ctx.allocate(regSize, regSize, 1);
> + RA.insert(std::make_pair(ocl::stackptr, offset));
> + }
> +
> + // Group and barrier IDs are always allocated by the hardware in r0
> + RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1
> + RA.insert(std::make_pair(ocl::groupid1, 6*sizeof(float))); // r0.6
> + RA.insert(std::make_pair(ocl::groupid2, 7*sizeof(float))); // r0.7
> + RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2
> }
>
> bool GenRegAllocator::Opaque::createGenReg(const Selection &selection,
> const GenRegInterval &interval) { @@ -1001,13 +1014,7 @@ namespace gbe
> this->intervals.push_back(ir::Register(regID));
>
> // Allocate the special registers (only those which are actually used)
> - this->allocatePayloadRegs();
> -
> - // Group and barrier IDs are always allocated by the hardware in r0
> - RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1
> - RA.insert(std::make_pair(ocl::groupid1, 6*sizeof(float))); // r0.6
> - RA.insert(std::make_pair(ocl::groupid2, 7*sizeof(float))); // r0.7
> - RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2
> + this->allocateSpecialRegs();
>
> // block IP used to handle the mask in SW is always allocated
>
> diff --git a/backend/src/backend/program.h
> b/backend/src/backend/program.h index fa75052..af19732 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -91,7 +91,6 @@ enum gbe_curbe_type {
> GBE_CURBE_GROUP_NUM_Z,
> GBE_CURBE_WORK_DIM,
> GBE_CURBE_IMAGE_INFO,
> - GBE_CURBE_STACK_POINTER,
> GBE_CURBE_PRINTF_BUF_POINTER,
> GBE_CURBE_PRINTF_INDEX_POINTER,
> GBE_CURBE_KERNEL_ARGUMENT,
> diff --git a/backend/src/backend/program.hpp
> b/backend/src/backend/program.hpp index cff2463..efe192f 100644
> --- a/backend/src/backend/program.hpp
> +++ b/backend/src/backend/program.hpp
> @@ -223,7 +223,7 @@ namespace gbe {
> uint32_t argNum; //!< Number of function arguments
> uint32_t curbeSize; //!< Size of the data to push
> uint32_t simdWidth; //!< SIMD size for the kernel (lane number)
> - uint32_t stackSize; //!< Stack size (may be 0 if unused)
> + uint32_t stackSize; //!< Stack size (0 if unused)
> uint32_t scratchSize; //!< Scratch memory size (may be 0 if unused)
> bool useSLM; //!< SLM requires a special HW config
> uint32_t slmSize; //!< slm size for kernel variable
> diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c index 4adbd2b..0e60528 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -210,15 +210,6 @@ cl_curbe_fill(cl_kernel ker,
> UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD
>
> - /* Write identity for the stack pointer. This is required by the stack pointer
> - * computation in the kernel
> - */
> - if ((offset = interp_kernel_get_curbe_offset(ker->opaque,
> GBE_CURBE_STACK_POINTER, 0)) >= 0) {
> - const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
> - uint32_t *stackptr = (uint32_t *) (ker->curbe + offset);
> - int32_t i;
> - for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i;
> - }
> /* Handle the various offsets to SLM */
> const int32_t arg_n = interp_kernel_get_arg_num(ker->opaque);
> int32_t arg, slm_offset = interp_kernel_get_slm_size(ker->opaque);
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list