[Beignet] [PATCH V2] remove GBE_CURBE_STACK_POINTER in payload

Wed Aug 26 23:17:51 PDT 2015

LGTM, pushed, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Tuesday, August 25, 2015 04:04
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH V2] remove GBE_CURBE_STACK_POINTER in
> payload
> 
> Initialize the per-lane ID data for the stack pointer inside the kernel with
> a packed integer vector, instead of uploading it through the curbe payload.
> 
> V2: call functions from ctx, instead of ctx.registerAllocator
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  backend/src/backend/context.cpp            | 10 ++++-----
>  backend/src/backend/context.hpp            |  2 +-
>  backend/src/backend/gen75_context.cpp      |  4 +++-
>  backend/src/backend/gen_context.cpp        | 33 ++++++++++++++++++++++++++++--
>  backend/src/backend/gen_context.hpp        |  2 ++
>  backend/src/backend/gen_reg_allocation.cpp | 27 +++++++++++++++---------
>  backend/src/backend/program.h              |  1 -
>  backend/src/backend/program.hpp            |  2 +-
>  src/cl_command_queue_gen7.c                |  9 --------
>  9 files changed, 60 insertions(+), 30 deletions(-)
> 
> diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
> index b8dfa8c..33b2409 100644
> --- a/backend/src/backend/context.cpp
> +++ b/backend/src/backend/context.cpp
> @@ -373,8 +373,8 @@ namespace gbe
>      return this->kernel;
>    }
> 
> -  int16_t Context::allocate(int16_t size, int16_t alignment) {
> -    return registerAllocator->allocate(size, alignment);
> +  int16_t Context::allocate(int16_t size, int16_t alignment, bool bFwd) {
> +    return registerAllocator->allocate(size, alignment, bFwd);
>    }
> 
>    void Context::deallocate(int16_t offset) { registerAllocator->deallocate(offset); }
> @@ -396,10 +396,10 @@ namespace gbe
> 
>    void Context::buildStack(void) {
>      const auto &stackUse = dag->getUse(ir::ocl::stackptr);
> -    if (stackUse.size() == 0)  // no stack is used if stackptr is unused
> +    if (stackUse.size() == 0) {  // no stack is used if stackptr is unused
> +      this->kernel->stackSize = 0;
>        return;
> -    // Be sure that the stack pointer is set
> -    // GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
> +    }
>      uint32_t stackSize = 128;
>      while (stackSize < fn.getStackSize()) {
>        stackSize *= 3;
> diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
> index faa7c8a..079967d 100644
> --- a/backend/src/backend/context.hpp
> +++ b/backend/src/backend/context.hpp
> @@ -85,7 +85,7 @@ namespace gbe
>        return JIPs.find(insn) != JIPs.end();
>      }
>      /*! Allocate some memory in the register file */
> -    int16_t allocate(int16_t size, int16_t alignment);
> +    int16_t allocate(int16_t size, int16_t alignment, bool bFwd=0);
>      /*! Deallocate previously allocated memory */
>      void deallocate(int16_t offset);
>      /*! Spilt a block into 2 blocks, for some registers allocate together but deallocate seperate */
> diff --git a/backend/src/backend/gen75_context.cpp b/backend/src/backend/gen75_context.cpp
> index b9dfb18..7d407c3 100644
> --- a/backend/src/backend/gen75_context.cpp
> +++ b/backend/src/backend/gen75_context.cpp
> @@ -67,7 +67,7 @@ namespace gbe
>      using namespace ir;
> 
>      // Only emit stack pointer computation if we use a stack
> -    if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
> +    if (kernel->getStackSize() == 0)
>        return;
> 
>      // Check that everything is consistent in the kernel code
> @@ -80,6 +80,8 @@ namespace gbe
>        GenRegister::ud16grf(ir::ocl::stackptr);
>      const GenRegister stackptr = ra->genReg(selStatckPtr);
> 
> +    loadLaneID(stackptr);
> +
>      // We compute the per-lane stack pointer here
>      // private address start from zero
>      p->push();
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 0c301dd..25fdf08 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -176,11 +176,39 @@ namespace gbe
>      p->pop();
>    }
> 
> +  void GenContext::loadLaneID(GenRegister dst) {
> +    const GenRegister laneID = GenRegister::immv(0x76543210);
> +    GenRegister dst_;
> +    if (dst.type == GEN_TYPE_UW)
> +      dst_ = dst;
> +    else
> +      dst_ = GenRegister::uw16grf(126,0);
> +
> +    p->push();
> +      uint32_t execWidth = p->curr.execWidth;
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->curr.noMask = 1;
> +      if (execWidth == 8)
> +        p->MOV(dst_, laneID);
> +      else {
> +        p->curr.execWidth = 8;
> +        p->MOV(dst_, laneID);
> +        //Packed Unsigned Half-Byte Integer Vector does not work
> +        //have to mock by adding 8 to the signed vector
> +        const GenRegister eight = GenRegister::immuw(8);
> +        p->ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);
> +        p->curr.execWidth = 16;
> +      }
> +      if (dst.type != GEN_TYPE_UW)
> +        p->MOV(dst, dst_);
> +    p->pop();
> +  }
> +
>    void GenContext::emitStackPointer(void) {
>      using namespace ir;
> 
>      // Only emit stack pointer computation if we use a stack
> -    if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
> +    if (kernel->getStackSize() == 0)
>        return;
> 
>      // Check that everything is consistent in the kernel code
> @@ -193,6 +221,8 @@ namespace gbe
>        GenRegister::ud16grf(ir::ocl::stackptr);
>      const GenRegister stackptr = ra->genReg(selStatckPtr);
> 
> +    loadLaneID(stackptr);
> +
>      // We compute the per-lane stack pointer here
>      // threadId * perThreadSize + laneId*perLaneSize
>      // let private address start from zero
> @@ -2254,7 +2284,6 @@ namespace gbe
>          INSERT_REG(numgroup0, GROUP_NUM_X)
>          INSERT_REG(numgroup1, GROUP_NUM_Y)
>          INSERT_REG(numgroup2, GROUP_NUM_Z)
> -        INSERT_REG(stackptr, STACK_POINTER)
>          INSERT_REG(printfbptr, PRINTF_BUF_POINTER)
>          INSERT_REG(printfiptr, PRINTF_INDEX_POINTER)
>          do {} while(0);
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 8ef725f..34f9293 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -107,6 +107,8 @@ namespace gbe
>        return this->liveness->getLiveIn(bb);
>      }
> 
> +    void loadLaneID(GenRegister dst);
> +
>      void collectShifter(GenRegister dest, GenRegister src);
>      void loadTopHalf(GenRegister dest, GenRegister src);
>      void storeTopHalf(GenRegister dest, GenRegister src);
> diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
> index 4cb88e9..39f1934 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -133,8 +133,8 @@ namespace gbe
>      void validateFlag(Selection &selection, SelectionInstruction &insn);
>      /*! Allocate the GRF registers */
>      bool allocateGRFs(Selection &selection);
> -    /*! Create gen registers for all preallocated curbe registers. */
> -    void allocatePayloadRegs(void);
> +    /*! Create gen registers for all preallocated special registers. */
> +    void allocateSpecialRegs(void);
>      /*! Create a Gen register from a register set in the payload */
>      void allocatePayloadReg(ir::Register, uint32_t offset, uint32_t subOffset = 0);
>      /*! Create the intervals for each register */
> @@ -228,7 +228,7 @@ namespace gbe
>      this->intervals[reg].maxID = 0;
>    }
> 
> -  INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) {
> +  INLINE void GenRegAllocator::Opaque::allocateSpecialRegs(void) {
>      using namespace ir;
>      for(auto &it : this->ctx.curbeRegs)
>        allocatePayloadReg(it.first, it.second);
> @@ -248,6 +248,19 @@ namespace gbe
>        allocatePayloadReg(reg, it->second, subOffset);
>        ctx.splitBlock(it->second, subOffset);
>      }
> +
> +    if (RA.contains(ocl::stackbuffer)) {
> +      uint32_t regSize = 0;
> +      this->getRegAttrib(ocl::stackptr, regSize);
> +      uint32_t offset = this->ctx.allocate(regSize, regSize, 1);
> +      RA.insert(std::make_pair(ocl::stackptr, offset));
> +    }
> +
> +    // Group and barrier IDs are always allocated by the hardware in r0
> +    RA.insert(std::make_pair(ocl::groupid0,  1*sizeof(float))); // r0.1
> +    RA.insert(std::make_pair(ocl::groupid1,  6*sizeof(float))); // r0.6
> +    RA.insert(std::make_pair(ocl::groupid2,  7*sizeof(float))); // r0.7
> +    RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2
>    }
> 
>    bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, const GenRegInterval &interval) {
> @@ -1001,13 +1014,7 @@ namespace gbe
>        this->intervals.push_back(ir::Register(regID));
> 
>      // Allocate the special registers (only those which are actually used)
> -    this->allocatePayloadRegs();
> -
> -    // Group and barrier IDs are always allocated by the hardware in r0
> -    RA.insert(std::make_pair(ocl::groupid0,  1*sizeof(float))); // r0.1
> -    RA.insert(std::make_pair(ocl::groupid1,  6*sizeof(float))); // r0.6
> -    RA.insert(std::make_pair(ocl::groupid2,  7*sizeof(float))); // r0.7
> -    RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2
> +    this->allocateSpecialRegs();
> 
>      // block IP used to handle the mask in SW is always allocated
> 
> diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
> index fa75052..af19732 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -91,7 +91,6 @@ enum gbe_curbe_type {
>    GBE_CURBE_GROUP_NUM_Z,
>    GBE_CURBE_WORK_DIM,
>    GBE_CURBE_IMAGE_INFO,
> -  GBE_CURBE_STACK_POINTER,
>    GBE_CURBE_PRINTF_BUF_POINTER,
>    GBE_CURBE_PRINTF_INDEX_POINTER,
>    GBE_CURBE_KERNEL_ARGUMENT,
> diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
> index cff2463..efe192f 100644
> --- a/backend/src/backend/program.hpp
> +++ b/backend/src/backend/program.hpp
> @@ -223,7 +223,7 @@ namespace gbe {
>      uint32_t argNum;           //!< Number of function arguments
>      uint32_t curbeSize;        //!< Size of the data to push
>      uint32_t simdWidth;        //!< SIMD size for the kernel (lane number)
> -    uint32_t stackSize;        //!< Stack size (may be 0 if unused)
> +    uint32_t stackSize;        //!< Stack size (0 if unused)
>      uint32_t scratchSize;      //!< Scratch memory size (may be 0 if unused)
>      bool useSLM;               //!< SLM requires a special HW config
>      uint32_t slmSize;          //!< slm size for kernel variable
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index 4adbd2b..0e60528 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -210,15 +210,6 @@ cl_curbe_fill(cl_kernel ker,
>    UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
>  #undef UPLOAD
> 
> -  /* Write identity for the stack pointer. This is required by the stack pointer
> -   * computation in the kernel
> -   */
> -  if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_STACK_POINTER, 0)) >= 0) {
> -    const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
> -    uint32_t *stackptr = (uint32_t *) (ker->curbe + offset);
> -    int32_t i;
> -    for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i;
> -  }
>    /* Handle the various offsets to SLM */
>    const int32_t arg_n = interp_kernel_get_arg_num(ker->opaque);
>    int32_t arg, slm_offset = interp_kernel_get_slm_size(ker->opaque);
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet