[Beignet] [PATCH V2] generate sub_group_id inside kernel instead of payload

Yang, Rong R rong.r.yang at intel.com
Wed Aug 12 20:08:38 PDT 2015


LGTM, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Monday, August 10, 2015 08:33
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH V2] generate sub_group_id inside kernel instead
> of payload
> 
> get_sub_group_id ranges over [0, 7] for SIMD8 and [0, 15] for SIMD16.
> Previously we set up these values in the kernel payload; now we generate
> them inside the kernel with a packed integer vector.
> 
> v2: encapsulate into a function so that others can get the lane id easily.
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  backend/src/backend/gen_context.cpp        |  8 --------
>  backend/src/backend/gen_insn_selection.cpp | 28
> ++++++++++++++++++++++++++--
>  backend/src/backend/program.h              |  1 -
>  backend/src/ir/profile.cpp                 |  1 -
>  backend/src/ir/profile.hpp                 |  7 +++----
>  src/cl_command_queue_gen7.c                |  8 --------
>  6 files changed, 29 insertions(+), 24 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index e16b0a9..29b58df 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2217,13 +2217,8 @@ namespace gbe
>        allocCurbeReg(reg, GBE_CURBE_##PATCH); \
>      } else
> 
> -    bool needLaneID = false;
>      fn.foreachInstruction([&](ir::Instruction &insn) {
>        const uint32_t srcNum = insn.getSrcNum();
> -      if (insn.getOpcode() == ir::OP_SIMD_ID) {
> -        GBE_ASSERT(srcNum == 0);
> -        needLaneID = true;
> -      }
>        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
>          const ir::Register reg = insn.getSrc(srcID);
>          if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
> @@ -2262,9 +2257,6 @@ namespace gbe
>      });
>  #undef INSERT_REG
> 
> -    if (needLaneID)
> -      allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
> -
>      // After this point the vector is immutable. Sorting it will make
>      // research faster
>      std::sort(kernel->patches.begin(), kernel->patches.end());
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index b0ba9e3..598238d 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -401,6 +401,7 @@ namespace gbe
>        return GenRegister::offset(reg, nr, subnr);
>      }
> 
> +    GenRegister getLaneIDReg();
>      /*! Implement public class */
>      INLINE uint32_t getRegNum(void) const { return file.regNum(); }
>      /*! Implements public interface */
> @@ -1661,6 +1662,29 @@ namespace gbe
>      insn->src(1) = src1;
>    }
> 
> +  GenRegister Selection::Opaque::getLaneIDReg()  {
> +    const GenRegister laneID = GenRegister::immv(0x76543210);
> +    ir::Register r = reg(ir::RegisterFamily::FAMILY_WORD);
> +    const GenRegister dst = selReg(r, ir::TYPE_U16);
> +
> +    uint32_t execWidth = curr.execWidth;
> +    if (execWidth == 8)
> +      MOV(dst, laneID);
> +    else {
> +      push();
> +      curr.execWidth = 8;
> +      curr.noMask = 1;
> +      MOV(dst, laneID);
> +      //Packed Unsigned Half-Byte Integer Vector does not work
> +      //have to mock by adding 8 to the signed vector
> +      const GenRegister eight = GenRegister::immuw(8);
> +      ADD(GenRegister::offset(dst, 0, 16), dst, eight);
> +      pop();
> +    }
> +    return dst;
> +  }
> +
>    void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1,
> GenRegister tmp[3]) {
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);
>      insn->src(0) = src0;
> @@ -2299,8 +2323,8 @@ namespace gbe
>            break;
>          case ir::OP_SIMD_ID:
>            {
> -            const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);
> -            sel.MOV(dst, selLaneID);
> +            GenRegister laneID = sel.getLaneIDReg();
> +            sel.MOV(dst, laneID);
>            }
>            break;
>          default: NOT_SUPPORTED;
> diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
> index 3637ebb..56db1a1 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -101,7 +101,6 @@ enum gbe_curbe_type {
>    GBE_CURBE_THREAD_NUM,
>    GBE_CURBE_ZERO,
>    GBE_CURBE_ONE,
> -  GBE_CURBE_LANE_ID,
>    GBE_CURBE_SLM_OFFSET,
>    GBE_CURBE_BTI_UTIL,
>  };
> diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
> index af9f698..37f2d3d 100644
> --- a/backend/src/ir/profile.cpp
> +++ b/backend/src/ir/profile.cpp
> @@ -90,7 +90,6 @@ namespace ir {
>        DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
>        DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
>        DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
> -      DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
>        DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
>        DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1);
>      }
> diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
> index 9323824..bf909be 100644
> --- a/backend/src/ir/profile.hpp
> +++ b/backend/src/ir/profile.hpp
> @@ -72,10 +72,9 @@ namespace ir {
>      static const Register printfbptr = Register(28); // printf buffer address .
>      static const Register printfiptr = Register(29); // printf index buffer address.
>      static const Register dwblockip = Register(30);  // blockip
> -    static const Register laneid = Register(31);  // lane id.
> -    static const Register invalid = Register(32);  // used for valid comparation.
> -    static const Register btiUtil = Register(33);  // used for mixed pointer as bti
> utility.
> -    static const uint32_t regNum = 34;             // number of special registers
> +    static const Register invalid = Register(31);  // used for valid comparation.
> +    static const Register btiUtil = Register(32);  // used for mixed pointer as bti
> utility.
> +    static const uint32_t regNum = 33;             // number of special registers
>      extern const char *specialRegMean[];           // special register name.
>    } /* namespace ocl */
> 
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index 89f39b3..4adbd2b 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -210,14 +210,6 @@ cl_curbe_fill(cl_kernel ker,
>    UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
>  #undef UPLOAD
> 
> -  /* get_sub_group_id needs it */
> -  if ((offset = interp_kernel_get_curbe_offset(ker->opaque,
> GBE_CURBE_LANE_ID, 0)) >= 0) {
> -    const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
> -    uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
> -    int32_t i;
> -    for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
> -  }
> -
>    /* Write identity for the stack pointer. This is required by the stack pointer
>     * computation in the kernel
>     */
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list