[Beignet] [PATCH V2 1/2] add simd level function __gen_ocl_get_simd_id

Guo, Yejun yejun.guo at intel.com
Sun Apr 19 18:27:35 PDT 2015



-----Original Message-----
From: Yang, Rong R 
Sent: Friday, April 17, 2015 5:12 PM
To: Guo, Yejun; beignet at lists.freedesktop.org
Cc: Guo, Yejun
Subject: RE: [Beignet] [PATCH V2 1/2] add simd level function __gen_ocl_get_simd_id


> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Friday, April 17, 2015 14:47
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH V2 1/2] add simd level function
> __gen_ocl_get_simd_id
> 
> uint __gen_ocl_get_simd_id();
> return value ranges from 0 to simdsize - 1
> 
> V2: use function sel.selReg to refine code
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  backend/src/backend/gen_context.cpp        | 9 ++++++++-
>  backend/src/backend/gen_insn_selection.cpp | 6 ++++++
>  backend/src/backend/program.h              | 1 +
>  backend/src/ir/instruction.cpp             | 1 +
>  backend/src/ir/instruction.hpp             | 2 ++
>  backend/src/ir/instruction.hxx             | 1 +
>  backend/src/ir/liveness.cpp                | 5 +++++
>  backend/src/ir/profile.cpp                 | 2 ++
>  backend/src/ir/profile.hpp                 | 5 +++--
>  backend/src/libocl/tmpl/ocl_simd.tmpl.h    | 1 +
>  backend/src/llvm/llvm_gen_backend.cpp      | 7 +++++++
>  backend/src/llvm/llvm_gen_ocl_function.hxx | 1 +
>  src/cl_command_queue_gen7.c                | 8 ++++++++
>  13 files changed, 46 insertions(+), 3 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 684ecaf..62fd596 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2013,9 +2013,14 @@ namespace gbe
>        if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
>        allocCurbeReg(reg, GBE_CURBE_##PATCH); \
>      } else
> -
> +
> +    bool needLaneID = false;
>      fn.foreachInstruction([&](ir::Instruction &insn) {
>        const uint32_t srcNum = insn.getSrcNum();
> +      if (insn.getOpcode() == ir::OP_SIMD_ID) {
> +        GBE_ASSERT(srcNum == 0);
> +        needLaneID = true;
> +      }
>        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
>          const ir::Register reg = insn.getSrc(srcID);
>          if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { @@ -2054,6 +2059,8
> @@ namespace gbe
>      });
>  #undef INSERT_REG
> 
> +    if (needLaneID)
> +      allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
> 
It seems we need to add a curbeRegs.find(laneid) check here. If the curbe register has already been allocated, there is no need to allocate it again.
[yejun] In my understanding, this is the only place where laneid is allocated, so there is no need to check first.


>      // After this point the vector is immutable. Sorting it will make
>      // research faster
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 026a858..19a3c24 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -2137,6 +2137,12 @@ namespace gbe
>              sel.MOV(dst, src);
>            }
>            break;
> +        case ir::OP_SIMD_ID:
> +          {
> +            const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);
> +            sel.MOV(dst, selLaneID);
> +          }
> +          break;
>          default: NOT_SUPPORTED;
>        }
>        sel.pop();
> diff --git a/backend/src/backend/program.h
> b/backend/src/backend/program.h index 554fb16..8c171f5 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -101,6 +101,7 @@ enum gbe_curbe_type {
>    GBE_CURBE_THREAD_NUM,
>    GBE_CURBE_ZERO,
>    GBE_CURBE_ONE,
> +  GBE_CURBE_LANE_ID,
>    GBE_CURBE_SLM_OFFSET,
>  };
> 
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index 86148bc..7723b90 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -1614,6 +1614,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
>    }
> 
>    DECL_EMIT_FUNCTION(SIMD_SIZE)
> +  DECL_EMIT_FUNCTION(SIMD_ID)
> 
>  #undef DECL_EMIT_FUNCTION
> 
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index c603d9e..436bfd2 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -572,6 +572,8 @@ namespace ir {
>    Instruction ALU0(Opcode opcode, Type type, Register dst);
>    /*! simd_size.type dst */
>    Instruction SIMD_SIZE(Type type, Register dst);
> +  /*! simd_id.type dst */
> +  Instruction SIMD_ID(Type type, Register dst);
>    /*! alu1.type dst src */
>    Instruction ALU1(Opcode opcode, Type type, Register dst, Register src);
>    /*! mov.type dst src */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index f86cfbb..3f08a92 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -26,6 +26,7 @@
>   * \author Benjamin Segovia <benjamin.segovia at intel.com>
>   */
>  DECL_INSN(SIMD_SIZE, NullaryInstruction)
> +DECL_INSN(SIMD_ID, NullaryInstruction)
>  DECL_INSN(MOV, UnaryInstruction)
>  DECL_INSN(COS, UnaryInstruction)
>  DECL_INSN(SIN, UnaryInstruction)
> diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp index
> 2b1ffdb..26c4129 100644
> --- a/backend/src/ir/liveness.cpp
> +++ b/backend/src/ir/liveness.cpp
> @@ -66,6 +66,11 @@ namespace ir {
>          const uint32_t srcNum = insn.getSrcNum();
>          const uint32_t dstNum = insn.getDstNum();
>          bool uniform = true;
> +
> +        //have no way to decide the dst uniform if there is no source
> +        if (srcNum == 0)
> +          uniform = false;
> +
>          for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
>            const Register reg = insn.getSrc(srcID);
>            if (!fn.isUniformRegister(reg)) diff --git a/backend/src/ir/profile.cpp
> b/backend/src/ir/profile.cpp index ec7ab94..2f6539a 100644
> --- a/backend/src/ir/profile.cpp
> +++ b/backend/src/ir/profile.cpp
> @@ -44,6 +44,7 @@ namespace ir {
>          "retVal", "slm_offset",
>          "printf_buffer_pointer", "printf_index_buffer_pointer",
>          "dwblockip",
> +        "lane_id",
>          "invalid"
>      };
> 
> @@ -88,6 +89,7 @@ namespace ir {
>        DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
>        DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
>        DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
> +      DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
>        DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
>      }
>  #undef DECL_NEW_REG
> diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index
> 8f69320..4de6fe0 100644
> --- a/backend/src/ir/profile.hpp
> +++ b/backend/src/ir/profile.hpp
> @@ -72,8 +72,9 @@ namespace ir {
>      static const Register printfbptr = Register(28); // printf buffer address .
>      static const Register printfiptr = Register(29); // printf index buffer address.
>      static const Register dwblockip = Register(30);  // blockip
> -    static const Register invalid = Register(31);  // used for valid comparation.
> -    static const uint32_t regNum = 32;             // number of special registers
> +    static const Register laneid = Register(31);  // lane id.
> +    static const Register invalid = Register(32);  // used for valid comparation.
> +    static const uint32_t regNum = 33;             // number of special registers
>      extern const char *specialRegMean[];           // special register name.
>    } /* namespace ocl */
> 
> diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> index b992902..620e329 100644
> --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> @@ -25,3 +25,4 @@
>  /////////////////////////////////////////////////////////////////////////////
> 
>  uint __gen_ocl_get_simd_size(void);
> +uint __gen_ocl_get_simd_id(void);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index ac67add..f46bc79 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2806,6 +2806,7 @@ namespace gbe
>        case GEN_OCL_SIMD_SIZE:
>        case GEN_OCL_READ_TM:
>        case GEN_OCL_REGION:
> +      case GEN_OCL_SIMD_ID:
>          this->newRegister(&I);
>          break;
>        case GEN_OCL_PRINTF:
> @@ -3461,6 +3462,12 @@ namespace gbe
>              ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst);
>              break;
>            }
> +          case GEN_OCL_SIMD_ID:
> +          {
> +            const ir::Register dst = this->getRegister(&I);
> +            ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst);
> +            break;
> +          }
>            default: break;
>          }
>        }
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 2b151f2..e2bffde 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -155,6 +155,7 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16,
> __gen_ocl_f32to16)  DECL_LLVM_GEN_FUNCTION(SIMD_ANY,
> __gen_ocl_simd_any)  DECL_LLVM_GEN_FUNCTION(SIMD_ALL,
> __gen_ocl_simd_all)  DECL_LLVM_GEN_FUNCTION(SIMD_SIZE,
> __gen_ocl_get_simd_size)
> +DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id)
> 
>  DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
> DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region) diff --git
> a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index
> 4adbd2b..e27a211 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -210,6 +210,14 @@ cl_curbe_fill(cl_kernel ker,
>    UPLOAD(GBE_CURBE_WORK_DIM, work_dim);  #undef UPLOAD
> 
> +  /* __gen_ocl_get_simd_id needs it */
> +  if ((offset = interp_kernel_get_curbe_offset(ker->opaque,
> GBE_CURBE_LANE_ID, 0)) >= 0) {
> +    const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
> +    uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
> +    int32_t i;
> +    for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;  }
> +
>    /* Write identity for the stack pointer. This is required by the stack pointer
>     * computation in the kernel
>     */
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list