[Beignet] [PATCH V2 1/2] add simd level function __gen_ocl_get_simd_id
Yang, Rong R
rong.r.yang at intel.com
Fri Apr 17 02:12:03 PDT 2015
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Friday, April 17, 2015 14:47
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH V2 1/2] add simd level function
> __gen_ocl_get_simd_id
>
> uint __gen_ocl_get_simd_id();
> return value ranges from 0 to simdsize - 1
>
> V2: use function sel.selReg to refine code
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 9 ++++++++-
> backend/src/backend/gen_insn_selection.cpp | 6 ++++++
> backend/src/backend/program.h | 1 +
> backend/src/ir/instruction.cpp | 1 +
> backend/src/ir/instruction.hpp | 2 ++
> backend/src/ir/instruction.hxx | 1 +
> backend/src/ir/liveness.cpp | 5 +++++
> backend/src/ir/profile.cpp | 2 ++
> backend/src/ir/profile.hpp | 5 +++--
> backend/src/libocl/tmpl/ocl_simd.tmpl.h | 1 +
> backend/src/llvm/llvm_gen_backend.cpp | 7 +++++++
> backend/src/llvm/llvm_gen_ocl_function.hxx | 1 +
> src/cl_command_queue_gen7.c | 8 ++++++++
> 13 files changed, 46 insertions(+), 3 deletions(-)
>
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 684ecaf..62fd596 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2013,9 +2013,14 @@ namespace gbe
> if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
> allocCurbeReg(reg, GBE_CURBE_##PATCH); \
> } else
> -
> +
> + bool needLaneID = false;
> fn.foreachInstruction([&](ir::Instruction &insn) {
> const uint32_t srcNum = insn.getSrcNum();
> + if (insn.getOpcode() == ir::OP_SIMD_ID) {
> + GBE_ASSERT(srcNum == 0);
> + needLaneID = true;
> + }
> for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
> const ir::Register reg = insn.getSrc(srcID);
> if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
> @@ -2054,6 +2059,8 @@ namespace gbe
> });
> #undef INSERT_REG
>
> + if (needLaneID)
> + allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
>
It seems a curbeRegs.find(laneid) check needs to be added here: if the curbe register for laneid has already been allocated, it does not need to be allocated again.
> // After this point the vector is immutable. Sorting it will make
> // research faster
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 026a858..19a3c24 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -2137,6 +2137,12 @@ namespace gbe
> sel.MOV(dst, src);
> }
> break;
> + case ir::OP_SIMD_ID:
> + {
> + const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);
> + sel.MOV(dst, selLaneID);
> + }
> + break;
> default: NOT_SUPPORTED;
> }
> sel.pop();
> diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
> index 554fb16..8c171f5 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -101,6 +101,7 @@ enum gbe_curbe_type {
> GBE_CURBE_THREAD_NUM,
> GBE_CURBE_ZERO,
> GBE_CURBE_ONE,
> + GBE_CURBE_LANE_ID,
> GBE_CURBE_SLM_OFFSET,
> };
>
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index 86148bc..7723b90 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -1614,6 +1614,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
> }
>
> DECL_EMIT_FUNCTION(SIMD_SIZE)
> + DECL_EMIT_FUNCTION(SIMD_ID)
>
> #undef DECL_EMIT_FUNCTION
>
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index c603d9e..436bfd2 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -572,6 +572,8 @@ namespace ir {
> Instruction ALU0(Opcode opcode, Type type, Register dst);
> /*! simd_size.type dst */
> Instruction SIMD_SIZE(Type type, Register dst);
> + /*! simd_id.type dst */
> + Instruction SIMD_ID(Type type, Register dst);
> /*! alu1.type dst src */
> Instruction ALU1(Opcode opcode, Type type, Register dst, Register src);
> /*! mov.type dst src */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index f86cfbb..3f08a92 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -26,6 +26,7 @@
> * \author Benjamin Segovia <benjamin.segovia at intel.com>
> */
> DECL_INSN(SIMD_SIZE, NullaryInstruction)
> +DECL_INSN(SIMD_ID, NullaryInstruction)
> DECL_INSN(MOV, UnaryInstruction)
> DECL_INSN(COS, UnaryInstruction)
> DECL_INSN(SIN, UnaryInstruction)
> diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
> index 2b1ffdb..26c4129 100644
> --- a/backend/src/ir/liveness.cpp
> +++ b/backend/src/ir/liveness.cpp
> @@ -66,6 +66,11 @@ namespace ir {
> const uint32_t srcNum = insn.getSrcNum();
> const uint32_t dstNum = insn.getDstNum();
> bool uniform = true;
> +
> + //have no way to decide the dst uniform if there is no source
> + if (srcNum == 0)
> + uniform = false;
> +
> for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
> const Register reg = insn.getSrc(srcID);
> if (!fn.isUniformRegister(reg))
> diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
> index ec7ab94..2f6539a 100644
> --- a/backend/src/ir/profile.cpp
> +++ b/backend/src/ir/profile.cpp
> @@ -44,6 +44,7 @@ namespace ir {
> "retVal", "slm_offset",
> "printf_buffer_pointer", "printf_index_buffer_pointer",
> "dwblockip",
> + "lane_id",
> "invalid"
> };
>
> @@ -88,6 +89,7 @@ namespace ir {
> DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
> DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
> DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
> + DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
> DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
> }
> #undef DECL_NEW_REG
> diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
> index 8f69320..4de6fe0 100644
> --- a/backend/src/ir/profile.hpp
> +++ b/backend/src/ir/profile.hpp
> @@ -72,8 +72,9 @@ namespace ir {
> static const Register printfbptr = Register(28); // printf buffer address .
> static const Register printfiptr = Register(29); // printf index buffer address.
> static const Register dwblockip = Register(30); // blockip
> - static const Register invalid = Register(31); // used for valid comparation.
> - static const uint32_t regNum = 32; // number of special registers
> + static const Register laneid = Register(31); // lane id.
> + static const Register invalid = Register(32); // used for valid comparation.
> + static const uint32_t regNum = 33; // number of special registers
> extern const char *specialRegMean[]; // special register name.
> } /* namespace ocl */
>
> diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> index b992902..620e329 100644
> --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> @@ -25,3 +25,4 @@
> /////////////////////////////////////////////////////////////////////////////
>
> uint __gen_ocl_get_simd_size(void);
> +uint __gen_ocl_get_simd_id(void);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index ac67add..f46bc79 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2806,6 +2806,7 @@ namespace gbe
> case GEN_OCL_SIMD_SIZE:
> case GEN_OCL_READ_TM:
> case GEN_OCL_REGION:
> + case GEN_OCL_SIMD_ID:
> this->newRegister(&I);
> break;
> case GEN_OCL_PRINTF:
> @@ -3461,6 +3462,12 @@ namespace gbe
> ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst);
> break;
> }
> + case GEN_OCL_SIMD_ID:
> + {
> + const ir::Register dst = this->getRegister(&I);
> + ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst);
> + break;
> + }
> default: break;
> }
> }
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 2b151f2..e2bffde 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -155,6 +155,7 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
> DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
> DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
> DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size)
> +DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id)
>
> DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
> DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index 4adbd2b..e27a211 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -210,6 +210,14 @@ cl_curbe_fill(cl_kernel ker,
> UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
> #undef UPLOAD
>
> + /* __gen_ocl_get_simd_id needs it */
> + if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) {
> + const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
> + uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
> + int32_t i;
> + for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
> + }
> +
> /* Write identity for the stack pointer. This is required by the stack pointer
> * computation in the kernel
> */
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list