[Beignet] [PATCH 1/2] add 3 simd level built-in functions: shuffle, simdsize and simdid
Yang, Rong R
rong.r.yang at intel.com
Thu Apr 2 22:47:13 PDT 2015
Some comments inline below.
Thanks
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo, Yejun
> Sent: Friday, March 27, 2015 14:17
> To: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH 1/2] add 3 simd level built-in functions: shuffle,
> simdsize and simdid
>
> Ask for review, thanks.
>
> yejun
>
> -----Original Message-----
> From: Guo, Yejun
> Sent: Friday, March 20, 2015 1:58 PM
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [PATCH 1/2] add 3 simd level built-in functions: shuffle, simdsize and
> simdid
>
> uint __gen_ocl_get_simd_size();
> returns 8 if SIMD8, returns 16 if SIMD16
>
> uint __gen_ocl_get_simd_id();
> return value ranges from 0 to simdsize - 1
>
> floatN __gen_ocl_simd_shuffle(floatN x, uint c);
> intN __gen_ocl_simd_shuffle(intN x, uint c);
> uintN __gen_ocl_simd_shuffle(uintN x, uint c); the value of x of the c-th
> channel of the SIMD is returned, for all SIMD channels, the behavior is
> undefined if c is larger than simdsize - 1
>
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> backend/src/backend/gen8_context.cpp | 29 ++++-
> backend/src/backend/gen_context.cpp | 127 +++++++++++++++----
> --
> backend/src/backend/gen_context.hpp | 1 +
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_selection.cpp | 60 ++++++++++
> backend/src/backend/gen_insn_selection.hxx | 2 +
> backend/src/backend/program.h | 1 +
> backend/src/ir/context.hpp | 6 +
> backend/src/ir/instruction.cpp | 32 ++++++
> backend/src/ir/instruction.hpp | 17 +++
> backend/src/ir/instruction.hxx | 3 +
> backend/src/ir/liveness.cpp | 5 +
> backend/src/ir/profile.cpp | 2 +
> backend/src/ir/profile.hpp | 5 +-
> backend/src/libocl/CMakeLists.txt | 2 +-
> backend/src/libocl/include/ocl.h | 1 +
> backend/src/libocl/include/ocl_misc.h | 8 --
> backend/src/libocl/script/ocl_simd.def | 4 +
> backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 19 +++
> backend/src/libocl/tmpl/ocl_simd.tmpl.h | 34 ++++++
> backend/src/llvm/llvm_gen_backend.cpp | 27 +++++
> backend/src/llvm/llvm_gen_ocl_function.hxx | 4 +
> src/cl_command_queue_gen7.c | 8 ++
> 23 files changed, 351 insertions(+), 47 deletions(-) create mode 100644
> backend/src/libocl/script/ocl_simd.def
> create mode 100644 backend/src/libocl/tmpl/ocl_simd.tmpl.cl
> create mode 100644 backend/src/libocl/tmpl/ocl_simd.tmpl.h
>
> diff --git a/backend/src/backend/gen8_context.cpp
> b/backend/src/backend/gen8_context.cpp
> index 3f57cf6..144fd00 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -240,6 +240,9 @@ namespace gbe
> }
>
> void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn)
> {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + const GenRegister src0 = ra->genReg(insn.src(0));
> + const GenRegister src1 = ra->genReg(insn.src(1));
> switch (insn.opcode) {
> case SEL_OP_SEL_INT64:
> case SEL_OP_I64AND:
> @@ -250,14 +253,34 @@ namespace gbe
> break;
> case SEL_OP_UPSAMPLE_LONG:
> {
> - const GenRegister dst = ra->genReg(insn.dst(0));
> - const GenRegister src0 = ra->genReg(insn.src(0));
> - const GenRegister src1 = ra->genReg(insn.src(1));
> p->MOV(dst, src0);
> p->SHL(dst, dst, GenRegister::immud(32));
> p->ADD(dst, dst, src1);
> break;
> }
> + case SEL_OP_SIMD_SHUFFLE:
> + {
> + uint32_t simd = p->curr.execWidth;
> + if (src1.file == GEN_IMMEDIATE_VALUE) {
> + uint32_t offset = src1.value.ud % simd;
> + uint32_t nr = src0.nr;
> + uint32_t subnr = src0.subnr;
> + subnr = subnr + offset;
> + if (subnr > 8) {
> + nr = nr + 1;
> + subnr = subnr - 8;
> + }
You can use GenRegister::suboffset directly here instead of computing nr and subnr by hand.
> + p->MOV(dst, GenRegister::ud1grf(nr, subnr));
> + } else {
> + uint32_t base = src0.nr * 32 + src0.subnr * 4;
> + GenRegister baseReg = GenRegister::immuw(base);
> + const GenRegister a0 = GenRegister::addr8(0);
> + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
> + p->MOV(dst, indirect);
> + }
> + break;
> + }
> default:
> GenContext::emitBinaryInstruction(insn);
> }
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index cdf581c..25c7a5a 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -198,6 +198,22 @@ namespace gbe
> this->labelPos.insert(std::make_pair(label, p->store.size()));
> }
>
> + void GenContext::emitNullaryInstruction(const SelectionInstruction &insn)
> {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + switch (insn.opcode) {
> + case SEL_OP_SIMD_ID:
> + {
> + const GenRegister selLaneID = this->simdWidth == 8 ?
> + GenRegister::ud8grf(ir::ocl::laneid) :
> + GenRegister::ud16grf(ir::ocl::laneid);
> + const GenRegister laneID = ra->genReg(selLaneID);
> + p->MOV(dst, laneID);
> + }
> + break;
> + default: NOT_IMPLEMENTED;
> + }
> + }
> +
Why not handle SEL_OP_SIMD_ID in instruction selection, just as SIMD_SIZE is handled?
Furthermore, you could try handling both SIMD_ID and SIMD_SIZE in the GenWriter; that would avoid the need for NullaryInstruction entirely.
> void GenContext::emitUnaryInstruction(const SelectionInstruction &insn) {
> const GenRegister dst = ra->genReg(insn.dst(0));
> const GenRegister src = ra->genReg(insn.src(0)); @@ -583,6 +599,46 @@
> namespace gbe
> p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
> }
> break;
> + case SEL_OP_SIMD_SHUFFLE:
> + {
> + uint32_t simd = p->curr.execWidth;
> + if (src1.file == GEN_IMMEDIATE_VALUE) {
> + uint32_t offset = src1.value.ud % simd;
> + uint32_t nr = src0.nr;
> + uint32_t subnr = src0.subnr;
> + subnr = subnr + offset;
> + if (subnr > 8) {
> + nr = nr + 1;
> + subnr = subnr - 8;
> + }
GenRegister::suboffset can also be used here.
> + p->MOV(dst, GenRegister::ud1grf(nr, subnr));
> + } else {
> + uint32_t base = src0.nr * 32 + src0.subnr * 4;
> + GenRegister baseReg = GenRegister::immuw(base);
> + const GenRegister a0 = GenRegister::addr8(0);
> +
> + p->push();
> + if (simd == 8) {
> + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
> + p->MOV(dst, indirect);
> + }
> + else if (simd == 16) {
> + p->curr.execWidth = 8;
> + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
> + p->MOV(dst, indirect);
> +
> + p->curr.quarterControl = 1;
> + p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr /
> typeSize(GEN_TYPE_UW)), baseReg);
> + p->MOV(GenRegister::offset(dst, 1, 0), indirect);
> + }
> + else
> + NOT_IMPLEMENTED;
> + p->pop();
> + }
> + }
> + break;
> default: NOT_IMPLEMENTED;
> }
> }
> @@ -2023,41 +2079,46 @@ namespace gbe
> } else
>
> fn.foreachInstruction([&](ir::Instruction &insn) {
> - const uint32_t srcNum = insn.getSrcNum();
> - for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
> - const ir::Register reg = insn.getSrc(srcID);
> - if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
> - if (srcID != 0) continue;
> - const unsigned char bti =
> ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex();
> - const unsigned char type =
> ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();;
> - ir::ImageInfoKey key(bti, type);
> - const ir::Register imageInfo = insn.getSrc(0);
> - if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
> - uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
> - insertCurbeReg(imageInfo, offset);
> + if (insn.getOpcode() == ir::OP_SIMD_ID) {
> + if (curbeRegs.find(laneid) == curbeRegs.end())
> + allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
If SEL_OP_SIMD_ID is handled in gen_insn_selection, no special handling is needed here.
> + } else {
> + const uint32_t srcNum = insn.getSrcNum();
> + for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
> + const ir::Register reg = insn.getSrc(srcID);
> + if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
> + if (srcID != 0) continue;
> + const unsigned char bti =
> ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex();
> + const unsigned char type =
> ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();;
> + ir::ImageInfoKey key(bti, type);
> + const ir::Register imageInfo = insn.getSrc(0);
> + if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
> + uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
> + insertCurbeReg(imageInfo, offset);
> + }
> + continue;
> }
> - continue;
> + if (fn.isSpecialReg(reg) == false) continue;
> + if (curbeRegs.find(reg) != curbeRegs.end()) continue;
> + if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0);
> + INSERT_REG(lsize0, LOCAL_SIZE_X)
> + INSERT_REG(lsize1, LOCAL_SIZE_Y)
> + INSERT_REG(lsize2, LOCAL_SIZE_Z)
> + INSERT_REG(gsize0, GLOBAL_SIZE_X)
> + INSERT_REG(gsize1, GLOBAL_SIZE_Y)
> + INSERT_REG(gsize2, GLOBAL_SIZE_Z)
> + INSERT_REG(goffset0, GLOBAL_OFFSET_X)
> + INSERT_REG(goffset1, GLOBAL_OFFSET_Y)
> + INSERT_REG(goffset2, GLOBAL_OFFSET_Z)
> + INSERT_REG(workdim, WORK_DIM)
> + INSERT_REG(numgroup0, GROUP_NUM_X)
> + INSERT_REG(numgroup1, GROUP_NUM_Y)
> + INSERT_REG(numgroup2, GROUP_NUM_Z)
> + INSERT_REG(stackptr, STACK_POINTER)
> + INSERT_REG(printfbptr, PRINTF_BUF_POINTER)
> + INSERT_REG(printfiptr, PRINTF_INDEX_POINTER)
> + do {} while(0);
> }
> - if (fn.isSpecialReg(reg) == false) continue;
> - if (curbeRegs.find(reg) != curbeRegs.end()) continue;
> - if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0);
> - INSERT_REG(lsize0, LOCAL_SIZE_X)
> - INSERT_REG(lsize1, LOCAL_SIZE_Y)
> - INSERT_REG(lsize2, LOCAL_SIZE_Z)
> - INSERT_REG(gsize0, GLOBAL_SIZE_X)
> - INSERT_REG(gsize1, GLOBAL_SIZE_Y)
> - INSERT_REG(gsize2, GLOBAL_SIZE_Z)
> - INSERT_REG(goffset0, GLOBAL_OFFSET_X)
> - INSERT_REG(goffset1, GLOBAL_OFFSET_Y)
> - INSERT_REG(goffset2, GLOBAL_OFFSET_Z)
> - INSERT_REG(workdim, WORK_DIM)
> - INSERT_REG(numgroup0, GROUP_NUM_X)
> - INSERT_REG(numgroup1, GROUP_NUM_Y)
> - INSERT_REG(numgroup2, GROUP_NUM_Z)
> - INSERT_REG(stackptr, STACK_POINTER)
> - INSERT_REG(printfbptr, PRINTF_BUF_POINTER)
> - INSERT_REG(printfiptr, PRINTF_INDEX_POINTER)
> - do {} while(0);
> }
> });
> #undef INSERT_REG
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 6ca88db..3ac675e 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -124,6 +124,7 @@ namespace gbe
>
> /*! Final Gen ISA emission helper functions */
> void emitLabelInstruction(const SelectionInstruction &insn);
> + virtual void emitNullaryInstruction(const SelectionInstruction
> + &insn);
> virtual void emitUnaryInstruction(const SelectionInstruction &insn);
> virtual void emitUnaryWithTempInstruction(const SelectionInstruction
> &insn);
> virtual void emitBinaryInstruction(const SelectionInstruction &insn); diff --
> git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index d054820..fd7e1a4 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -1,5 +1,6 @@
> // Family Latency SIMD16 SIMD8
> DECL_GEN7_SCHEDULE(Label, 0, 0, 0)
> +DECL_GEN7_SCHEDULE(Nullary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Unary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(UnaryWithTemp, 20, 40, 20)
> DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index c240261..1586098 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -477,6 +477,8 @@ namespace gbe
> /*! To make function prototypes more readable */
> typedef const GenRegister &Reg;
>
> +#define ALU0(OP) \
> + INLINE void OP(Reg dst) { ALU0(SEL_OP_##OP, dst); }
> #define ALU1(OP) \
> INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); } #define
> ALU1WithTemp(OP) \ @@ -530,12 +532,15 @@ namespace gbe
> ALU2WithTemp(HADD)
> ALU2WithTemp(RHADD)
> ALU2(UPSAMPLE_LONG)
> + ALU2(SIMD_SHUFFLE)
> + ALU0(SIMD_ID)
> ALU1WithTemp(CONVI_TO_I64)
> ALU1WithTemp(CONVF_TO_I64)
> ALU1(CONVI64_TO_I)
> I64Shift(I64SHL)
> I64Shift(I64SHR)
> I64Shift(I64ASR)
> +#undef ALU0
> #undef ALU1
> #undef ALU1WithTemp
> #undef ALU2
> @@ -622,6 +627,8 @@ namespace gbe
> void MATH(Reg dst, uint32_t function, Reg src0, Reg src1);
> /*! Extended math function (1 argument) */
> void MATH(Reg dst, uint32_t function, Reg src);
> + /*! Encode nullary instructions */
> + void ALU0(SelectionOpcode opcode, Reg dst);
> /*! Encode unary instructions */
> void ALU1(SelectionOpcode opcode, Reg dst, Reg src);
> /*! Encode unary with temp reg instructions */ @@ -1435,6 +1442,11 @@
> namespace gbe
> insn->dst(i + 1) = tmp[i];
> }
>
> + void Selection::Opaque::ALU0(SelectionOpcode opcode, Reg dst) {
> + SelectionInstruction *insn = this->appendInsn(opcode, 1, 0);
> + insn->dst(0) = dst;
> + }
> +
> void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) {
> SelectionInstruction *insn = this->appendInsn(opcode, 1, 1);
> insn->dst(0) = dst;
> @@ -2054,6 +2066,42 @@ namespace gbe
> #define DECL_CTOR(FAMILY, INSN_NUM, COST) \
> FAMILY##Pattern(void) : OneToManyPattern<FAMILY##Pattern,
> ir::FAMILY>(INSN_NUM, COST) {}
>
> + /*! Nullary instruction patterns */
> + class NullaryInstructionPattern : public SelectionPattern {
> + public:
> + NullaryInstructionPattern(void) : SelectionPattern(1,1) {
> + for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
> + if (ir::isOpcodeFrom<ir::NullaryInstruction>(ir::Opcode(op)) == true)
> + this->opcodes.push_back(ir::Opcode(op));
> + }
> +
> + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
> + using namespace ir;
> + const ir::NullaryInstruction &insn = cast<NullaryInstruction>(dag.insn);
> + const Opcode opcode = insn.getOpcode();
> + const Type type = insn.getType();
> + GenRegister dst = sel.selReg(insn.getDst(0), type);
> +
> + sel.push();
> + switch (opcode) {
> + case ir::OP_SIMD_SIZE:
> + {
> + const GenRegister src = GenRegister::immud(sel.curr.execWidth);
> + sel.curr.execWidth = 1;
> + sel.MOV(dst, src);
> + }
> + break;
> + case ir::OP_SIMD_ID:
> + sel.SIMD_ID(dst);
> + break;
> + default: NOT_SUPPORTED;
> + }
> + sel.pop();
> + return true;
> + }
> + };
> +
> /*! Unary instruction patterns */
> DECL_PATTERN(UnaryInstruction)
> {
> @@ -2563,6 +2611,17 @@ namespace gbe
> case OP_UPSAMPLE_LONG:
> sel.UPSAMPLE_LONG(dst, src0, src1);
> break;
> + case OP_SIMD_SHUFFLE:
> + {
> + if (src1.file == GEN_IMMEDIATE_VALUE) {
> + sel.SIMD_SHUFFLE(dst, src0, src1);
> + } else {
> + GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth,
> sel.reg(FAMILY_DWORD));
> + sel.SHL(shiftL, src1, GenRegister::immud(0x2));
> + sel.SIMD_SHUFFLE(dst, src0, shiftL);
> + }
> + }
> + break;
> default: NOT_IMPLEMENTED;
> }
> sel.pop();
> @@ -4789,6 +4848,7 @@ namespace gbe
> this->insert<GetImageInfoInstructionPattern>();
> this->insert<ReadARFInstructionPattern>();
> this->insert<RegionInstructionPattern>();
> + this->insert<NullaryInstructionPattern>();
>
> // Sort all the patterns with the number of instructions they output
> for (uint32_t op = 0; op < ir::OP_INVALID; ++op) diff --git
> a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index 09f5aaf..87ccee3 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -77,6 +77,8 @@ DECL_SELECTION_IR(RHADD,
> BinaryWithTempInstruction) DECL_SELECTION_IR(I64HADD,
> I64HADDInstruction) DECL_SELECTION_IR(I64RHADD, I64RHADDInstruction)
> DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction)
> +DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction)
> +DECL_SELECTION_IR(SIMD_ID, NullaryInstruction)
> DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction)
> DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
> DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction) diff --git
> a/backend/src/backend/program.h b/backend/src/backend/program.h
> index dc5662f..c4023ec 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -99,6 +99,7 @@ enum gbe_curbe_type {
> GBE_CURBE_THREAD_NUM,
> GBE_CURBE_ZERO,
> GBE_CURBE_ONE,
> + GBE_CURBE_LANE_ID,
> GBE_CURBE_SLM_OFFSET,
> };
>
> diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index
> cf5109d..af65ff3 100644
> --- a/backend/src/ir/context.hpp
> +++ b/backend/src/ir/context.hpp
> @@ -176,6 +176,12 @@ namespace ir {
> DECL_THREE_SRC_INSN(MAD);
> #undef DECL_THREE_SRC_INSN
>
> + /*! For all nullary functions */
> + void ALU0(Opcode opcode, Type type, Register dst) {
> + const Instruction insn = gbe::ir::ALU0(opcode, type, dst);
> + this->append(insn);
> + }
> +
> /*! For all unary functions */
> void ALU1(Opcode opcode, Type type, Register dst, Register src) {
> const Instruction insn = gbe::ir::ALU1(opcode, type, dst, src); diff --git
> a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index
> 797552f..9c3331b 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -131,6 +131,17 @@ namespace ir {
> Register src[srcNum]; //!< Indices of the sources
> };
>
> + /*! All 0-source arithmetic instructions */
> + class ALIGNED_INSTRUCTION NullaryInstruction : public NaryInstruction<0>
> + {
> + public:
> + NullaryInstruction(Opcode opcode, Type type, Register dst) {
> + this->opcode = opcode;
> + this->type = type;
> + this->dst[0] = dst;
> + }
> + };
> +
> /*! All 1-source arithmetic instructions */
> class ALIGNED_INSTRUCTION UnaryInstruction : public NaryInstruction<1>
> {
> @@ -1305,6 +1316,10 @@ namespace ir {
> }; \
> }
>
> +START_INTROSPECTION(NullaryInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(NullaryInstruction)
> +
> START_INTROSPECTION(UnaryInstruction)
> #include "ir/instruction.hxx"
> END_INTROSPECTION(UnaryInstruction)
> @@ -1532,6 +1547,7 @@ END_FUNCTION(Instruction, Register)
> return reinterpret_cast<const internal::CLASS*>(this)->CALL; \
> }
>
> +DECL_MEM_FN(NullaryInstruction, Type, getType(void), getType())
> DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType())
> DECL_MEM_FN(BinaryInstruction, Type, getType(void), getType())
> DECL_MEM_FN(BinaryInstruction, bool, commutes(void), commutes()) @@
> -1586,6 +1602,21 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
> // Implements the emission functions
> ///////////////////////////////////////////////////////////////////////////
>
> + // For all nullary functions with given opcode Instruction
> + ALU0(Opcode opcode, Type type, Register dst) {
> + return internal::NullaryInstruction(opcode, type, dst).convert(); }
> +
> + // All unary functions
> +#define DECL_EMIT_FUNCTION(NAME) \
> + Instruction NAME(Type type, Register dst) { \
> + return ALU0(OP_##NAME, type, dst);\
> + }
> +
> + DECL_EMIT_FUNCTION(SIMD_SIZE)
> +
> +#undef DECL_EMIT_FUNCTION
> +
> // For all unary functions with given opcode
> Instruction ALU1(Opcode opcode, Type type, Register dst, Register src) {
> return internal::UnaryInstruction(opcode, type, dst, src).convert(); @@ -
> 1645,6 +1676,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
> DECL_EMIT_FUNCTION(RHADD)
> DECL_EMIT_FUNCTION(I64HADD)
> DECL_EMIT_FUNCTION(I64RHADD)
> + DECL_EMIT_FUNCTION(SIMD_SHUFFLE)
>
> #undef DECL_EMIT_FUNCTION
>
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 24d27aa..6dd3e81 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -198,6 +198,15 @@ namespace ir {
> /*! Output the instruction string in the given stream */
> std::ostream &operator<< (std::ostream &out, const Instruction &proxy);
>
> + /*! Nullary instruction instructions are typed. */ class
> + NullaryInstruction : public Instruction {
> + public:
> + /*! Get the type manipulated by the instruction */
> + Type getType(void) const;
> + /*! Return true if the given instruction is an instance of this class */
> + static bool isClassOf(const Instruction &insn); };
> +
> /*! Unary instructions are typed. dst and sources share the same type */
> class UnaryInstruction : public Instruction {
> public:
> @@ -558,6 +567,12 @@ namespace ir {
> /// All emission functions
> ///////////////////////////////////////////////////////////////////////////
>
> + /*! alu0.type dst */
> + Instruction ALU0(Opcode opcode, Type type, Register dst); /*!
> + simd_size.type dst */ Instruction SIMD_SIZE(Type type, Register dst);
> + /*! simd_id.type dst */ Instruction SIMD_ID(Type type, Register dst);
> /*! alu1.type dst src */
> Instruction ALU1(Opcode opcode, Type type, Register dst, Register src);
> /*! mov.type dst src */
> @@ -670,6 +685,8 @@ namespace ir {
> Instruction GT(Type type, Register dst, Register src0, Register src1);
> /*! ord.type dst src0 src1 */
> Instruction ORD(Type type, Register dst, Register src0, Register src1);
> + /*! simd_shuffle.type dst src0 src1 */ Instruction SIMD_SHUFFLE(Type
> + type, Register dst, Register src0, Register src1);
> /*! BITCAST.{dstType <- srcType} dst src */
> Instruction BITCAST(Type dstType, Type srcType, Tuple dst, Tuple src,
> uint8_t dstNum, uint8_t srcNum);
> /*! cvt.{dstType <- srcType} dst src */ diff --git
> a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index
> de4abfb..76269bd 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -25,6 +25,8 @@
> * \file instruction.hxx
> * \author Benjamin Segovia <benjamin.segovia at intel.com>
> */
> +DECL_INSN(SIMD_SIZE, NullaryInstruction) DECL_INSN(SIMD_ID,
> +NullaryInstruction)
> DECL_INSN(MOV, UnaryInstruction)
> DECL_INSN(COS, UnaryInstruction)
> DECL_INSN(SIN, UnaryInstruction)
> @@ -57,6 +59,7 @@ DECL_INSN(BSB, BinaryInstruction) DECL_INSN(OR,
> BinaryInstruction) DECL_INSN(XOR, BinaryInstruction) DECL_INSN(AND,
> BinaryInstruction)
> +DECL_INSN(SIMD_SHUFFLE, BinaryInstruction)
> DECL_INSN(SEL, SelectInstruction)
> DECL_INSN(EQ, CompareInstruction)
> DECL_INSN(NE, CompareInstruction)
> diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp index
> 2b1ffdb..26c4129 100644
> --- a/backend/src/ir/liveness.cpp
> +++ b/backend/src/ir/liveness.cpp
> @@ -66,6 +66,11 @@ namespace ir {
> const uint32_t srcNum = insn.getSrcNum();
> const uint32_t dstNum = insn.getDstNum();
> bool uniform = true;
> +
> + //have no way to decide the dst uniform if there is no source
> + if (srcNum == 0)
> + uniform = false;
> +
> for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
> const Register reg = insn.getSrc(srcID);
> if (!fn.isUniformRegister(reg)) diff --git a/backend/src/ir/profile.cpp
> b/backend/src/ir/profile.cpp index 4c272bd..55aedb4 100644
> --- a/backend/src/ir/profile.cpp
> +++ b/backend/src/ir/profile.cpp
> @@ -43,6 +43,7 @@ namespace ir {
> "zero", "one",
> "retVal", "slm_offset",
> "printf_buffer_pointer", "printf_index_buffer_pointer",
> + "lane_id",
> "invalid"
> };
>
> @@ -86,6 +87,7 @@ namespace ir {
> DECL_NEW_REG(FAMILY_DWORD, slmoffset, 1);
> DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
> DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
> + DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
> DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
> }
> #undef DECL_NEW_REG
> diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index
> 7259d9f..d310128 100644
> --- a/backend/src/ir/profile.hpp
> +++ b/backend/src/ir/profile.hpp
> @@ -71,8 +71,9 @@ namespace ir {
> static const Register slmoffset = Register(27); // Group's SLM offset in
> total 64K SLM
> static const Register printfbptr = Register(28); // printf buffer address .
> static const Register printfiptr = Register(29); // printf index buffer address.
> - static const Register invalid = Register(30); // used for valid comparation.
> - static const uint32_t regNum = 31; // number of special registers
> + static const Register laneid = Register(30); // printf index buffer address.
Actually, laneid is the same as ocl::stackptr. Can you reuse ocl::stackptr, or rename it?
> + static const Register invalid = Register(31); // used for valid comparation.
> + static const uint32_t regNum = 32; // number of special registers
> extern const char *specialRegMean[]; // special register name.
> } /* namespace ocl */
>
> diff --git a/backend/src/libocl/CMakeLists.txt
> b/backend/src/libocl/CMakeLists.txt
> index 16f00ee..623affc 100644
> --- a/backend/src/libocl/CMakeLists.txt
> +++ b/backend/src/libocl/CMakeLists.txt
> @@ -90,7 +90,7 @@ MACRO(GENERATE_SOURCE_PY _mod)
> )
> ENDMACRO(GENERATE_SOURCE_PY)
>
> -SET (OCL_PY_GENERATED_MODULES ocl_common ocl_relational
> ocl_integer ocl_math)
> +SET (OCL_PY_GENERATED_MODULES ocl_common ocl_relational
> ocl_integer
> +ocl_math ocl_simd)
> FOREACH(M ${OCL_PY_GENERATED_MODULES})
> GENERATE_HEADER_PY(${M})
> GENERATE_SOURCE_PY(${M})
> diff --git a/backend/src/libocl/include/ocl.h
> b/backend/src/libocl/include/ocl.h
> index e886670..a53f4c0 100644
> --- a/backend/src/libocl/include/ocl.h
> +++ b/backend/src/libocl/include/ocl.h
> @@ -30,6 +30,7 @@
> #include "ocl_image.h"
> #include "ocl_integer.h"
> #include "ocl_math.h"
> +#include "ocl_simd.h"
> #include "ocl_misc.h"
> #include "ocl_printf.h"
> #include "ocl_relational.h"
> diff --git a/backend/src/libocl/include/ocl_misc.h
> b/backend/src/libocl/include/ocl_misc.h
> index aa3f504..359025b 100644
> --- a/backend/src/libocl/include/ocl_misc.h
> +++ b/backend/src/libocl/include/ocl_misc.h
> @@ -128,14 +128,6 @@ DEF(ulong)
> #undef DEC16
> #undef DEC16X
>
> -
> -/* Temp to add the SIMD functions here. */ -
> /////////////////////////////////////////////////////////////////////////////
> -// SIMD level function
> -/////////////////////////////////////////////////////////////////////////////
> -short __gen_ocl_simd_any(short);
> -short __gen_ocl_simd_all(short);
> -
> struct time_stamp {
> // time tick
> ulong tick;
> diff --git a/backend/src/libocl/script/ocl_simd.def
> b/backend/src/libocl/script/ocl_simd.def
> new file mode 100644
> index 0000000..ccda619
> --- /dev/null
> +++ b/backend/src/libocl/script/ocl_simd.def
> @@ -0,0 +1,4 @@
> +##simd level functions
> +floatn __gen_ocl_simd_shuffle(floatn x, uint c) intn
> +__gen_ocl_simd_shuffle(intn x, uint c) uintn
> +__gen_ocl_simd_shuffle(uintn x, uint c)
> diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
> b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
> new file mode 100644
> index 0000000..b9da5e2
> --- /dev/null
> +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
> @@ -0,0 +1,19 @@
> +/*
> + * Copyright @ 2015 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +#include "ocl_simd.h"
> diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> new file mode 100644
> index 0000000..42afc7b
> --- /dev/null
> +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> @@ -0,0 +1,34 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + */
> +#ifndef __OCL_SIMD_H__
> +#define __OCL_SIMD_H__
> +
> +#include "ocl_types.h"
> +
> +///////////////////////////////////////////////////////////////////////
> +//////
> +// SIMD level function
> +///////////////////////////////////////////////////////////////////////
> +//////
> +short __gen_ocl_simd_any(short);
> +short __gen_ocl_simd_all(short);
> +
> +uint __gen_ocl_get_simd_size(void);
> +uint __gen_ocl_get_simd_id(void);
> +
> +OVERLOADABLE float __gen_ocl_simd_shuffle(float x, uint c);
> +OVERLOADABLE int __gen_ocl_simd_shuffle(int x, uint c); OVERLOADABLE
> +uint __gen_ocl_simd_shuffle(uint x, uint c);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index bf03a13..4fcb8bb 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2790,10 +2790,17 @@ namespace gbe
> case GEN_OCL_CONV_F32_TO_F16:
> case GEN_OCL_SIMD_ANY:
> case GEN_OCL_SIMD_ALL:
> + case GEN_OCL_SIMD_SHUFFLE:
> case GEN_OCL_READ_TM:
> case GEN_OCL_REGION:
> this->newRegister(&I);
> break;
> + case GEN_OCL_SIMD_SIZE:
> + this->newRegister(&I, NULL, true);
> + break;
> + case GEN_OCL_SIMD_ID:
> + this->newRegister(&I, NULL, false);
> + break;
> case GEN_OCL_PRINTF:
> break;
> default:
> @@ -3053,6 +3060,26 @@ namespace gbe
> ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src);
> break;
> }
> + case GEN_OCL_SIMD_SIZE:
> + {
> + const ir::Register dst = this->getRegister(&I);
> + ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst);
> + break;
> + }
> + case GEN_OCL_SIMD_ID:
> + {
> + const ir::Register dst = this->getRegister(&I);
> + ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst);
> + break;
> + }
> + case GEN_OCL_SIMD_SHUFFLE:
> + {
> + const ir::Register src0 = this->getRegister(*AI); ++AI;
> + const ir::Register src1 = this->getRegister(*AI); ++AI;
> + const ir::Register dst = this->getRegister(&I);
> + ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1);
> + break;
> + }
> case GEN_OCL_READ_TM:
> {
> const ir::Register dst = this->getRegister(&I); diff --git
> a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 9536a3c..714a293 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -155,6 +155,10 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16,
> __gen_ocl_f32to16) DECL_LLVM_GEN_FUNCTION(SIMD_ANY,
> __gen_ocl_simd_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL,
> __gen_ocl_simd_all)
>
> +DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size)
> +DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id)
> +DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, __gen_ocl_simd_shuffle)
> +
> DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
> DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
>
> diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c index 253c4f2..3f73de0 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -202,6 +202,14 @@ cl_curbe_fill(cl_kernel ker,
> UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD
>
> + /* __gen_ocl_get_simd_id needs it */
> + if ((offset = interp_kernel_get_curbe_offset(ker->opaque,
> GBE_CURBE_LANE_ID, 0)) >= 0) {
> + const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
> + uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
> + int32_t i;
> + for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i; }
> +
> /* Write identity for the stack pointer. This is required by the stack pointer
> * computation in the kernel
> */
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list