[Beignet] [PATCH V2] GBE/libocl: Add __gen_ocl_get_timestamp() to get timestamp.
Zhigang Gong
zhigang.gong at linux.intel.com
Thu Sep 18 00:04:43 PDT 2014
LGTM, just pushed, thanks!
On Thu, Sep 18, 2014 at 03:51:08PM +0800, Ruiling Song wrote:
> Gen provide tm0 register for intra-kernel profiling.
> Here we provide an API __gen_ocl_get_timestamp() to return
> the timestamp in TM.
>
> The return type is defined as:
> struct time_stamp {
> ulong tick;
> uint event;
> };
>
> 'tick' is a 64bit time tick. 'event' stores a value which means
> whether a tmEvent has occured (non-zero) or not (0). tmEvent includes
> time-impacting event such as context switch or frequency change
> since last time tm0 was read.
>
> I add a sample in the kernels/compiler_time_stamp.cl. Hope it
> would help you understand how to use it.
>
> V2:
> Introduce ir::ARFRegister to avoid directly use of nr/subnr in Gen IR.
> Rename __gen_ocl_extract_reg to __gen_ocl_region.
> Rename beignet_get_time_stamp to __gen_ocl_get_timestamp.
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/backend/gen/gen_mesa_disasm.c | 3 +
> backend/src/backend/gen_context.cpp | 1 +
> backend/src/backend/gen_defs.hpp | 1 +
> backend/src/backend/gen_insn_scheduling.cpp | 13 +++-
> backend/src/backend/gen_insn_selection.cpp | 67 +++++++++++++++++++
> backend/src/backend/gen_insn_selection.hxx | 1 +
> backend/src/ir/instruction.cpp | 94 +++++++++++++++++++++++++++
> backend/src/ir/instruction.hpp | 20 ++++++
> backend/src/ir/instruction.hxx | 2 +
> backend/src/ir/liveness.cpp | 1 +
> backend/src/ir/register.hpp | 15 +++++
> backend/src/libocl/include/ocl_misc.h | 9 +++
> backend/src/libocl/src/ocl_misc.cl | 13 ++++
> backend/src/llvm/llvm_gen_backend.cpp | 22 +++++++
> backend/src/llvm/llvm_gen_ocl_function.hxx | 3 +
> kernels/compiler_time_stamp.cl | 29 +++++++++
> utests/CMakeLists.txt | 1 +
> utests/compiler_time_stamp.cpp | 54 +++++++++++++++
> 18 files changed, 346 insertions(+), 3 deletions(-)
> create mode 100644 kernels/compiler_time_stamp.cl
> create mode 100644 utests/compiler_time_stamp.cpp
>
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
> index c120b60..266b501 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -552,6 +552,9 @@ static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr)
> string (file, "ip");
> return -1;
> break;
> + case GEN_ARF_TM:
> + format (file, "tm%d", _reg_nr & 0x0f);
> + break;
> default:
> format (file, "ARF%d", _reg_nr);
> break;
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 2550567..175878d 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -202,6 +202,7 @@ namespace gbe
> const GenRegister src = ra->genReg(insn.src(0));
> switch (insn.opcode) {
> case SEL_OP_MOV: p->MOV(dst, src, insn.extra.function); break;
> + case SEL_OP_READ_ARF: p->MOV(dst, src); break;
> case SEL_OP_FBH: p->FBH(dst, src); break;
> case SEL_OP_FBL: p->FBL(dst, src); break;
> case SEL_OP_NOT: p->NOT(dst, src); break;
> diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index f0da50a..19aad95 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -261,6 +261,7 @@ enum GenMessageTarget {
> #define GEN_ARF_CONTROL 0x80
> #define GEN_ARF_NOTIFICATION_COUNT 0x90
> #define GEN_ARF_IP 0xA0
> +#define GEN_ARF_TM 0xC0
>
> #define GEN_MRF_COMPR4 (1 << 7)
>
> diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
> index 106d608..ead3e26 100644
> --- a/backend/src/backend/gen_insn_scheduling.cpp
> +++ b/backend/src/backend/gen_insn_scheduling.cpp
> @@ -190,6 +190,10 @@ namespace gbe
> static const uint32_t MAX_FLAG_REGISTER = 8u;
> /*! Maximum number of *physical* accumulators registers */
> static const uint32_t MAX_ACC_REGISTER = 1u;
> + /*! Maximum number of *physical* tm registers */
> + static const uint32_t MAX_TM_REGISTER = 1u;
> + /*! Maximum number of *physical* arf registers */
> + static const uint32_t MAX_ARF_REGISTER = MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_TM_REGISTER;
> /*! Stores the last node that wrote to a register / memory ... */
> vector<ScheduleDAGNode*> nodes;
> /*! store nodes each node depends on */
> @@ -237,12 +241,12 @@ namespace gbe
> {
> if (scheduler.policy == PRE_ALLOC) {
> this->grfNum = selection.getRegNum();
> - nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
> + nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM);
> } else {
> const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
> GBE_ASSERT(simdWidth == 8 || simdWidth == 16);
> this->grfNum = simdWidth == 8 ? 128 : 64;
> - nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
> + nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM);
> }
> insnNodes.resize(selection.getLargestBlockSize());
> }
> @@ -327,6 +331,8 @@ namespace gbe
> } else if (file == GEN_ARF_ACCUMULATOR) {
> GBE_ASSERT(nr < MAX_ACC_REGISTER);
> return grfNum + MAX_FLAG_REGISTER + nr;
> + } else if (file == GEN_ARF_TM) {
> + return grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
> } else {
> NOT_SUPPORTED;
> return 0;
> @@ -348,7 +354,7 @@ namespace gbe
> }
>
> uint32_t DependencyTracker::getIndex(uint32_t bti) const {
> - const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
> + const uint32_t memDelta = grfNum + MAX_ARF_REGISTER;
> return bti == 0xfe ? memDelta + LOCAL_MEMORY : (bti == 0xff ? memDelta + SCRATCH_MEMORY : memDelta + GLOBAL_MEMORY);
> }
>
> @@ -583,6 +589,7 @@ namespace gbe
> ScheduleDAGNode *node = tracker.insnNodes[insnID];
> if (node->insn.isBranch() || node->insn.isLabel()
> || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF
> + || node->insn.opcode == SEL_OP_READ_ARF
> || node->insn.opcode == SEL_OP_BARRIER)
> tracker.makeBarrier(insnID, insnNum);
> }
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index d631579..f284ae1 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -458,6 +458,7 @@ namespace gbe
> #define I64Shift(OP) \
> INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); }
> ALU1(MOV)
> + ALU1(READ_ARF)
> ALU1WithTemp(MOV_DF)
> ALU1WithTemp(LOAD_DF_IMM)
> ALU1(LOAD_INT64_IMM)
> @@ -3979,6 +3980,70 @@ namespace gbe
> DECL_CTOR(GetImageInfoInstruction, 1, 1);
> };
>
> + class ReadARFInstructionPattern : public SelectionPattern
> + {
> + public:
> + ReadARFInstructionPattern(void) : SelectionPattern(1,1) {
> + this->opcodes.push_back(ir::OP_READ_ARF);
> + }
> +
> + INLINE uint32_t getRegNum(ir::ARFRegister arf) const {
> + if (arf == ir::ARF_TM) {
> + return 0xc0;
> + } else {
> + GBE_ASSERT(0);
> + return 0;
> + }
> + }
> +
> + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
> + using namespace ir;
> + const ir::ReadARFInstruction &insn = cast<ir::ReadARFInstruction>(dag.insn);
> + GenRegister dst;
> + dst = sel.selReg(insn.getDst(0), insn.getType());
> +
> + sel.push();
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + sel.curr.execWidth = 8;
> + sel.READ_ARF(dst, GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
> + getRegNum(insn.getARFRegister()),
> + 0,
> + getGenType(insn.getType()),
> + GEN_VERTICAL_STRIDE_8,
> + GEN_WIDTH_8,
> + GEN_HORIZONTAL_STRIDE_1));
> + sel.pop();
> + return true;
> + }
> + };
> +
> + /*! Get a region of a register */
> + class RegionInstructionPattern : public SelectionPattern
> + {
> + public:
> + RegionInstructionPattern(void) : SelectionPattern(1,1) {
> + this->opcodes.push_back(ir::OP_REGION);
> + }
> + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
> + using namespace ir;
> + const ir::RegionInstruction &insn = cast<ir::RegionInstruction>(dag.insn);
> + GenRegister dst, src;
> + dst = sel.selReg(insn.getDst(0), ir::TYPE_U32);
> + src = GenRegister::ud1grf(insn.getSrc(0));
> + src.subphysical = 1;
> + src = GenRegister::offset(src, 0, insn.getOffset()*4);
> +
> + sel.push();
> + sel.curr.noMask = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.MOV(dst, src);
> + sel.pop();
> + markAllChildren(dag);
> + return true;
> + }
> + };
> +
> /*! Branch instruction pattern */
> class BranchInstructionPattern : public SelectionPattern
> {
> @@ -4190,6 +4255,8 @@ namespace gbe
> this->insert<SelectModifierInstructionPattern>();
> this->insert<SampleInstructionPattern>();
> this->insert<GetImageInfoInstructionPattern>();
> + this->insert<ReadARFInstructionPattern>();
> + this->insert<RegionInstructionPattern>();
>
> // Sort all the patterns with the number of instructions they output
> for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 2d70982..048a844 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -85,3 +85,4 @@ DECL_SELECTION_IR(BRD, UnaryInstruction)
> DECL_SELECTION_IR(IF, UnaryInstruction)
> DECL_SELECTION_IR(ENDIF, UnaryInstruction)
> DECL_SELECTION_IR(ELSE, UnaryInstruction)
> +DECL_SELECTION_IR(READ_ARF, UnaryInstruction)
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index 370fb87..2d86480 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -668,6 +668,48 @@ namespace ir {
> Register dst[0], src[0];
> };
>
> + class ALIGNED_INSTRUCTION ReadARFInstruction :
> + public BasePolicy,
> + public NSrcPolicy<ReadARFInstruction, 0>,
> + public NDstPolicy<ReadARFInstruction, 1>
> + {
> + public:
> + INLINE ReadARFInstruction(Type type, Register dst, ARFRegister arf) {
> + this->type = type;
> + this->dst[0] = dst;
> + this->opcode = OP_READ_ARF;
> + this->arf = arf;
> + }
> + INLINE ir::ARFRegister getARFRegister(void) const { return this->arf; }
> + INLINE Type getType(void) const { return this->type; }
> + INLINE bool wellFormed(const Function &fn, std::string &why) const;
> + INLINE void out(std::ostream &out, const Function &fn) const;
> + Type type;
> + ARFRegister arf;
> + Register dst[1];
> + Register src[0];
> + };
> +
> + class ALIGNED_INSTRUCTION RegionInstruction :
> + public BasePolicy,
> + public NSrcPolicy<RegionInstruction, 1>,
> + public NDstPolicy<RegionInstruction, 1>
> + {
> + public:
> + INLINE RegionInstruction(Register dst, Register src, uint32_t offset) {
> + this->offset = offset;
> + this->dst[0] = dst;
> + this->src[0] = src;
> + this->opcode = OP_REGION;
> + }
> + INLINE uint32_t getOffset(void) const { return this->offset; }
> + INLINE bool wellFormed(const Function &fn, std::string &why) const;
> + INLINE void out(std::ostream &out, const Function &fn) const;
> + uint32_t offset;
> + Register dst[1];
> + Register src[1];
> + };
> +
> class ALIGNED_INSTRUCTION LabelInstruction :
> public BasePolicy,
> public NSrcPolicy<LabelInstruction, 0>,
> @@ -1022,6 +1064,30 @@ namespace ir {
> return true;
> }
>
> + INLINE bool ReadARFInstruction::wellFormed(const Function &fn, std::string &whyNot) const
> + {
> + if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32)) {
> + whyNot = "Only support S32/U32 type";
> + return false;
> + }
> +
> + const RegisterFamily family = getFamily(this->type);
> + if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false))
> + return false;
> +
> + return true;
> + }
> +
> + INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string &whyNot) const
> + {
> + if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, whyNot) == false))
> + return false;
> + if (UNLIKELY(checkRegisterData(FAMILY_DWORD, dst[0], fn, whyNot) == false))
> + return false;
> +
> + return true;
> + }
> +
> // Only a label index is required
> INLINE bool LabelInstruction::wellFormed(const Function &fn, std::string &whyNot) const
> {
> @@ -1138,6 +1204,16 @@ namespace ir {
> out << ": " << (int)bti.bti[i];
> }
>
> + INLINE void ReadARFInstruction::out(std::ostream &out, const Function &fn) const {
> + this->outOpcode(out);
> + out << " %" << this->getDst(fn, 0) << " arf:" << arf;
> + }
> +
> + INLINE void RegionInstruction::out(std::ostream &out, const Function &fn) const {
> + this->outOpcode(out);
> + out << " %" << this->getDst(fn, 0) << " %" << this->getSrc(fn, 0) << " offset: " << this->offset;
> + }
> +
> INLINE void LabelInstruction::out(std::ostream &out, const Function &fn) const {
> this->outOpcode(out);
> out << " $" << labelIndex;
> @@ -1287,6 +1363,14 @@ START_INTROSPECTION(SyncInstruction)
> #include "ir/instruction.hxx"
> END_INTROSPECTION(SyncInstruction)
>
> +START_INTROSPECTION(ReadARFInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(ReadARFInstruction)
> +
> +START_INTROSPECTION(RegionInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(RegionInstruction)
> +
> START_INTROSPECTION(LabelInstruction)
> #include "ir/instruction.hxx"
> END_INTROSPECTION(LabelInstruction)
> @@ -1471,6 +1555,9 @@ DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
> DECL_MEM_FN(BranchInstruction, bool, getInversePredicated(void), getInversePredicated())
> DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
> DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
> +DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType())
> +DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void), getARFRegister())
> +DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset())
> DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType())
> DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType())
> DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerIndex(void), getSamplerIndex())
> @@ -1667,6 +1754,13 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
> return internal::SyncInstruction(parameters).convert();
> }
>
> + Instruction READ_ARF(Type type, Register dst, ARFRegister arf) {
> + return internal::ReadARFInstruction(type, dst, arf).convert();
> + }
> + Instruction REGION(Register dst, Register src, uint32_t offset) {
> + return internal::RegionInstruction(dst, src, offset).convert();
> + }
> +
> // LABEL
> Instruction LABEL(LabelIndex labelIndex) {
> return internal::LabelInstruction(labelIndex).convert();
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 39fb2db..3526a41 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -496,6 +496,23 @@ namespace ir {
> static bool isClassOf(const Instruction &insn);
> };
>
> + /*! Read one register (8 DWORD) in arf */
> + class ReadARFInstruction : public Instruction {
> + public:
> + Type getType() const;
> + ir::ARFRegister getARFRegister() const;
> + /*! Return true if the given instruction is an instance of this class */
> + static bool isClassOf(const Instruction &insn);
> + };
> +
> + /*! return a region of a register, make sure the offset does not exceed the register size */
> + class RegionInstruction : public Instruction {
> + public:
> + uint32_t getOffset(void) const;
> + /*! Return true if the given instruction is an instance of this class */
> + static bool isClassOf(const Instruction &insn);
> + };
> +
> /*! Specialize the instruction. Also performs typechecking first based on the
> * opcode. Crashes if it fails
> */
> @@ -680,6 +697,9 @@ namespace ir {
> Instruction LOADI(Type type, Register dst, ImmediateIndex value);
> /*! sync.params... (see Sync instruction) */
> Instruction SYNC(uint32_t parameters);
> +
> + Instruction READ_ARF(Type type, Register dst, ARFRegister arf);
> + Instruction REGION(Register dst, Register src, uint32_t offset);
> /*! typed write */
> Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType);
> /*! sample textures */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index abc984f..40b5305 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -79,6 +79,8 @@ DECL_INSN(TYPED_WRITE, TypedWriteInstruction)
> DECL_INSN(SAMPLE, SampleInstruction)
> DECL_INSN(SYNC, SyncInstruction)
> DECL_INSN(LABEL, LabelInstruction)
> +DECL_INSN(READ_ARF, ReadARFInstruction)
> +DECL_INSN(REGION, RegionInstruction)
> DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
> DECL_INSN(MUL_HI, BinaryInstruction)
> DECL_INSN(I64_MUL_HI, BinaryInstruction)
> diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
> index 2a0aa54..eaf6728 100644
> --- a/backend/src/ir/liveness.cpp
> +++ b/backend/src/ir/liveness.cpp
> @@ -79,6 +79,7 @@ namespace ir {
> opCode != ir::OP_MUL_HI &&
> opCode != ir::OP_HADD &&
> opCode != ir::OP_RHADD &&
> + opCode != ir::OP_READ_ARF &&
> opCode != ir::OP_ADDSAT &&
> (dstNum == 1 || insn.getOpcode() != ir::OP_LOAD) &&
> !extentRegs->contains(reg)
> diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
> index 5995ba5..7e53e1a 100644
> --- a/backend/src/ir/register.hpp
> +++ b/backend/src/ir/register.hpp
> @@ -63,6 +63,21 @@ namespace ir {
> return 0;
> }
>
> + enum ARFRegister {
> + ARF_NULL = 0,
> + ARF_ADDRESS,
> + ARF_ACCUMULATOR,
> + ARF_FLAG,
> + ARF_MASK,
> + ARF_MASK_STACK,
> + ARF_MASK_STACK_DEPTH,
> + ARF_STATE,
> + ARF_CONTROL,
> + ARF_NOTIFICATION_COUNT,
> + ARF_IP,
> + ARF_TM
> + };
> +
> /*! A register can be either a byte, a word, a dword or a qword. We store this
> * value into a register data (which makes the register file)
> */
> diff --git a/backend/src/libocl/include/ocl_misc.h b/backend/src/libocl/include/ocl_misc.h
> index 8bd1eb3..5aa1c42 100644
> --- a/backend/src/libocl/include/ocl_misc.h
> +++ b/backend/src/libocl/include/ocl_misc.h
> @@ -136,4 +136,13 @@ DEF(ulong)
> short __gen_ocl_simd_any(short);
> short __gen_ocl_simd_all(short);
>
> +struct time_stamp {
> + // time tick
> + ulong tick;
> + // If context-switch or frequency change occurs since last read of tm,
> + // event will be non-zero, otherwise, it will be zero.
> + uint event;
> +};
> +
> +struct time_stamp __gen_ocl_get_timestamp(void);
> #endif
> diff --git a/backend/src/libocl/src/ocl_misc.cl b/backend/src/libocl/src/ocl_misc.cl
> index 9b4f2d4..ee86f7d 100644
> --- a/backend/src/libocl/src/ocl_misc.cl
> +++ b/backend/src/libocl/src/ocl_misc.cl
> @@ -216,3 +216,16 @@ DEF(ulong)
> #undef DEC8X
> #undef DEC16
> #undef DEC16X
> +
> +uint __gen_ocl_read_tm(void);
> +uint __gen_ocl_region(ushort offset, uint data);
> +
> +struct time_stamp __gen_ocl_get_timestamp(void) {
> + struct time_stamp val;
> +
> + uint tm = __gen_ocl_read_tm();
> + val.tick = ((ulong)__gen_ocl_region(1, tm) << 32) | __gen_ocl_region(0, tm);
> + val.event = __gen_ocl_region(2, tm);
> +
> + return val;
> +};
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 918af24..39b441f 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2646,6 +2646,8 @@ namespace gbe
> case GEN_OCL_CONV_F32_TO_F16:
> case GEN_OCL_SIMD_ANY:
> case GEN_OCL_SIMD_ALL:
> + case GEN_OCL_READ_TM:
> + case GEN_OCL_REGION:
> this->newRegister(&I);
> break;
> case GEN_OCL_PRINTF:
> @@ -2798,6 +2800,26 @@ namespace gbe
> ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src);
> break;
> }
> + case GEN_OCL_READ_TM:
> + {
> + const ir::Register dst = this->getRegister(&I);
> + ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM);
> + break;
> + }
> + case GEN_OCL_REGION:
> + {
> + const ir::Register dst = this->getRegister(&I);
> + // offset must be immediate
> + GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI);
> + assert(CPV);
> + const ir::Immediate &x = processConstantImm(CPV);
> +
> + AI++;
> + const ir::Register src = this->getRegister(*AI);
> +
> + ctx.REGION(dst, src, x.getIntegerValue());
> + break;
> + }
> case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
> case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
> case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 05639a9..f508bcc 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -190,5 +190,8 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
> DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
> DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
>
> +DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
> +DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
> +
> // printf function
> DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
> diff --git a/kernels/compiler_time_stamp.cl b/kernels/compiler_time_stamp.cl
> new file mode 100644
> index 0000000..e7e53a6
> --- /dev/null
> +++ b/kernels/compiler_time_stamp.cl
> @@ -0,0 +1,29 @@
> +__kernel void
> +compiler_time_stamp(__global int *src, __global int *dst)
> +{
> + int i;
> + int final[16];
> + struct time_stamp t1, t2, t3;
> + t1 = __gen_ocl_get_timestamp();
> + for (i = 0; i < 16; ++i) {
> + int array[16], j;
> + for (j = 0; j < 16; ++j)
> + array[j] = get_global_id(0);
> + for (j = 0; j < src[0]; ++j)
> + array[j] = 1+src[j];
> + final[i] = array[i];
> + if(i == 7)
> + t2 = __gen_ocl_get_timestamp();
> + }
> + t3 = __gen_ocl_get_timestamp();
> + // currently printf does not support long type.
> + // printf("tmEvt %d %d %d tmDiff %lu %lu\n", t3-t1, t2-t1);
> +
> + // time_stamp.event maybe not zero, then the time diff is not accurate,
> + // because a time event occurs before the time stamp.
> + printf("tmEvt %d %d %d tmDiff %u %u\n", t1.event, t2.event, t3.event,
> + (uint)(t3.tick-t1.tick), (uint)(t2.tick-t1.tick));
> +
> + dst[get_global_id(0)] = final[get_global_id(0)];
> +}
> +
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
> index 2bd6be0..b45ecf9 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -172,6 +172,7 @@ set (utests_sources
> compiler_getelementptr_bitcast.cpp
> compiler_simd_any.cpp
> compiler_simd_all.cpp
> + compiler_time_stamp.cpp
> compiler_double_precision.cpp
> load_program_from_bin_file.cpp
> load_program_from_gen_bin.cpp
> diff --git a/utests/compiler_time_stamp.cpp b/utests/compiler_time_stamp.cpp
> new file mode 100644
> index 0000000..1655b01
> --- /dev/null
> +++ b/utests/compiler_time_stamp.cpp
> @@ -0,0 +1,54 @@
> +#include "utest_helper.hpp"
> +
> +static void cpu(int global_id, int *src, int *dst) {
> + int i;
> + int final[16];
> + for (i = 0; i < 16; ++i) {
> + int array[16], j;
> + for (j = 0; j < 16; ++j)
> + array[j] = global_id;
> + for (j = 0; j < src[0]; ++j)
> + array[j] = 1+src[j];
> + final[i] = array[i];
> + }
> + dst[global_id] = final[global_id];
> +}
> +
> +void compiler_time_stamp(void)
> +{
> + const size_t n = 16;
> + int cpu_dst[16], cpu_src[16];
> +
> + // Setup kernel and buffers
> + OCL_CREATE_KERNEL("compiler_time_stamp");
> + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
> + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
> + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
> + globals[0] = 16;
> + locals[0] = 16;
> +
> + // Run random tests
> + for (uint32_t pass = 0; pass < 1; ++pass) {
> + OCL_MAP_BUFFER(0);
> + for (int32_t i = 0; i < (int32_t) n; ++i)
> + cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
> + OCL_UNMAP_BUFFER(0);
> +
> + // Run the kernel on GPU
> + OCL_NDRANGE(1);
> +
> + // Run on CPU
> + for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
> +
> + // Compare
> + OCL_MAP_BUFFER(1);
> + for (int32_t i = 0; i < 11; ++i)
> + OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
> + OCL_UNMAP_BUFFER(1);
> + }
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_time_stamp);
> +
> +
> --
> 1.7.10.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list