[Beignet] [PATCH V2] GBE/libocl: Add __gen_ocl_get_timestamp() to get timestamp.
Ruiling Song
ruiling.song at intel.com
Thu Sep 18 00:51:08 PDT 2014
Gen provide tm0 register for intra-kernel profiling.
Here we provide an API __gen_ocl_get_timestamp() to return
the timestamp in TM.
The return type is defined as:
struct time_stamp {
ulong tick;
uint event;
};
'tick' is a 64bit time tick. 'event' stores a value which means
whether a tmEvent has occured (non-zero) or not (0). tmEvent includes
time-impacting event such as context switch or frequency change
since last time tm0 was read.
I add a sample in the kernels/compiler_time_stamp.cl. Hope it
would help you understand how to use it.
V2:
Introduce ir::ARFRegister to avoid directly use of nr/subnr in Gen IR.
Rename __gen_ocl_extract_reg to __gen_ocl_region.
Rename beignet_get_time_stamp to __gen_ocl_get_timestamp.
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen/gen_mesa_disasm.c | 3 +
backend/src/backend/gen_context.cpp | 1 +
backend/src/backend/gen_defs.hpp | 1 +
backend/src/backend/gen_insn_scheduling.cpp | 13 +++-
backend/src/backend/gen_insn_selection.cpp | 67 +++++++++++++++++++
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/ir/instruction.cpp | 94 +++++++++++++++++++++++++++
backend/src/ir/instruction.hpp | 20 ++++++
backend/src/ir/instruction.hxx | 2 +
backend/src/ir/liveness.cpp | 1 +
backend/src/ir/register.hpp | 15 +++++
backend/src/libocl/include/ocl_misc.h | 9 +++
backend/src/libocl/src/ocl_misc.cl | 13 ++++
backend/src/llvm/llvm_gen_backend.cpp | 22 +++++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 3 +
kernels/compiler_time_stamp.cl | 29 +++++++++
utests/CMakeLists.txt | 1 +
utests/compiler_time_stamp.cpp | 54 +++++++++++++++
18 files changed, 346 insertions(+), 3 deletions(-)
create mode 100644 kernels/compiler_time_stamp.cl
create mode 100644 utests/compiler_time_stamp.cpp
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index c120b60..266b501 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -552,6 +552,9 @@ static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr)
string (file, "ip");
return -1;
break;
+ case GEN_ARF_TM:
+ format (file, "tm%d", _reg_nr & 0x0f);
+ break;
default:
format (file, "ARF%d", _reg_nr);
break;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 2550567..175878d 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -202,6 +202,7 @@ namespace gbe
const GenRegister src = ra->genReg(insn.src(0));
switch (insn.opcode) {
case SEL_OP_MOV: p->MOV(dst, src, insn.extra.function); break;
+ case SEL_OP_READ_ARF: p->MOV(dst, src); break;
case SEL_OP_FBH: p->FBH(dst, src); break;
case SEL_OP_FBL: p->FBL(dst, src); break;
case SEL_OP_NOT: p->NOT(dst, src); break;
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index f0da50a..19aad95 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -261,6 +261,7 @@ enum GenMessageTarget {
#define GEN_ARF_CONTROL 0x80
#define GEN_ARF_NOTIFICATION_COUNT 0x90
#define GEN_ARF_IP 0xA0
+#define GEN_ARF_TM 0xC0
#define GEN_MRF_COMPR4 (1 << 7)
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index 106d608..ead3e26 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -190,6 +190,10 @@ namespace gbe
static const uint32_t MAX_FLAG_REGISTER = 8u;
/*! Maximum number of *physical* accumulators registers */
static const uint32_t MAX_ACC_REGISTER = 1u;
+ /*! Maximum number of *physical* tm registers */
+ static const uint32_t MAX_TM_REGISTER = 1u;
+ /*! Maximum number of *physical* arf registers */
+ static const uint32_t MAX_ARF_REGISTER = MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_TM_REGISTER;
/*! Stores the last node that wrote to a register / memory ... */
vector<ScheduleDAGNode*> nodes;
/*! store nodes each node depends on */
@@ -237,12 +241,12 @@ namespace gbe
{
if (scheduler.policy == PRE_ALLOC) {
this->grfNum = selection.getRegNum();
- nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+ nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM);
} else {
const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
GBE_ASSERT(simdWidth == 8 || simdWidth == 16);
this->grfNum = simdWidth == 8 ? 128 : 64;
- nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+ nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM);
}
insnNodes.resize(selection.getLargestBlockSize());
}
@@ -327,6 +331,8 @@ namespace gbe
} else if (file == GEN_ARF_ACCUMULATOR) {
GBE_ASSERT(nr < MAX_ACC_REGISTER);
return grfNum + MAX_FLAG_REGISTER + nr;
+ } else if (file == GEN_ARF_TM) {
+ return grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
} else {
NOT_SUPPORTED;
return 0;
@@ -348,7 +354,7 @@ namespace gbe
}
uint32_t DependencyTracker::getIndex(uint32_t bti) const {
- const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
+ const uint32_t memDelta = grfNum + MAX_ARF_REGISTER;
return bti == 0xfe ? memDelta + LOCAL_MEMORY : (bti == 0xff ? memDelta + SCRATCH_MEMORY : memDelta + GLOBAL_MEMORY);
}
@@ -583,6 +589,7 @@ namespace gbe
ScheduleDAGNode *node = tracker.insnNodes[insnID];
if (node->insn.isBranch() || node->insn.isLabel()
|| node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF
+ || node->insn.opcode == SEL_OP_READ_ARF
|| node->insn.opcode == SEL_OP_BARRIER)
tracker.makeBarrier(insnID, insnNum);
}
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d631579..f284ae1 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -458,6 +458,7 @@ namespace gbe
#define I64Shift(OP) \
INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); }
ALU1(MOV)
+ ALU1(READ_ARF)
ALU1WithTemp(MOV_DF)
ALU1WithTemp(LOAD_DF_IMM)
ALU1(LOAD_INT64_IMM)
@@ -3979,6 +3980,70 @@ namespace gbe
DECL_CTOR(GetImageInfoInstruction, 1, 1);
};
+ class ReadARFInstructionPattern : public SelectionPattern
+ {
+ public:
+ ReadARFInstructionPattern(void) : SelectionPattern(1,1) {
+ this->opcodes.push_back(ir::OP_READ_ARF);
+ }
+
+ INLINE uint32_t getRegNum(ir::ARFRegister arf) const {
+ if (arf == ir::ARF_TM) {
+ return 0xc0;
+ } else {
+ GBE_ASSERT(0);
+ return 0;
+ }
+ }
+
+ INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
+ using namespace ir;
+ const ir::ReadARFInstruction &insn = cast<ir::ReadARFInstruction>(dag.insn);
+ GenRegister dst;
+ dst = sel.selReg(insn.getDst(0), insn.getType());
+
+ sel.push();
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ sel.curr.execWidth = 8;
+ sel.READ_ARF(dst, GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+ getRegNum(insn.getARFRegister()),
+ 0,
+ getGenType(insn.getType()),
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1));
+ sel.pop();
+ return true;
+ }
+ };
+
+ /*! Get a region of a register */
+ class RegionInstructionPattern : public SelectionPattern
+ {
+ public:
+ RegionInstructionPattern(void) : SelectionPattern(1,1) {
+ this->opcodes.push_back(ir::OP_REGION);
+ }
+ INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
+ using namespace ir;
+ const ir::RegionInstruction &insn = cast<ir::RegionInstruction>(dag.insn);
+ GenRegister dst, src;
+ dst = sel.selReg(insn.getDst(0), ir::TYPE_U32);
+ src = GenRegister::ud1grf(insn.getSrc(0));
+ src.subphysical = 1;
+ src = GenRegister::offset(src, 0, insn.getOffset()*4);
+
+ sel.push();
+ sel.curr.noMask = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.MOV(dst, src);
+ sel.pop();
+ markAllChildren(dag);
+ return true;
+ }
+ };
+
/*! Branch instruction pattern */
class BranchInstructionPattern : public SelectionPattern
{
@@ -4190,6 +4255,8 @@ namespace gbe
this->insert<SelectModifierInstructionPattern>();
this->insert<SampleInstructionPattern>();
this->insert<GetImageInfoInstructionPattern>();
+ this->insert<ReadARFInstructionPattern>();
+ this->insert<RegionInstructionPattern>();
// Sort all the patterns with the number of instructions they output
for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 2d70982..048a844 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -85,3 +85,4 @@ DECL_SELECTION_IR(BRD, UnaryInstruction)
DECL_SELECTION_IR(IF, UnaryInstruction)
DECL_SELECTION_IR(ENDIF, UnaryInstruction)
DECL_SELECTION_IR(ELSE, UnaryInstruction)
+DECL_SELECTION_IR(READ_ARF, UnaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 370fb87..2d86480 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -668,6 +668,48 @@ namespace ir {
Register dst[0], src[0];
};
+ class ALIGNED_INSTRUCTION ReadARFInstruction :
+ public BasePolicy,
+ public NSrcPolicy<ReadARFInstruction, 0>,
+ public NDstPolicy<ReadARFInstruction, 1>
+ {
+ public:
+ INLINE ReadARFInstruction(Type type, Register dst, ARFRegister arf) {
+ this->type = type;
+ this->dst[0] = dst;
+ this->opcode = OP_READ_ARF;
+ this->arf = arf;
+ }
+ INLINE ir::ARFRegister getARFRegister(void) const { return this->arf; }
+ INLINE Type getType(void) const { return this->type; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Type type;
+ ARFRegister arf;
+ Register dst[1];
+ Register src[0];
+ };
+
+ class ALIGNED_INSTRUCTION RegionInstruction :
+ public BasePolicy,
+ public NSrcPolicy<RegionInstruction, 1>,
+ public NDstPolicy<RegionInstruction, 1>
+ {
+ public:
+ INLINE RegionInstruction(Register dst, Register src, uint32_t offset) {
+ this->offset = offset;
+ this->dst[0] = dst;
+ this->src[0] = src;
+ this->opcode = OP_REGION;
+ }
+ INLINE uint32_t getOffset(void) const { return this->offset; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ uint32_t offset;
+ Register dst[1];
+ Register src[1];
+ };
+
class ALIGNED_INSTRUCTION LabelInstruction :
public BasePolicy,
public NSrcPolicy<LabelInstruction, 0>,
@@ -1022,6 +1064,30 @@ namespace ir {
return true;
}
+ INLINE bool ReadARFInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32)) {
+ whyNot = "Only support S32/U32 type";
+ return false;
+ }
+
+ const RegisterFamily family = getFamily(this->type);
+ if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false))
+ return false;
+
+ return true;
+ }
+
+ INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(FAMILY_DWORD, dst[0], fn, whyNot) == false))
+ return false;
+
+ return true;
+ }
+
// Only a label index is required
INLINE bool LabelInstruction::wellFormed(const Function &fn, std::string &whyNot) const
{
@@ -1138,6 +1204,16 @@ namespace ir {
out << ": " << (int)bti.bti[i];
}
+ INLINE void ReadARFInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << " %" << this->getDst(fn, 0) << " arf:" << arf;
+ }
+
+ INLINE void RegionInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << " %" << this->getDst(fn, 0) << " %" << this->getSrc(fn, 0) << " offset: " << this->offset;
+ }
+
INLINE void LabelInstruction::out(std::ostream &out, const Function &fn) const {
this->outOpcode(out);
out << " $" << labelIndex;
@@ -1287,6 +1363,14 @@ START_INTROSPECTION(SyncInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(SyncInstruction)
+START_INTROSPECTION(ReadARFInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(ReadARFInstruction)
+
+START_INTROSPECTION(RegionInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(RegionInstruction)
+
START_INTROSPECTION(LabelInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(LabelInstruction)
@@ -1471,6 +1555,9 @@ DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
DECL_MEM_FN(BranchInstruction, bool, getInversePredicated(void), getInversePredicated())
DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
+DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType())
+DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void), getARFRegister())
+DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset())
DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType())
DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType())
DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerIndex(void), getSamplerIndex())
@@ -1667,6 +1754,13 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
return internal::SyncInstruction(parameters).convert();
}
+ Instruction READ_ARF(Type type, Register dst, ARFRegister arf) {
+ return internal::ReadARFInstruction(type, dst, arf).convert();
+ }
+ Instruction REGION(Register dst, Register src, uint32_t offset) {
+ return internal::RegionInstruction(dst, src, offset).convert();
+ }
+
// LABEL
Instruction LABEL(LabelIndex labelIndex) {
return internal::LabelInstruction(labelIndex).convert();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 39fb2db..3526a41 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -496,6 +496,23 @@ namespace ir {
static bool isClassOf(const Instruction &insn);
};
+ /*! Read one register (8 DWORD) in arf */
+ class ReadARFInstruction : public Instruction {
+ public:
+ Type getType() const;
+ ir::ARFRegister getARFRegister() const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! return a region of a register, make sure the offset does not exceed the register size */
+ class RegionInstruction : public Instruction {
+ public:
+ uint32_t getOffset(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
/*! Specialize the instruction. Also performs typechecking first based on the
* opcode. Crashes if it fails
*/
@@ -680,6 +697,9 @@ namespace ir {
Instruction LOADI(Type type, Register dst, ImmediateIndex value);
/*! sync.params... (see Sync instruction) */
Instruction SYNC(uint32_t parameters);
+
+ Instruction READ_ARF(Type type, Register dst, ARFRegister arf);
+ Instruction REGION(Register dst, Register src, uint32_t offset);
/*! typed write */
Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType);
/*! sample textures */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index abc984f..40b5305 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -79,6 +79,8 @@ DECL_INSN(TYPED_WRITE, TypedWriteInstruction)
DECL_INSN(SAMPLE, SampleInstruction)
DECL_INSN(SYNC, SyncInstruction)
DECL_INSN(LABEL, LabelInstruction)
+DECL_INSN(READ_ARF, ReadARFInstruction)
+DECL_INSN(REGION, RegionInstruction)
DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
DECL_INSN(MUL_HI, BinaryInstruction)
DECL_INSN(I64_MUL_HI, BinaryInstruction)
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
index 2a0aa54..eaf6728 100644
--- a/backend/src/ir/liveness.cpp
+++ b/backend/src/ir/liveness.cpp
@@ -79,6 +79,7 @@ namespace ir {
opCode != ir::OP_MUL_HI &&
opCode != ir::OP_HADD &&
opCode != ir::OP_RHADD &&
+ opCode != ir::OP_READ_ARF &&
opCode != ir::OP_ADDSAT &&
(dstNum == 1 || insn.getOpcode() != ir::OP_LOAD) &&
!extentRegs->contains(reg)
diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
index 5995ba5..7e53e1a 100644
--- a/backend/src/ir/register.hpp
+++ b/backend/src/ir/register.hpp
@@ -63,6 +63,21 @@ namespace ir {
return 0;
}
+ enum ARFRegister {
+ ARF_NULL = 0,
+ ARF_ADDRESS,
+ ARF_ACCUMULATOR,
+ ARF_FLAG,
+ ARF_MASK,
+ ARF_MASK_STACK,
+ ARF_MASK_STACK_DEPTH,
+ ARF_STATE,
+ ARF_CONTROL,
+ ARF_NOTIFICATION_COUNT,
+ ARF_IP,
+ ARF_TM
+ };
+
/*! A register can be either a byte, a word, a dword or a qword. We store this
* value into a register data (which makes the register file)
*/
diff --git a/backend/src/libocl/include/ocl_misc.h b/backend/src/libocl/include/ocl_misc.h
index 8bd1eb3..5aa1c42 100644
--- a/backend/src/libocl/include/ocl_misc.h
+++ b/backend/src/libocl/include/ocl_misc.h
@@ -136,4 +136,13 @@ DEF(ulong)
short __gen_ocl_simd_any(short);
short __gen_ocl_simd_all(short);
+struct time_stamp {
+ // time tick
+ ulong tick;
+ // If context-switch or frequency change occurs since last read of tm,
+ // event will be non-zero, otherwise, it will be zero.
+ uint event;
+};
+
+struct time_stamp __gen_ocl_get_timestamp(void);
#endif
diff --git a/backend/src/libocl/src/ocl_misc.cl b/backend/src/libocl/src/ocl_misc.cl
index 9b4f2d4..ee86f7d 100644
--- a/backend/src/libocl/src/ocl_misc.cl
+++ b/backend/src/libocl/src/ocl_misc.cl
@@ -216,3 +216,16 @@ DEF(ulong)
#undef DEC8X
#undef DEC16
#undef DEC16X
+
+uint __gen_ocl_read_tm(void);
+uint __gen_ocl_region(ushort offset, uint data);
+
+struct time_stamp __gen_ocl_get_timestamp(void) {
+ struct time_stamp val;
+
+ uint tm = __gen_ocl_read_tm();
+ val.tick = ((ulong)__gen_ocl_region(1, tm) << 32) | __gen_ocl_region(0, tm);
+ val.event = __gen_ocl_region(2, tm);
+
+ return val;
+};
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 918af24..39b441f 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2646,6 +2646,8 @@ namespace gbe
case GEN_OCL_CONV_F32_TO_F16:
case GEN_OCL_SIMD_ANY:
case GEN_OCL_SIMD_ALL:
+ case GEN_OCL_READ_TM:
+ case GEN_OCL_REGION:
this->newRegister(&I);
break;
case GEN_OCL_PRINTF:
@@ -2798,6 +2800,26 @@ namespace gbe
ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src);
break;
}
+ case GEN_OCL_READ_TM:
+ {
+ const ir::Register dst = this->getRegister(&I);
+ ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM);
+ break;
+ }
+ case GEN_OCL_REGION:
+ {
+ const ir::Register dst = this->getRegister(&I);
+ // offset must be immediate
+ GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI);
+ assert(CPV);
+ const ir::Immediate &x = processConstantImm(CPV);
+
+ AI++;
+ const ir::Register src = this->getRegister(*AI);
+
+ ctx.REGION(dst, src, x.getIntegerValue());
+ break;
+ }
case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 05639a9..f508bcc 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -190,5 +190,8 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
+DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
+DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
+
// printf function
DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
diff --git a/kernels/compiler_time_stamp.cl b/kernels/compiler_time_stamp.cl
new file mode 100644
index 0000000..e7e53a6
--- /dev/null
+++ b/kernels/compiler_time_stamp.cl
@@ -0,0 +1,29 @@
+__kernel void
+compiler_time_stamp(__global int *src, __global int *dst)
+{
+ int i;
+ int final[16];
+ struct time_stamp t1, t2, t3;
+ t1 = __gen_ocl_get_timestamp();
+ for (i = 0; i < 16; ++i) {
+ int array[16], j;
+ for (j = 0; j < 16; ++j)
+ array[j] = get_global_id(0);
+ for (j = 0; j < src[0]; ++j)
+ array[j] = 1+src[j];
+ final[i] = array[i];
+ if(i == 7)
+ t2 = __gen_ocl_get_timestamp();
+ }
+ t3 = __gen_ocl_get_timestamp();
+ // currently printf does not support long type.
+ // printf("tmEvt %d %d %d tmDiff %lu %lu\n", t3-t1, t2-t1);
+
+ // time_stamp.event maybe not zero, then the time diff is not accurate,
+ // because a time event occurs before the time stamp.
+ printf("tmEvt %d %d %d tmDiff %u %u\n", t1.event, t2.event, t3.event,
+ (uint)(t3.tick-t1.tick), (uint)(t2.tick-t1.tick));
+
+ dst[get_global_id(0)] = final[get_global_id(0)];
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 2bd6be0..b45ecf9 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -172,6 +172,7 @@ set (utests_sources
compiler_getelementptr_bitcast.cpp
compiler_simd_any.cpp
compiler_simd_all.cpp
+ compiler_time_stamp.cpp
compiler_double_precision.cpp
load_program_from_bin_file.cpp
load_program_from_gen_bin.cpp
diff --git a/utests/compiler_time_stamp.cpp b/utests/compiler_time_stamp.cpp
new file mode 100644
index 0000000..1655b01
--- /dev/null
+++ b/utests/compiler_time_stamp.cpp
@@ -0,0 +1,54 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) {
+ int i;
+ int final[16];
+ for (i = 0; i < 16; ++i) {
+ int array[16], j;
+ for (j = 0; j < 16; ++j)
+ array[j] = global_id;
+ for (j = 0; j < src[0]; ++j)
+ array[j] = 1+src[j];
+ final[i] = array[i];
+ }
+ dst[global_id] = final[global_id];
+}
+
+void compiler_time_stamp(void)
+{
+ const size_t n = 16;
+ int cpu_dst[16], cpu_src[16];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_time_stamp");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ // Run random tests
+ for (uint32_t pass = 0; pass < 1; ++pass) {
+ OCL_MAP_BUFFER(0);
+ for (int32_t i = 0; i < (int32_t) n; ++i)
+ cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Run on CPU
+ for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < 11; ++i)
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+ OCL_UNMAP_BUFFER(1);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_time_stamp);
+
+
--
1.7.10.4
More information about the Beignet
mailing list