[Beignet] [PATCH V2] GBE/libocl: Add __gen_ocl_get_timestamp() to get timestamp.

Song, Ruiling ruiling.song at intel.com
Thu Sep 18 00:43:21 PDT 2014


I am sorry for that... I will re-send it very soon.

-----Original Message-----
From: Zhigang Gong [mailto:zhigang.gong at linux.intel.com] 
Sent: Thursday, September 18, 2014 3:27 PM
To: Song, Ruiling; beignet at lists.freedesktop.org
Subject: RE: [Beignet] [PATCH V2] GBE/libocl: Add __gen_ocl_get_timestamp() to get timestamp.

This version LGTM, could you add a note to indicate the change in this version?

Just like:

V2:
XXXX

Thanks,
Zhigang Gong.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf 
> Of Ruiling Song
> Sent: Thursday, September 18, 2014 2:42 PM
> To: beignet at lists.freedesktop.org
> Cc: Ruiling Song
> Subject: [Beignet] [PATCH V2] GBE/libocl: Add 
> __gen_ocl_get_timestamp() to get timestamp.
> 
> Gen provide tm0 register for intra-kernel profiling.
> Here we provide an API __gen_ocl_get_timestamp() to return the 
> timestamp in TM.
> 
> The return type is defined as:
> struct time_stamp {
>   ulong tick;
>   uint event;
> };
> 
> 'tick' is a 64bit time tick. 'event' stores a value which means 
> whether a
tmEvent
> has occured (non-zero) or not (0). tmEvent includes time-impacting 
> event
such
> as context switch or frequency change since last time tm0 was read.
> 
> I add a sample in the kernels/compiler_time_stamp.cl. Hope it would 
> help
you
> understand how to use it.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/backend/gen/gen_mesa_disasm.c   |    3 +
>  backend/src/backend/gen_context.cpp         |    1 +
>  backend/src/backend/gen_defs.hpp            |    1 +
>  backend/src/backend/gen_insn_scheduling.cpp |   13 +++-
>  backend/src/backend/gen_insn_selection.cpp  |   67
> +++++++++++++++++++
>  backend/src/backend/gen_insn_selection.hxx  |    1 +
>  backend/src/ir/instruction.cpp              |   94
> +++++++++++++++++++++++++++
>  backend/src/ir/instruction.hpp              |   20 ++++++
>  backend/src/ir/instruction.hxx              |    2 +
>  backend/src/ir/liveness.cpp                 |    1 +
>  backend/src/ir/register.hpp                 |   15 +++++
>  backend/src/libocl/include/ocl_misc.h       |    9 +++
>  backend/src/libocl/src/ocl_misc.cl          |   13 ++++
>  backend/src/llvm/llvm_gen_backend.cpp       |   22 +++++++
>  backend/src/llvm/llvm_gen_ocl_function.hxx  |    3 +
>  kernels/compiler_time_stamp.cl              |   29 +++++++++
>  utests/CMakeLists.txt                       |    1 +
>  utests/compiler_time_stamp.cpp              |   54 +++++++++++++++
>  18 files changed, 346 insertions(+), 3 deletions(-)  create mode 
> 100644 kernels/compiler_time_stamp.cl  create mode 100644 
> utests/compiler_time_stamp.cpp
> 
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> b/backend/src/backend/gen/gen_mesa_disasm.c
> index c120b60..266b501 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -552,6 +552,9 @@ static int reg (FILE *file, uint32_t _reg_file,
uint32_t
> _reg_nr)
>          string (file, "ip");
>          return -1;
>          break;
> +      case GEN_ARF_TM:
> +        format (file, "tm%d", _reg_nr & 0x0f);
> +        break;
>        default:
>          format (file, "ARF%d", _reg_nr);
>          break;
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 2550567..175878d 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -202,6 +202,7 @@ namespace gbe
>      const GenRegister src = ra->genReg(insn.src(0));
>      switch (insn.opcode) {
>        case SEL_OP_MOV: p->MOV(dst, src, insn.extra.function); break;
> +      case SEL_OP_READ_ARF: p->MOV(dst, src); break;
>        case SEL_OP_FBH: p->FBH(dst, src); break;
>        case SEL_OP_FBL: p->FBL(dst, src); break;
>        case SEL_OP_NOT: p->NOT(dst, src); break; diff --git 
> a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index f0da50a..19aad95 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -261,6 +261,7 @@ enum GenMessageTarget {
>  #define GEN_ARF_CONTROL               0x80
>  #define GEN_ARF_NOTIFICATION_COUNT    0x90
>  #define GEN_ARF_IP                    0xA0
> +#define GEN_ARF_TM                    0xC0
> 
>  #define GEN_MRF_COMPR4   (1 << 7)
> 
> diff --git a/backend/src/backend/gen_insn_scheduling.cpp
> b/backend/src/backend/gen_insn_scheduling.cpp
> index 106d608..ead3e26 100644
> --- a/backend/src/backend/gen_insn_scheduling.cpp
> +++ b/backend/src/backend/gen_insn_scheduling.cpp
> @@ -190,6 +190,10 @@ namespace gbe
>      static const uint32_t MAX_FLAG_REGISTER = 8u;
>      /*! Maximum number of *physical* accumulators registers */
>      static const uint32_t MAX_ACC_REGISTER = 1u;
> +    /*! Maximum number of *physical* tm registers */
> +    static const uint32_t MAX_TM_REGISTER = 1u;
> +    /*! Maximum number of *physical* arf registers */
> +    static const uint32_t MAX_ARF_REGISTER = MAX_FLAG_REGISTER + 
> + MAX_ACC_REGISTER + MAX_TM_REGISTER;
>      /*! Stores the last node that wrote to a register / memory ... */
>      vector<ScheduleDAGNode*> nodes;
>      /*! store nodes each node depends on */ @@ -237,12 +241,12 @@ 
> namespace gbe
>    {
>      if (scheduler.policy == PRE_ALLOC) {
>        this->grfNum = selection.getRegNum();
> -      nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER +
> MAX_MEM_SYSTEM);
> +      nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM);
>      } else {
>        const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
>        GBE_ASSERT(simdWidth == 8 || simdWidth == 16);
>        this->grfNum = simdWidth == 8 ? 128 : 64;
> -      nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER +
> MAX_MEM_SYSTEM);
> +      nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM);
>      }
>      insnNodes.resize(selection.getLargestBlockSize());
>    }
> @@ -327,6 +331,8 @@ namespace gbe
>          } else if (file == GEN_ARF_ACCUMULATOR) {
>            GBE_ASSERT(nr < MAX_ACC_REGISTER);
>            return grfNum + MAX_FLAG_REGISTER + nr;
> +        } else if (file == GEN_ARF_TM) {
> +          return grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
>          } else {
>            NOT_SUPPORTED;
>            return 0;
> @@ -348,7 +354,7 @@ namespace gbe
>    }
> 
>    uint32_t DependencyTracker::getIndex(uint32_t bti) const {
> -    const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER +
> MAX_ACC_REGISTER;
> +    const uint32_t memDelta = grfNum + MAX_ARF_REGISTER;
>      return bti == 0xfe ? memDelta + LOCAL_MEMORY : (bti == 0xff ?
> memDelta + SCRATCH_MEMORY : memDelta + GLOBAL_MEMORY);
>    }
> 
> @@ -583,6 +589,7 @@ namespace gbe
>        ScheduleDAGNode *node = tracker.insnNodes[insnID];
>        if (node->insn.isBranch() || node->insn.isLabel()
>            || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == 
> SEL_OP_IF
> +          || node->insn.opcode == SEL_OP_READ_ARF
>            || node->insn.opcode == SEL_OP_BARRIER)
>          tracker.makeBarrier(insnID, insnNum);
>      }
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index d631579..f284ae1 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -458,6 +458,7 @@ namespace gbe
>  #define I64Shift(OP) \
>    INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { 
> I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); }
>      ALU1(MOV)
> +    ALU1(READ_ARF)
>      ALU1WithTemp(MOV_DF)
>      ALU1WithTemp(LOAD_DF_IMM)
>      ALU1(LOAD_INT64_IMM)
> @@ -3979,6 +3980,70 @@ namespace gbe
>      DECL_CTOR(GetImageInfoInstruction, 1, 1);
>    };
> 
> +  class ReadARFInstructionPattern : public SelectionPattern  {
> +  public:
> +    ReadARFInstructionPattern(void) : SelectionPattern(1,1) {
> +      this->opcodes.push_back(ir::OP_READ_ARF);
> +    }
> +
> +    INLINE uint32_t getRegNum(ir::ARFRegister arf) const {
> +      if (arf == ir::ARF_TM) {
> +        return 0xc0;
> +      } else {
> +        GBE_ASSERT(0);
> +        return 0;
> +      }
> +    }
> +
> +    INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
> +      using namespace ir;
> +      const ir::ReadARFInstruction &insn =
> cast<ir::ReadARFInstruction>(dag.insn);
> +      GenRegister dst;
> +      dst = sel.selReg(insn.getDst(0), insn.getType());
> +
> +      sel.push();
> +        sel.curr.predicate = GEN_PREDICATE_NONE;
> +        sel.curr.noMask = 1;
> +        sel.curr.execWidth = 8;
> +        sel.READ_ARF(dst,
> GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
> +                      getRegNum(insn.getARFRegister()),
> +                      0,
> +                      getGenType(insn.getType()),
> +                      GEN_VERTICAL_STRIDE_8,
> +                      GEN_WIDTH_8,
> +                      GEN_HORIZONTAL_STRIDE_1));
> +      sel.pop();
> +      return true;
> +    }
> +  };
> +
> +  /*! Get a region of a register */
> +  class RegionInstructionPattern : public SelectionPattern  {
> +  public:
> +    RegionInstructionPattern(void) : SelectionPattern(1,1) {
> +      this->opcodes.push_back(ir::OP_REGION);
> +    }
> +    INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
> +      using namespace ir;
> +      const ir::RegionInstruction &insn =
> cast<ir::RegionInstruction>(dag.insn);
> +      GenRegister dst, src;
> +      dst = sel.selReg(insn.getDst(0), ir::TYPE_U32);
> +      src = GenRegister::ud1grf(insn.getSrc(0));
> +      src.subphysical = 1;
> +      src = GenRegister::offset(src, 0, insn.getOffset()*4);
> +
> +      sel.push();
> +        sel.curr.noMask = 1;
> +        sel.curr.predicate = GEN_PREDICATE_NONE;
> +        sel.MOV(dst, src);
> +      sel.pop();
> +      markAllChildren(dag);
> +      return true;
> +    }
> +  };
> +
>    /*! Branch instruction pattern */
>    class BranchInstructionPattern : public SelectionPattern
>    {
> @@ -4190,6 +4255,8 @@ namespace gbe
>      this->insert<SelectModifierInstructionPattern>();
>      this->insert<SampleInstructionPattern>();
>      this->insert<GetImageInfoInstructionPattern>();
> +    this->insert<ReadARFInstructionPattern>();
> +    this->insert<RegionInstructionPattern>();
> 
>      // Sort all the patterns with the number of instructions they output
>      for (uint32_t op = 0; op < ir::OP_INVALID; ++op) diff --git 
> a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index 2d70982..048a844 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -85,3 +85,4 @@ DECL_SELECTION_IR(BRD, UnaryInstruction) 
> DECL_SELECTION_IR(IF, UnaryInstruction)  DECL_SELECTION_IR(ENDIF,
> UnaryInstruction)  DECL_SELECTION_IR(ELSE, UnaryInstruction)
> +DECL_SELECTION_IR(READ_ARF, UnaryInstruction)
> diff --git a/backend/src/ir/instruction.cpp
b/backend/src/ir/instruction.cpp
> index 370fb87..2d86480 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -668,6 +668,48 @@ namespace ir {
>        Register dst[0], src[0];
>      };
> 
> +    class ALIGNED_INSTRUCTION ReadARFInstruction :
> +      public BasePolicy,
> +      public NSrcPolicy<ReadARFInstruction, 0>,
> +      public NDstPolicy<ReadARFInstruction, 1>
> +    {
> +    public:
> +      INLINE ReadARFInstruction(Type type, Register dst, ARFRegister 
> + arf)
{
> +        this->type = type;
> +        this->dst[0] = dst;
> +        this->opcode = OP_READ_ARF;
> +        this->arf = arf;
> +      }
> +      INLINE ir::ARFRegister getARFRegister(void) const { return
this->arf; }
> +      INLINE Type getType(void) const { return this->type; }
> +      INLINE bool wellFormed(const Function &fn, std::string &why) const;
> +      INLINE void out(std::ostream &out, const Function &fn) const;
> +      Type type;
> +      ARFRegister arf;
> +      Register dst[1];
> +      Register src[0];
> +    };
> +
> +    class ALIGNED_INSTRUCTION RegionInstruction :
> +      public BasePolicy,
> +      public NSrcPolicy<RegionInstruction, 1>,
> +      public NDstPolicy<RegionInstruction, 1>
> +    {
> +    public:
> +      INLINE RegionInstruction(Register dst, Register src, uint32_t
offset) {
> +        this->offset = offset;
> +        this->dst[0] = dst;
> +        this->src[0] = src;
> +        this->opcode = OP_REGION;
> +      }
> +      INLINE uint32_t getOffset(void) const { return this->offset; }
> +      INLINE bool wellFormed(const Function &fn, std::string &why) const;
> +      INLINE void out(std::ostream &out, const Function &fn) const;
> +      uint32_t offset;
> +      Register dst[1];
> +      Register src[1];
> +    };
> +
>      class ALIGNED_INSTRUCTION LabelInstruction :
>        public BasePolicy,
>        public NSrcPolicy<LabelInstruction, 0>, @@ -1022,6 +1064,30 @@ 
> namespace ir {
>        return true;
>      }
> 
> +    INLINE bool ReadARFInstruction::wellFormed(const Function &fn,
> std::string &whyNot) const
> +    {
> +      if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32)) {
> +        whyNot = "Only support S32/U32 type";
> +        return false;
> +      }
> +
> +      const RegisterFamily family = getFamily(this->type);
> +      if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) ==
false))
> +        return false;
> +
> +      return true;
> +    }
> +
> +    INLINE bool RegionInstruction::wellFormed(const Function &fn,
> std::string &whyNot) const
> +    {
> +      if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, 
> + whyNot)
> == false))
> +        return false;
> +      if (UNLIKELY(checkRegisterData(FAMILY_DWORD, dst[0], fn, 
> + whyNot)
> == false))
> +        return false;
> +
> +      return true;
> +    }
> +
>      // Only a label index is required
>      INLINE bool LabelInstruction::wellFormed(const Function &fn,
std::string
> &whyNot) const
>      {
> @@ -1138,6 +1204,16 @@ namespace ir {
>          out << ": " << (int)bti.bti[i];
>      }
> 
> +    INLINE void ReadARFInstruction::out(std::ostream &out, const 
> + Function
> &fn) const {
> +      this->outOpcode(out);
> +      out << " %" << this->getDst(fn, 0) << " arf:" << arf;
> +    }
> +
> +    INLINE void RegionInstruction::out(std::ostream &out, const 
> + Function
> &fn) const {
> +      this->outOpcode(out);
> +      out << " %" << this->getDst(fn, 0) << " %" << this->getSrc(fn, 
> + 0)
<< "
> offset: " << this->offset;
> +    }
> +
>      INLINE void LabelInstruction::out(std::ostream &out, const 
> Function
&fn)
> const {
>        this->outOpcode(out);
>        out << " $" << labelIndex;
> @@ -1287,6 +1363,14 @@ START_INTROSPECTION(SyncInstruction)
>  #include "ir/instruction.hxx"
>  END_INTROSPECTION(SyncInstruction)
> 
> +START_INTROSPECTION(ReadARFInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(ReadARFInstruction)
> +
> +START_INTROSPECTION(RegionInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(RegionInstruction)
> +
>  START_INTROSPECTION(LabelInstruction)
>  #include "ir/instruction.hxx"
>  END_INTROSPECTION(LabelInstruction)
> @@ -1471,6 +1555,9 @@ DECL_MEM_FN(BranchInstruction, bool, 
> isPredicated(void), isPredicated())  DECL_MEM_FN(BranchInstruction, 
> bool, getInversePredicated(void), getInversePredicated()) 
> DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void),
> getLabelIndex())  DECL_MEM_FN(SyncInstruction, uint32_t, 
> getParameters(void), getParameters())
> +DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType()) 
> +DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void),
> +getARFRegister()) DECL_MEM_FN(RegionInstruction, uint32_t, 
> +getOffset(void), getOffset())
>  DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType()) 
> DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType()) 
> DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerIndex(void),
> getSamplerIndex()) @@ -1667,6 +1754,13 @@ 
> DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), 
> getImageIndex
>      return internal::SyncInstruction(parameters).convert();
>    }
> 
> +  Instruction READ_ARF(Type type, Register dst, ARFRegister arf) {
> +    return internal::ReadARFInstruction(type, dst, arf).convert();  } 
> + Instruction REGION(Register dst, Register src, uint32_t offset) {
> +    return internal::RegionInstruction(dst, src, offset).convert();  
> + }
> +
>    // LABEL
>    Instruction LABEL(LabelIndex labelIndex) {
>      return internal::LabelInstruction(labelIndex).convert();
> diff --git a/backend/src/ir/instruction.hpp
b/backend/src/ir/instruction.hpp
> index 39fb2db..3526a41 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -496,6 +496,23 @@ namespace ir {
>      static bool isClassOf(const Instruction &insn);
>    };
> 
> +  /*! Read one register (8 DWORD) in arf */  class ReadARFInstruction :
> + public Instruction {
> +  public:
> +    Type getType() const;
> +    ir::ARFRegister getARFRegister() const;
> +    /*! Return true if the given instruction is an instance of this 
> + class
*/
> +    static bool isClassOf(const Instruction &insn);  };
> +
> +  /*! return a region of a register, make sure the offset does not 
> + exceed the register size */  class RegionInstruction : public 
> + Instruction {
> +  public:
> +    uint32_t getOffset(void) const;
> +    /*! Return true if the given instruction is an instance of this 
> + class
*/
> +    static bool isClassOf(const Instruction &insn);  };
> +
>    /*! Specialize the instruction. Also performs typechecking first 
> based
on the
>     *  opcode. Crashes if it fails
>     */
> @@ -680,6 +697,9 @@ namespace ir {
>    Instruction LOADI(Type type, Register dst, ImmediateIndex value);
>    /*! sync.params... (see Sync instruction) */
>    Instruction SYNC(uint32_t parameters);
> +
> +  Instruction READ_ARF(Type type, Register dst, ARFRegister arf); 
> + Instruction REGION(Register dst, Register src, uint32_t offset);
>    /*! typed write */
>    Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type 
> srcType, Type coordType);
>    /*! sample textures */
> diff --git a/backend/src/ir/instruction.hxx
b/backend/src/ir/instruction.hxx index
> abc984f..40b5305 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -79,6 +79,8 @@ DECL_INSN(TYPED_WRITE, TypedWriteInstruction) 
> DECL_INSN(SAMPLE, SampleInstruction)  DECL_INSN(SYNC, SyncInstruction) 
> DECL_INSN(LABEL, LabelInstruction)
> +DECL_INSN(READ_ARF, ReadARFInstruction) DECL_INSN(REGION,
> +RegionInstruction)
>  DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction) DECL_INSN(MUL_HI, 
> BinaryInstruction)  DECL_INSN(I64_MUL_HI,
> BinaryInstruction) diff --git a/backend/src/ir/liveness.cpp 
> b/backend/src/ir/liveness.cpp index 2a0aa54..eaf6728 100644
> --- a/backend/src/ir/liveness.cpp
> +++ b/backend/src/ir/liveness.cpp
> @@ -79,6 +79,7 @@ namespace ir {
>                opCode != ir::OP_MUL_HI &&
>                opCode != ir::OP_HADD &&
>                opCode != ir::OP_RHADD &&
> +              opCode != ir::OP_READ_ARF &&
>                opCode != ir::OP_ADDSAT &&
>                (dstNum == 1 || insn.getOpcode() != ir::OP_LOAD) &&
>                !extentRegs->contains(reg) diff --git 
> a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp index 
> 5995ba5..7e53e1a 100644
> --- a/backend/src/ir/register.hpp
> +++ b/backend/src/ir/register.hpp
> @@ -63,6 +63,21 @@ namespace ir {
>      return 0;
>    }
> 
> +  enum ARFRegister {
> +    ARF_NULL = 0,
> +    ARF_ADDRESS,
> +    ARF_ACCUMULATOR,
> +    ARF_FLAG,
> +    ARF_MASK,
> +    ARF_MASK_STACK,
> +    ARF_MASK_STACK_DEPTH,
> +    ARF_STATE,
> +    ARF_CONTROL,
> +    ARF_NOTIFICATION_COUNT,
> +    ARF_IP,
> +    ARF_TM
> +  };
> +
>    /*! A register can be either a byte, a word, a dword or a qword. We
store
> this
>     *  value into a register data (which makes the register file)
>     */
> diff --git a/backend/src/libocl/include/ocl_misc.h
> b/backend/src/libocl/include/ocl_misc.h
> index 8bd1eb3..5aa1c42 100644
> --- a/backend/src/libocl/include/ocl_misc.h
> +++ b/backend/src/libocl/include/ocl_misc.h
> @@ -136,4 +136,13 @@ DEF(ulong)
>  short __gen_ocl_simd_any(short);
>  short __gen_ocl_simd_all(short);
> 
> +struct time_stamp {
> +  // time tick
> +  ulong tick;
> +  // If context-switch or frequency change occurs since last read of 
> +tm,
> +  // event will be non-zero, otherwise, it will be zero.
> +  uint event;
> +};
> +
> +struct time_stamp __gen_ocl_get_timestamp(void);
>  #endif
> diff --git a/backend/src/libocl/src/ocl_misc.cl
> b/backend/src/libocl/src/ocl_misc.cl
> index 9b4f2d4..ee86f7d 100644
> --- a/backend/src/libocl/src/ocl_misc.cl
> +++ b/backend/src/libocl/src/ocl_misc.cl
> @@ -216,3 +216,16 @@ DEF(ulong)
>  #undef DEC8X
>  #undef DEC16
>  #undef DEC16X
> +
> +uint __gen_ocl_read_tm(void);
> +uint __gen_ocl_region(ushort offset, uint data);
> +
> +struct time_stamp __gen_ocl_get_timestamp(void) {
> +  struct time_stamp val;
> +
> +  uint tm = __gen_ocl_read_tm();
> +  val.tick = ((ulong)__gen_ocl_region(1, tm) << 32) | 
> + __gen_ocl_region(0, tm);  val.event = __gen_ocl_region(2, tm);
> +
> +  return val;
> +};
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 918af24..39b441f 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2646,6 +2646,8 @@ namespace gbe
>        case GEN_OCL_CONV_F32_TO_F16:
>        case GEN_OCL_SIMD_ANY:
>        case GEN_OCL_SIMD_ALL:
> +      case GEN_OCL_READ_TM:
> +      case GEN_OCL_REGION:
>          this->newRegister(&I);
>          break;
>        case GEN_OCL_PRINTF:
> @@ -2798,6 +2800,26 @@ namespace gbe
>              ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src);
>              break;
>            }
> +          case GEN_OCL_READ_TM:
> +          {
> +            const ir::Register dst = this->getRegister(&I);
> +            ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM);
> +            break;
> +          }
> +          case GEN_OCL_REGION:
> +          {
> +            const ir::Register dst = this->getRegister(&I);
> +            // offset must be immediate
> +            GBE_ASSERT(AI != AE); Constant *CPV =
> dyn_cast<Constant>(*AI);
> +            assert(CPV);
> +            const ir::Immediate &x = processConstantImm(CPV);
> +
> +            AI++;
> +            const ir::Register src = this->getRegister(*AI);
> +
> +            ctx.REGION(dst, src, x.getIntegerValue());
> +            break;
> +          }
>            case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS);
> break;
>            case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN);
> break;
>            case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG);
> break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 05639a9..f508bcc 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -190,5 +190,8 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16,
> __gen_ocl_f32to16)  DECL_LLVM_GEN_FUNCTION(SIMD_ANY,
> __gen_ocl_simd_any)  DECL_LLVM_GEN_FUNCTION(SIMD_ALL,
> __gen_ocl_simd_all)
> 
> +DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm) 
> +DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
> +
>  // printf function
>  DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf) diff --git 
> a/kernels/compiler_time_stamp.cl b/kernels/compiler_time_stamp.cl new 
> file mode 100644 index 0000000..e7e53a6
> --- /dev/null
> +++ b/kernels/compiler_time_stamp.cl
> @@ -0,0 +1,29 @@
> +__kernel void
> +compiler_time_stamp(__global int *src, __global int *dst) {
> +  int i;
> +  int final[16];
> +  struct time_stamp t1, t2, t3;
> +  t1 = __gen_ocl_get_timestamp();
> +  for (i = 0; i < 16; ++i) {
> +    int array[16], j;
> +    for (j = 0; j < 16; ++j)
> +      array[j] = get_global_id(0);
> +    for (j = 0; j < src[0]; ++j)
> +      array[j] = 1+src[j];
> +    final[i] = array[i];
> +    if(i == 7)
> +      t2 = __gen_ocl_get_timestamp();
> +  }
> +  t3 = __gen_ocl_get_timestamp();
> +  // currently printf does not support long type.
> +  // printf("tmEvt %d %d %d  tmDiff %lu %lu\n", t3-t1, t2-t1);
> +
> +  // time_stamp.event maybe not zero, then the time diff is not 
> + accurate,  // because a time event occurs before the time stamp.
> +  printf("tmEvt %d %d %d  tmDiff %u %u\n", t1.event, t2.event, t3.event,
> +        (uint)(t3.tick-t1.tick), (uint)(t2.tick-t1.tick));
> +
> +  dst[get_global_id(0)] = final[get_global_id(0)]; }
> +
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index
> 2bd6be0..b45ecf9 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -172,6 +172,7 @@ set (utests_sources
>    compiler_getelementptr_bitcast.cpp
>    compiler_simd_any.cpp
>    compiler_simd_all.cpp
> +  compiler_time_stamp.cpp
>    compiler_double_precision.cpp
>    load_program_from_bin_file.cpp
>    load_program_from_gen_bin.cpp
> diff --git a/utests/compiler_time_stamp.cpp 
> b/utests/compiler_time_stamp.cpp new file mode 100644 index
> 0000000..1655b01
> --- /dev/null
> +++ b/utests/compiler_time_stamp.cpp
> @@ -0,0 +1,54 @@
> +#include "utest_helper.hpp"
> +
> +static void cpu(int global_id, int *src, int *dst) {
> +  int i;
> +  int final[16];
> +  for (i = 0; i < 16; ++i) {
> +    int array[16], j;
> +    for (j = 0; j < 16; ++j)
> +      array[j] = global_id;
> +    for (j = 0; j < src[0]; ++j)
> +      array[j] = 1+src[j];
> +    final[i] = array[i];
> +  }
> +  dst[global_id] = final[global_id];
> +}
> +
> +void compiler_time_stamp(void)
> +{
> +  const size_t n = 16;
> +  int cpu_dst[16], cpu_src[16];
> +
> +  // Setup kernel and buffers
> +  OCL_CREATE_KERNEL("compiler_time_stamp");
> +  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); 
> + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); 
> + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);  OCL_SET_ARG(1, 
> + sizeof(cl_mem), &buf[1]);  globals[0] = 16;  locals[0] = 16;
> +
> +  // Run random tests
> +  for (uint32_t pass = 0; pass < 1; ++pass) {
> +    OCL_MAP_BUFFER(0);
> +    for (int32_t i = 0; i < (int32_t) n; ++i)
> +      cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
> +    OCL_UNMAP_BUFFER(0);
> +
> +    // Run the kernel on GPU
> +    OCL_NDRANGE(1);
> +
> +    // Run on CPU
> +    for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, 
> + cpu_dst);
> +
> +    // Compare
> +    OCL_MAP_BUFFER(1);
> +    for (int32_t i = 0; i < 11; ++i)
> +      OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
> +    OCL_UNMAP_BUFFER(1);
> +  }
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_time_stamp);
> +
> +
> --
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet



More information about the Beignet mailing list