[Beignet] [PATCH V2] GBE/libocl: Add __gen_ocl_get_timestamp() to get timestamp.

Ruiling Song ruiling.song at intel.com
Thu Sep 18 00:51:08 PDT 2014


Gen provide tm0 register for intra-kernel profiling.
Here we provide an API __gen_ocl_get_timestamp() to return
the timestamp in TM.

The return type is defined as:
struct time_stamp {
  ulong tick;
  uint event;
};

'tick' is a 64bit time tick. 'event' stores a value which means
whether a tmEvent has occured (non-zero) or not (0). tmEvent includes
time-impacting event such as context switch or frequency change
since last time tm0 was read.

I add a sample in the kernels/compiler_time_stamp.cl. Hope it
would help you understand how to use it.

V2:
Introduce ir::ARFRegister to avoid directly use of nr/subnr in Gen IR.
Rename __gen_ocl_extract_reg to __gen_ocl_region.
Rename beignet_get_time_stamp to __gen_ocl_get_timestamp.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/gen/gen_mesa_disasm.c   |    3 +
 backend/src/backend/gen_context.cpp         |    1 +
 backend/src/backend/gen_defs.hpp            |    1 +
 backend/src/backend/gen_insn_scheduling.cpp |   13 +++-
 backend/src/backend/gen_insn_selection.cpp  |   67 +++++++++++++++++++
 backend/src/backend/gen_insn_selection.hxx  |    1 +
 backend/src/ir/instruction.cpp              |   94 +++++++++++++++++++++++++++
 backend/src/ir/instruction.hpp              |   20 ++++++
 backend/src/ir/instruction.hxx              |    2 +
 backend/src/ir/liveness.cpp                 |    1 +
 backend/src/ir/register.hpp                 |   15 +++++
 backend/src/libocl/include/ocl_misc.h       |    9 +++
 backend/src/libocl/src/ocl_misc.cl          |   13 ++++
 backend/src/llvm/llvm_gen_backend.cpp       |   22 +++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx  |    3 +
 kernels/compiler_time_stamp.cl              |   29 +++++++++
 utests/CMakeLists.txt                       |    1 +
 utests/compiler_time_stamp.cpp              |   54 +++++++++++++++
 18 files changed, 346 insertions(+), 3 deletions(-)
 create mode 100644 kernels/compiler_time_stamp.cl
 create mode 100644 utests/compiler_time_stamp.cpp

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index c120b60..266b501 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -552,6 +552,9 @@ static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr)
         string (file, "ip");
         return -1;
         break;
+      case GEN_ARF_TM:
+        format (file, "tm%d", _reg_nr & 0x0f);
+        break;
       default:
         format (file, "ARF%d", _reg_nr);
         break;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 2550567..175878d 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -202,6 +202,7 @@ namespace gbe
     const GenRegister src = ra->genReg(insn.src(0));
     switch (insn.opcode) {
       case SEL_OP_MOV: p->MOV(dst, src, insn.extra.function); break;
+      case SEL_OP_READ_ARF: p->MOV(dst, src); break;
       case SEL_OP_FBH: p->FBH(dst, src); break;
       case SEL_OP_FBL: p->FBL(dst, src); break;
       case SEL_OP_NOT: p->NOT(dst, src); break;
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index f0da50a..19aad95 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -261,6 +261,7 @@ enum GenMessageTarget {
 #define GEN_ARF_CONTROL               0x80
 #define GEN_ARF_NOTIFICATION_COUNT    0x90
 #define GEN_ARF_IP                    0xA0
+#define GEN_ARF_TM                    0xC0
 
 #define GEN_MRF_COMPR4   (1 << 7)
 
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index 106d608..ead3e26 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -190,6 +190,10 @@ namespace gbe
     static const uint32_t MAX_FLAG_REGISTER = 8u;
     /*! Maximum number of *physical* accumulators registers */
     static const uint32_t MAX_ACC_REGISTER = 1u;
+    /*! Maximum number of *physical* tm registers */
+    static const uint32_t MAX_TM_REGISTER = 1u;
+    /*! Maximum number of *physical* arf registers */
+    static const uint32_t MAX_ARF_REGISTER = MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_TM_REGISTER;
     /*! Stores the last node that wrote to a register / memory ... */
     vector<ScheduleDAGNode*> nodes;
     /*! store nodes each node depends on */
@@ -237,12 +241,12 @@ namespace gbe
   {
     if (scheduler.policy == PRE_ALLOC) {
       this->grfNum = selection.getRegNum();
-      nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+      nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM);
     } else {
       const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
       GBE_ASSERT(simdWidth == 8 || simdWidth == 16);
       this->grfNum = simdWidth == 8 ? 128 : 64;
-      nodes.resize(grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER + MAX_MEM_SYSTEM);
+      nodes.resize(grfNum + MAX_ARF_REGISTER + MAX_MEM_SYSTEM);
     }
     insnNodes.resize(selection.getLargestBlockSize());
   }
@@ -327,6 +331,8 @@ namespace gbe
         } else if (file == GEN_ARF_ACCUMULATOR) {
           GBE_ASSERT(nr < MAX_ACC_REGISTER);
           return grfNum + MAX_FLAG_REGISTER + nr;
+        } else if (file == GEN_ARF_TM) {
+          return grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
         } else {
           NOT_SUPPORTED;
           return 0;
@@ -348,7 +354,7 @@ namespace gbe
   }
 
   uint32_t DependencyTracker::getIndex(uint32_t bti) const {
-    const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
+    const uint32_t memDelta = grfNum + MAX_ARF_REGISTER;
     return bti == 0xfe ? memDelta + LOCAL_MEMORY : (bti == 0xff ? memDelta + SCRATCH_MEMORY : memDelta + GLOBAL_MEMORY);
   }
 
@@ -583,6 +589,7 @@ namespace gbe
       ScheduleDAGNode *node = tracker.insnNodes[insnID];
       if (node->insn.isBranch() || node->insn.isLabel()
           || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF
+          || node->insn.opcode == SEL_OP_READ_ARF
           || node->insn.opcode == SEL_OP_BARRIER)
         tracker.makeBarrier(insnID, insnNum);
     }
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d631579..f284ae1 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -458,6 +458,7 @@ namespace gbe
 #define I64Shift(OP) \
   INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); }
     ALU1(MOV)
+    ALU1(READ_ARF)
     ALU1WithTemp(MOV_DF)
     ALU1WithTemp(LOAD_DF_IMM)
     ALU1(LOAD_INT64_IMM)
@@ -3979,6 +3980,70 @@ namespace gbe
     DECL_CTOR(GetImageInfoInstruction, 1, 1);
   };
 
+  class ReadARFInstructionPattern : public SelectionPattern
+  {
+  public:
+    ReadARFInstructionPattern(void) : SelectionPattern(1,1) {
+      this->opcodes.push_back(ir::OP_READ_ARF);
+    }
+
+    INLINE uint32_t getRegNum(ir::ARFRegister arf) const {
+      if (arf == ir::ARF_TM) {
+        return 0xc0;
+      } else {
+        GBE_ASSERT(0);
+        return 0;
+      }
+    }
+
+    INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
+      using namespace ir;
+      const ir::ReadARFInstruction &insn = cast<ir::ReadARFInstruction>(dag.insn);
+      GenRegister dst;
+      dst = sel.selReg(insn.getDst(0), insn.getType());
+
+      sel.push();
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.curr.noMask = 1;
+        sel.curr.execWidth = 8;
+        sel.READ_ARF(dst, GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+                      getRegNum(insn.getARFRegister()),
+                      0,
+                      getGenType(insn.getType()),
+                      GEN_VERTICAL_STRIDE_8,
+                      GEN_WIDTH_8,
+                      GEN_HORIZONTAL_STRIDE_1));
+      sel.pop();
+      return true;
+    }
+  };
+
+  /*! Get a region of a register */
+  class RegionInstructionPattern : public SelectionPattern
+  {
+  public:
+    RegionInstructionPattern(void) : SelectionPattern(1,1) {
+      this->opcodes.push_back(ir::OP_REGION);
+    }
+    INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
+      using namespace ir;
+      const ir::RegionInstruction &insn = cast<ir::RegionInstruction>(dag.insn);
+      GenRegister dst, src;
+      dst = sel.selReg(insn.getDst(0), ir::TYPE_U32);
+      src = GenRegister::ud1grf(insn.getSrc(0));
+      src.subphysical = 1;
+      src = GenRegister::offset(src, 0, insn.getOffset()*4);
+
+      sel.push();
+        sel.curr.noMask = 1;
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.MOV(dst, src);
+      sel.pop();
+      markAllChildren(dag);
+      return true;
+    }
+  };
+
   /*! Branch instruction pattern */
   class BranchInstructionPattern : public SelectionPattern
   {
@@ -4190,6 +4255,8 @@ namespace gbe
     this->insert<SelectModifierInstructionPattern>();
     this->insert<SampleInstructionPattern>();
     this->insert<GetImageInfoInstructionPattern>();
+    this->insert<ReadARFInstructionPattern>();
+    this->insert<RegionInstructionPattern>();
 
     // Sort all the patterns with the number of instructions they output
     for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 2d70982..048a844 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -85,3 +85,4 @@ DECL_SELECTION_IR(BRD, UnaryInstruction)
 DECL_SELECTION_IR(IF, UnaryInstruction)
 DECL_SELECTION_IR(ENDIF, UnaryInstruction)
 DECL_SELECTION_IR(ELSE, UnaryInstruction)
+DECL_SELECTION_IR(READ_ARF, UnaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 370fb87..2d86480 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -668,6 +668,48 @@ namespace ir {
       Register dst[0], src[0];
     };
 
+    class ALIGNED_INSTRUCTION ReadARFInstruction :
+      public BasePolicy,
+      public NSrcPolicy<ReadARFInstruction, 0>,
+      public NDstPolicy<ReadARFInstruction, 1>
+    {
+    public:
+      INLINE ReadARFInstruction(Type type, Register dst, ARFRegister arf) {
+        this->type = type;
+        this->dst[0] = dst;
+        this->opcode = OP_READ_ARF;
+        this->arf = arf;
+      }
+      INLINE ir::ARFRegister getARFRegister(void) const { return this->arf; }
+      INLINE Type getType(void) const { return this->type; }
+      INLINE bool wellFormed(const Function &fn, std::string &why) const;
+      INLINE void out(std::ostream &out, const Function &fn) const;
+      Type type;
+      ARFRegister arf;
+      Register dst[1];
+      Register src[0];
+    };
+
+    class ALIGNED_INSTRUCTION RegionInstruction :
+      public BasePolicy,
+      public NSrcPolicy<RegionInstruction, 1>,
+      public NDstPolicy<RegionInstruction, 1>
+    {
+    public:
+      INLINE RegionInstruction(Register dst, Register src, uint32_t offset) {
+        this->offset = offset;
+        this->dst[0] = dst;
+        this->src[0] = src;
+        this->opcode = OP_REGION;
+      }
+      INLINE uint32_t getOffset(void) const { return this->offset; }
+      INLINE bool wellFormed(const Function &fn, std::string &why) const;
+      INLINE void out(std::ostream &out, const Function &fn) const;
+      uint32_t offset;
+      Register dst[1];
+      Register src[1];
+    };
+
     class ALIGNED_INSTRUCTION LabelInstruction :
       public BasePolicy,
       public NSrcPolicy<LabelInstruction, 0>,
@@ -1022,6 +1064,30 @@ namespace ir {
       return true;
     }
 
+    INLINE bool ReadARFInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+    {
+      if (UNLIKELY( this->type != TYPE_U32 && this->type != TYPE_S32)) {
+        whyNot = "Only support S32/U32 type";
+        return false;
+      }
+
+      const RegisterFamily family = getFamily(this->type);
+      if (UNLIKELY(checkRegisterData(family, dst[0], fn, whyNot) == false))
+        return false;
+
+      return true;
+    }
+
+    INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+    {
+      if (UNLIKELY(checkRegisterData(FAMILY_DWORD, src[0], fn, whyNot) == false))
+        return false;
+      if (UNLIKELY(checkRegisterData(FAMILY_DWORD, dst[0], fn, whyNot) == false))
+        return false;
+
+      return true;
+    }
+
     // Only a label index is required
     INLINE bool LabelInstruction::wellFormed(const Function &fn, std::string &whyNot) const
     {
@@ -1138,6 +1204,16 @@ namespace ir {
         out << ": " << (int)bti.bti[i];
     }
 
+    INLINE void ReadARFInstruction::out(std::ostream &out, const Function &fn) const {
+      this->outOpcode(out);
+      out << " %" << this->getDst(fn, 0) << " arf:" << arf;
+    }
+
+    INLINE void RegionInstruction::out(std::ostream &out, const Function &fn) const {
+      this->outOpcode(out);
+      out << " %" << this->getDst(fn, 0) << " %" << this->getSrc(fn, 0) << " offset: " << this->offset;
+    }
+
     INLINE void LabelInstruction::out(std::ostream &out, const Function &fn) const {
       this->outOpcode(out);
       out << " $" << labelIndex;
@@ -1287,6 +1363,14 @@ START_INTROSPECTION(SyncInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(SyncInstruction)
 
+START_INTROSPECTION(ReadARFInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(ReadARFInstruction)
+
+START_INTROSPECTION(RegionInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(RegionInstruction)
+
 START_INTROSPECTION(LabelInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(LabelInstruction)
@@ -1471,6 +1555,9 @@ DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
 DECL_MEM_FN(BranchInstruction, bool, getInversePredicated(void), getInversePredicated())
 DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
 DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
+DECL_MEM_FN(ReadARFInstruction, Type, getType(void), getType())
+DECL_MEM_FN(ReadARFInstruction, ARFRegister, getARFRegister(void), getARFRegister())
+DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset())
 DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType())
 DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType())
 DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerIndex(void), getSamplerIndex())
@@ -1667,6 +1754,13 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
     return internal::SyncInstruction(parameters).convert();
   }
 
+  Instruction READ_ARF(Type type, Register dst, ARFRegister arf) {
+    return internal::ReadARFInstruction(type, dst, arf).convert();
+  }
+  Instruction REGION(Register dst, Register src, uint32_t offset) {
+    return internal::RegionInstruction(dst, src, offset).convert();
+  }
+
   // LABEL
   Instruction LABEL(LabelIndex labelIndex) {
     return internal::LabelInstruction(labelIndex).convert();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 39fb2db..3526a41 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -496,6 +496,23 @@ namespace ir {
     static bool isClassOf(const Instruction &insn);
   };
 
+  /*! Read one register (8 DWORD) in arf */
+  class ReadARFInstruction : public Instruction {
+  public:
+    Type getType() const;
+    ir::ARFRegister getARFRegister() const;
+    /*! Return true if the given instruction is an instance of this class */
+    static bool isClassOf(const Instruction &insn);
+  };
+
+  /*! return a region of a register, make sure the offset does not exceed the register size */
+  class RegionInstruction : public Instruction {
+  public:
+    uint32_t getOffset(void) const;
+    /*! Return true if the given instruction is an instance of this class */
+    static bool isClassOf(const Instruction &insn);
+  };
+
   /*! Specialize the instruction. Also performs typechecking first based on the
    *  opcode. Crashes if it fails
    */
@@ -680,6 +697,9 @@ namespace ir {
   Instruction LOADI(Type type, Register dst, ImmediateIndex value);
   /*! sync.params... (see Sync instruction) */
   Instruction SYNC(uint32_t parameters);
+
+  Instruction READ_ARF(Type type, Register dst, ARFRegister arf);
+  Instruction REGION(Register dst, Register src, uint32_t offset);
   /*! typed write */
   Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType);
   /*! sample textures */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index abc984f..40b5305 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -79,6 +79,8 @@ DECL_INSN(TYPED_WRITE, TypedWriteInstruction)
 DECL_INSN(SAMPLE, SampleInstruction)
 DECL_INSN(SYNC, SyncInstruction)
 DECL_INSN(LABEL, LabelInstruction)
+DECL_INSN(READ_ARF, ReadARFInstruction)
+DECL_INSN(REGION, RegionInstruction)
 DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
 DECL_INSN(MUL_HI, BinaryInstruction)
 DECL_INSN(I64_MUL_HI, BinaryInstruction)
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
index 2a0aa54..eaf6728 100644
--- a/backend/src/ir/liveness.cpp
+++ b/backend/src/ir/liveness.cpp
@@ -79,6 +79,7 @@ namespace ir {
               opCode != ir::OP_MUL_HI &&
               opCode != ir::OP_HADD &&
               opCode != ir::OP_RHADD &&
+              opCode != ir::OP_READ_ARF &&
               opCode != ir::OP_ADDSAT &&
               (dstNum == 1 || insn.getOpcode() != ir::OP_LOAD) &&
               !extentRegs->contains(reg)
diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
index 5995ba5..7e53e1a 100644
--- a/backend/src/ir/register.hpp
+++ b/backend/src/ir/register.hpp
@@ -63,6 +63,21 @@ namespace ir {
     return 0;
   }
 
+  enum ARFRegister {
+    ARF_NULL = 0,
+    ARF_ADDRESS,
+    ARF_ACCUMULATOR,
+    ARF_FLAG,
+    ARF_MASK,
+    ARF_MASK_STACK,
+    ARF_MASK_STACK_DEPTH,
+    ARF_STATE,
+    ARF_CONTROL,
+    ARF_NOTIFICATION_COUNT,
+    ARF_IP,
+    ARF_TM
+  };
+
   /*! A register can be either a byte, a word, a dword or a qword. We store this
    *  value into a register data (which makes the register file) 
    */
diff --git a/backend/src/libocl/include/ocl_misc.h b/backend/src/libocl/include/ocl_misc.h
index 8bd1eb3..5aa1c42 100644
--- a/backend/src/libocl/include/ocl_misc.h
+++ b/backend/src/libocl/include/ocl_misc.h
@@ -136,4 +136,13 @@ DEF(ulong)
 short __gen_ocl_simd_any(short);
 short __gen_ocl_simd_all(short);
 
+struct time_stamp {
+  // time tick
+  ulong tick;
+  // If context-switch or frequency change occurs since last read of tm,
+  // event will be non-zero, otherwise, it will be zero.
+  uint event;
+};
+
+struct time_stamp __gen_ocl_get_timestamp(void);
 #endif
diff --git a/backend/src/libocl/src/ocl_misc.cl b/backend/src/libocl/src/ocl_misc.cl
index 9b4f2d4..ee86f7d 100644
--- a/backend/src/libocl/src/ocl_misc.cl
+++ b/backend/src/libocl/src/ocl_misc.cl
@@ -216,3 +216,16 @@ DEF(ulong)
 #undef DEC8X
 #undef DEC16
 #undef DEC16X
+
+uint __gen_ocl_read_tm(void);
+uint __gen_ocl_region(ushort offset, uint data);
+
+struct time_stamp __gen_ocl_get_timestamp(void) {
+  struct time_stamp val;
+
+  uint tm = __gen_ocl_read_tm();
+  val.tick = ((ulong)__gen_ocl_region(1, tm) << 32) | __gen_ocl_region(0, tm);
+  val.event = __gen_ocl_region(2, tm);
+
+  return val;
+};
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 918af24..39b441f 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2646,6 +2646,8 @@ namespace gbe
       case GEN_OCL_CONV_F32_TO_F16:
       case GEN_OCL_SIMD_ANY:
       case GEN_OCL_SIMD_ALL:
+      case GEN_OCL_READ_TM:
+      case GEN_OCL_REGION:
         this->newRegister(&I);
         break;
       case GEN_OCL_PRINTF:
@@ -2798,6 +2800,26 @@ namespace gbe
             ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src);
             break;
           }
+          case GEN_OCL_READ_TM:
+          {
+            const ir::Register dst = this->getRegister(&I);
+            ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM);
+            break;
+          }
+          case GEN_OCL_REGION:
+          {
+            const ir::Register dst = this->getRegister(&I);
+            // offset must be immediate
+            GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI);
+            assert(CPV);
+            const ir::Immediate &x = processConstantImm(CPV);
+
+            AI++;
+            const ir::Register src = this->getRegister(*AI);
+
+            ctx.REGION(dst, src, x.getIntegerValue());
+            break;
+          }
           case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
           case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
           case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 05639a9..f508bcc 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -190,5 +190,8 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
 DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
 DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
 
+DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
+DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
+
 // printf function
 DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
diff --git a/kernels/compiler_time_stamp.cl b/kernels/compiler_time_stamp.cl
new file mode 100644
index 0000000..e7e53a6
--- /dev/null
+++ b/kernels/compiler_time_stamp.cl
@@ -0,0 +1,29 @@
+__kernel void
+compiler_time_stamp(__global int *src, __global int *dst)
+{
+  int i;
+  int final[16];
+  struct time_stamp t1, t2, t3;
+  t1 = __gen_ocl_get_timestamp();
+  for (i = 0; i < 16; ++i) {
+    int array[16], j;
+    for (j = 0; j < 16; ++j)
+      array[j] = get_global_id(0);
+    for (j = 0; j < src[0]; ++j)
+      array[j] = 1+src[j];
+    final[i] = array[i];
+    if(i == 7)
+      t2 = __gen_ocl_get_timestamp();
+  }
+  t3 = __gen_ocl_get_timestamp();
+  // currently printf does not support long type.
+  // printf("tmEvt %d %d %d  tmDiff %lu %lu\n", t3-t1, t2-t1);
+
+  // time_stamp.event maybe not zero, then the time diff is not accurate,
+  // because a time event occurs before the time stamp.
+  printf("tmEvt %d %d %d  tmDiff %u %u\n", t1.event, t2.event, t3.event,
+        (uint)(t3.tick-t1.tick), (uint)(t2.tick-t1.tick));
+
+  dst[get_global_id(0)] = final[get_global_id(0)];
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 2bd6be0..b45ecf9 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -172,6 +172,7 @@ set (utests_sources
   compiler_getelementptr_bitcast.cpp
   compiler_simd_any.cpp
   compiler_simd_all.cpp
+  compiler_time_stamp.cpp
   compiler_double_precision.cpp
   load_program_from_bin_file.cpp
   load_program_from_gen_bin.cpp
diff --git a/utests/compiler_time_stamp.cpp b/utests/compiler_time_stamp.cpp
new file mode 100644
index 0000000..1655b01
--- /dev/null
+++ b/utests/compiler_time_stamp.cpp
@@ -0,0 +1,54 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) {
+  int i;
+  int final[16];
+  for (i = 0; i < 16; ++i) {
+    int array[16], j;
+    for (j = 0; j < 16; ++j)
+      array[j] = global_id;
+    for (j = 0; j < src[0]; ++j)
+      array[j] = 1+src[j];
+    final[i] = array[i];
+  }
+  dst[global_id] = final[global_id];
+}
+
+void compiler_time_stamp(void)
+{
+  const size_t n = 16;
+  int cpu_dst[16], cpu_src[16];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_time_stamp");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = 16;
+  locals[0] = 16;
+
+  // Run random tests
+  for (uint32_t pass = 0; pass < 1; ++pass) {
+    OCL_MAP_BUFFER(0);
+    for (int32_t i = 0; i < (int32_t) n; ++i)
+      cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
+    OCL_UNMAP_BUFFER(0);
+
+    // Run the kernel on GPU
+    OCL_NDRANGE(1);
+
+    // Run on CPU
+    for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+    // Compare
+    OCL_MAP_BUFFER(1);
+    for (int32_t i = 0; i < 11; ++i)
+      OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_time_stamp);
+
+
-- 
1.7.10.4



More information about the Beignet mailing list