[Beignet] [PATCH 1/7] add debugloc for inserted llvm instructions

Bai Yannan yannan.bai at intel.com
Fri Sep 18 02:01:51 PDT 2015


    Add a debugloc when LLVM instructions are inserted; the debugloc is inherited from the adjacent instruction.

Signed-off-by: Bai Yannan <yannan.bai at intel.com>
---
 backend/src/backend/program.cpp                  |   7 +
 backend/src/llvm/llvm_gen_backend.cpp            |  25 ++
 backend/src/llvm/llvm_loadstore_optimization.cpp |  18 ++
 backend/src/llvm/llvm_printf_parser.cpp          |  20 ++
 backend/src/llvm/llvm_sampler_fix.cpp            |  17 ++
 backend/src/llvm/llvm_scalarize.cpp              |  18 ++
 backend/src/llvm/llvm_timestamp.cpp              | 337 +++++++++++++++++++++++
 backend/src/llvm/llvm_to_gen.cpp                 |  10 +-
 8 files changed, 451 insertions(+), 1 deletion(-)
 create mode 100644 backend/src/llvm/llvm_timestamp.cpp

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index f5865c2..af817de 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -49,6 +49,7 @@
 #include <iostream>
 #include <unistd.h>
 #include <mutex>
+#include <cstdlib>
 
 #ifdef GBE_COMPILER_AVAILABLE
 /* Not defined for LLVM 3.0 */
@@ -554,6 +555,12 @@ namespace gbe {
     args.push_back("stringInput.cl");
     args.push_back("-ffp-contract=off");
 
+	if(getenv("OCL_PROFILING")) {
+		char * isProfiling =  getenv("OCL_PROFILING");
+		if(*isProfiling == '1')
+			args.push_back("-g");
+	}
+
     // The compiler invocation needs a DiagnosticsEngine so it can report problems
     std::string ErrorString;
     llvm::raw_string_ostream ErrorInfo(ErrorString);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 4905415..238370a 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -108,6 +108,8 @@
 
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5
 #include "llvm/IR/Mangler.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
 #else
 #include "llvm/Target/Mangler.h"
 #endif
@@ -178,6 +180,20 @@
 
 using namespace llvm;
 
+/* Profiling is on only when OCL_PROFILING is set and starts with '1' (as program.cpp tests it). */
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+/* Give BUILDER the debug location of INSN, or of the first later instruction in its block that has one. */
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      if (dbgIt != dbgEnd) (BUILDER).SetCurrentDebugLocation(dbgIt->getDebugLoc()); \
+    } \
+  } while (0)
+
+
 namespace gbe
 {
   /*! Gen IR manipulates only scalar types */
@@ -977,6 +993,7 @@ namespace gbe
           Value *trueVal = getPointerBase((*iter).second[0]);
           Value *falseVal = getPointerBase((*iter).second[1]);
           Builder.SetInsertPoint(si);
+		  SETDEBUGLOCATION(Builder, si);
           Value *base = Builder.CreateSelect(si->getCondition(), trueVal, falseVal);
           pointerBaseMap.insert(std::make_pair(ptr, base));
         return base;
@@ -984,6 +1001,7 @@ namespace gbe
           PHINode *phi = dyn_cast<PHINode>(ptr);
           IRBuilder<> Builder(phi->getParent());
           Builder.SetInsertPoint(phi);
+		  SETDEBUGLOCATION(Builder, phi);
 
           PHINode *basePhi = Builder.CreatePHI(ptr->getType(), phi->getNumIncomingValues());
           unsigned srcNum = pointers.size();
@@ -997,7 +1015,10 @@ namespace gbe
             IRBuilder<> Builder2(phi->getIncomingBlock(x));
             BasicBlock *predBB = phi->getIncomingBlock(x);
             if (predBB->getTerminator())
+            {
               Builder2.SetInsertPoint(predBB->getTerminator());
+			  SETDEBUGLOCATION(Builder2, predBB->getTerminator());
+            }
 
 #if (LLVM_VERSION_MAJOR== 3 && LLVM_VERSION_MINOR < 6)
   // llvm 3.5 and older version don't have CreateBitOrPointerCast() define
@@ -1065,6 +1086,7 @@ namespace gbe
           Value *trueVal = getBtiRegister((*iter).second[0]);
           Value *falseVal = getBtiRegister((*iter).second[1]);
           Builder.SetInsertPoint(si);
+		  SETDEBUGLOCATION(Builder, si);
           Value *bti = Builder.CreateSelect(si->getCondition(), trueVal, falseVal);
           BtiValueMap.insert(std::make_pair(Val, bti));
           return bti;
@@ -1072,6 +1094,7 @@ namespace gbe
           PHINode *phi = dyn_cast<PHINode>(Val);
           IRBuilder<> Builder(phi->getParent());
           Builder.SetInsertPoint(phi);
+		  SETDEBUGLOCATION(Builder, phi);
 
           PHINode *btiPhi = Builder.CreatePHI(IntegerType::get(Val->getContext(), 32), phi->getNumIncomingValues());
           PtrOrigMapIter iter = pointerOrigMap.find(Val);
@@ -1240,6 +1263,7 @@ namespace gbe
             isLoad = false;
           }
           Builder.SetInsertPoint(cast<Instruction>(theUser));
+		  SETDEBUGLOCATION(Builder, cast<Instruction>(theUser));
 
           Type *int32Ty = Type::getInt32Ty(ptr->getContext());
           Value *v1 = Builder.CreatePtrToInt(pointerOp, int32Ty);
@@ -1351,6 +1375,7 @@ namespace gbe
 
       IRBuilder<> Builder(&entry);
       Builder.SetInsertPoint(bbIter);
+	  SETDEBUGLOCATION(Builder, bbIter);
 
       PointerType * AITy = cast<AllocaInst>(base)->getType();
       Value * btiArray = Builder.CreateAlloca(AITy->getElementType(), ArraySize, base->getName() + ".bti");
diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
index c6349fa..989b539 100644
--- a/backend/src/llvm/llvm_loadstore_optimization.cpp
+++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
@@ -41,6 +41,8 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
 #endif  /* LLVM_VERSION_MINOR <= 2 */
 #include "llvm/Pass.h"
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 1
@@ -55,6 +57,20 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 
 using namespace llvm;
+
+/* Profiling is on only when OCL_PROFILING is set and starts with '1' (as program.cpp tests it). */
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+/* Give BUILDER the debug location of INSN, or of the first later instruction in its block that has one. */
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      if (dbgIt != dbgEnd) (BUILDER).SetCurrentDebugLocation(dbgIt->getDebugLoc()); \
+    } \
+  } while (0)
+
 namespace gbe {
   class GenLoadStoreOptimization : public BasicBlockPass {
 
@@ -167,6 +183,7 @@ namespace gbe {
     unsigned addrSpace = ld->getPointerAddressSpace();
     // insert before first load
     Builder.SetInsertPoint(ld);
+	SETDEBUGLOCATION(Builder, ld);
     VectorType *vecTy = VectorType::get(ld->getType(), size);
     Value *vecPtr = Builder.CreateBitCast(ld->getPointerOperand(),
                                         PointerType::get(vecTy, addrSpace));
@@ -226,6 +243,7 @@ namespace gbe {
     unsigned align = st->getAlignment();
     // insert before the last store
     Builder.SetInsertPoint(merged[size-1]);
+	SETDEBUGLOCATION(Builder, merged[size-1]);
 
     Type *dataTy = st->getValueOperand()->getType();
     VectorType *vecTy = VectorType::get(dataTy, size);
diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
index 3d84457..3848bfa 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -59,6 +59,9 @@
 #if LLVM_VERSION_MINOR >= 5
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
+
 #else
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
@@ -73,6 +76,21 @@
 
 using namespace llvm;
 
+/* Profiling is on only when OCL_PROFILING is set and starts with '1' (as program.cpp tests it). */
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+/* Give `builder` the debug location of INSN, or of the first later instruction in INSN's block
+   that carries "dbg" metadata. ISBEGIN is kept for existing call sites and is not needed. */
+#define SETDEBUGLOCATION(INSN, ISBEGIN) \
+  do { \
+    (void)(ISBEGIN); \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      if (dbgIt != dbgEnd) builder->SetCurrentDebugLocation(dbgIt->getDebugLoc()); \
+    } \
+  } while (0)
+
 namespace gbe
 {
   using namespace ir;
@@ -659,6 +677,7 @@ error:
       Value* val = NULL;
 
       builder->SetInsertPoint(F.begin()->begin());// Insert the common var in the begin.
+      SETDEBUGLOCATION(F.begin()->begin(), true);
 
       /* FIXME: Because the OpenCL language do not support va macro, and we do not want
          to introduce the va_list, va_start and va_end into our code, we just simulate
@@ -721,6 +740,7 @@ error:
     /* Now generate the instructions. */
     for (auto pInfo : infoVect) {
       builder->SetInsertPoint(pInfo.call);
+	  SETDEBUGLOCATION(pInfo.call, false);
       deadprintfs.push_back(PrintfInst(cast<Instruction>(pInfo.call), generateOnePrintfInstruction(pInfo)));
     }
 
diff --git a/backend/src/llvm/llvm_sampler_fix.cpp b/backend/src/llvm/llvm_sampler_fix.cpp
index 8c76324..a1146d3 100644
--- a/backend/src/llvm/llvm_sampler_fix.cpp
+++ b/backend/src/llvm/llvm_sampler_fix.cpp
@@ -36,6 +36,8 @@
 #include "llvm/IR/IRBuilder.h"
 #if LLVM_VERSION_MINOR >= 5
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
 #else
 #include "llvm/Support/CFG.h"
 #endif
@@ -47,6 +49,19 @@
 
 using namespace llvm;
 
+/* Profiling is on only when OCL_PROFILING is set and starts with '1' (as program.cpp tests it). */
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+/* Give BUILDER the debug location of INSN, or of the first later instruction in its block that has one. */
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      if (dbgIt != dbgEnd) (BUILDER).SetCurrentDebugLocation(dbgIt->getDebugLoc()); \
+    } \
+  } while (0)
+
 namespace gbe {
 
   class SamplerFix : public FunctionPass {
@@ -82,6 +97,7 @@ namespace gbe {
           IRBuilder<> Builder(I->getParent());
 
           Builder.SetInsertPoint(I);
+		  SETDEBUGLOCATION(Builder, I);
           Value *addressMask = ConstantInt::get(i32Ty, __CLK_ADDRESS_MASK);
           Value *addressMode = Builder.CreateAnd(I->getOperand(0), addressMask);
           Value *clampInt =  ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
@@ -108,6 +124,7 @@ namespace gbe {
         } else {
           IRBuilder<> Builder(I->getParent());
           Builder.SetInsertPoint(I);
+		  SETDEBUGLOCATION(Builder, I);
           Value *normalizeMask = ConstantInt::get(i32Ty, CLK_NORMALIZED_COORDS_TRUE);
           Value *normalizeMode = Builder.CreateAnd(I->getOperand(0), normalizeMask);
           needFixVal = Builder.CreateICmpEQ(normalizeMode, ConstantInt::get(i32Ty, 0));
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index bc985c6..d4e87af 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -87,6 +87,8 @@
 #if LLVM_VERSION_MINOR >= 5
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
 #else
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
@@ -98,6 +100,19 @@
 
 using namespace llvm;
 
+/* Profiling is on only when OCL_PROFILING is set and starts with '1' (as program.cpp tests it). */
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+/* Give *BUILDER the debug location of INSN, or of the first later instruction in its block that has one. */
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      if (dbgIt != dbgEnd) (BUILDER)->SetCurrentDebugLocation(dbgIt->getDebugLoc()); \
+    } \
+  } while (0)
+
 namespace gbe {
 
   struct VectorValues {
@@ -231,6 +246,7 @@ namespace gbe {
     void setAppendPoint(Instruction *insn)  {
       BasicBlock::iterator next(insn);
       builder->SetInsertPoint(++next);
+	  SETDEBUGLOCATION(builder, next);
     }
 
     DenseMap<Value*, VectorValues> vectorVals;
@@ -526,6 +542,7 @@ namespace gbe {
     assert((canGetComponentArgs(inst) || isa<PHINode>(inst)) &&
            "Scalarizing an op whose arguments haven't been scalarized ");
     builder->SetInsertPoint(inst);
+	SETDEBUGLOCATION(builder, inst);
 
     if (IsPerComponentOp(inst))
       return scalarizePerComponent(inst);
@@ -793,6 +810,7 @@ namespace gbe {
     ReversePostOrderTraversal<Function*> rpot(&F);
     BasicBlock::iterator instI = (*rpot.begin())->begin();
     builder->SetInsertPoint(instI);
+	SETDEBUGLOCATION(builder, instI);
 
     Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
 
diff --git a/backend/src/llvm/llvm_timestamp.cpp b/backend/src/llvm/llvm_timestamp.cpp
new file mode 100644
index 0000000..f847d38
--- /dev/null
+++ b/backend/src/llvm/llvm_timestamp.cpp
@@ -0,0 +1,337 @@
+
+
+/**
+ * \file llvm_timestamp.cpp
+ *
+ */
+
+ 
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "llvm/Config/llvm-config.h"
+#if LLVM_VERSION_MINOR <= 2
+#include "llvm/Function.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#else
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#endif  /* LLVM_VERSION_MINOR <= 2 */
+#include "llvm/Pass.h"
+#if LLVM_VERSION_MINOR <= 1
+#include "llvm/Support/IRBuilder.h"
+#elif LLVM_VERSION_MINOR == 2
+#include "llvm/IRBuilder.h"
+#else
+#include "llvm/IR/IRBuilder.h"
+#endif /* LLVM_VERSION_MINOR <= 1 */
+
+#if LLVM_VERSION_MINOR >= 5
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h" //ynbai
+#include "llvm/IR/DebugInfo.h"
+
+#else
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#endif
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/Attributes.h"
+
+#include "llvm/llvm_gen_backend.hpp"
+#include "sys/map.hpp"
+#include "ir/timestamp.hpp"
+
+#include <iostream>
+#include <vector>
+#include <fstream>
+
+using namespace llvm;
+using std::vector;
+
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1') /* unset means off */
+
+namespace gbe
+{
+  using namespace ir;
+ 
+  /*! Function pass that adds timestamp instrumentation to kernel functions. */
+  class TimestampParser : public FunctionPass
+  {
+  public:
+    static char ID;  // handed to the FunctionPass base class
+    typedef std::pair<Instruction*, bool> TimestampInst;
+
+    Module* module;        // module owning the function being processed
+    IRBuilder<>* builder;  // allocated and released inside runOnFunction
+    Type* intTy;           // 32-bit integer type of the current context
+    Value* tbuf_ptr;       // __gen_ocl_timestamp_buf converted to a 32-bit integer
+    Value* g1Xg2Xg3;       // global_size2 * global_size1 * global_size0
+    Value* wg_offset;      // linear index built from local ids, local sizes and group ids
+    int tm_num;            // count of timestamp reads emitted so far
+
+    TimestampParser(void)
+      : FunctionPass(ID),
+        module(NULL), builder(NULL), intTy(NULL), tbuf_ptr(NULL),
+        g1Xg2Xg3(NULL), wg_offset(NULL), tm_num(0)
+    {
+    }
+
+    ~TimestampParser(void) {}
+
+    /*! Store one region word of a timestamp into its slot relative to addr_base. */
+    INLINE void storeRegionOffsetI(Value* addr_base, int idx, Value* param[], std::vector<Type *> ParamTys, bool isSt);
+
+    /*! Emit the start/end timestamp reads and their stores for one basic block. */
+    bool generateOneTimestampInstruction(llvm::BasicBlock *BB);
+
+    virtual const char *getPassName() const
+    {
+      return "Timestamp Parser";
+    }
+
+    /*! Pass entry point for a function. */
+    virtual bool runOnFunction(llvm::Function &F);
+  };
+
+  INLINE void TimestampParser::storeRegionOffsetI(Value* addr_base, int idx, Value* param[], std::vector<Type *> ParamTys, bool isSt){
+    /* Buffer layout relative to addr_base (one uint-sized slot each):
+         start_region_2, start_region_1, start_region_0, end_region_2, end_region_1, end_region_0
+       so region idx of a start stamp lands in slot 2-idx and of an end stamp in slot 5-idx. */
+    LLVMContext &ctx = module->getContext();
+    param[0] = ConstantInt::get(IntegerType::get(ctx, 16), idx);  // region selector; param[1] is set by the caller
+    SmallVector<Value *, 2> callArgs(param, param+2);
+    FunctionType *regionTy = FunctionType::get(IntegerType::getInt32Ty(ctx), ParamTys, false);
+    llvm::Function *regionFn = cast<llvm::Function>(module->getOrInsertFunction("__gen_ocl_region", regionTy));
+    Value* region = builder->CreateCall(regionFn, callArgs);
+
+    const int slot = (isSt ? 0 : 3) + 2 - idx;
+    Value* slotAddr = builder->CreateAdd(addr_base, ConstantInt::get(intTy, slot*sizeof(uint)));
+    Value* slotPtr = builder->CreateIntToPtr(slotAddr, Type::getInt32PtrTy(ctx, 1));
+    builder->CreateStore(region, slotPtr);
+
+  }
+  
+  /*! Instrument one basic block with a start and an end timestamp.
+   *
+   *  The record address computation and the first __gen_ocl_read_tm call are emitted at the
+   *  builder's current insertion point, which the caller sets beforehand. The second read, the
+   *  six region stores and the __gen_ocl_timestamp call go right before BB's last instruction.
+   *
+   *  Address of the six-word record filled for this block:
+   *    tbuf_ptr + (g1Xg2Xg3 * bbNum + wg_offset) * 6 * sizeof(uint)
+   *  with bbNum = tm_num / 2, since every call consumes two timestamps.
+   *
+   *  \param BB block to instrument; must not be empty.
+   *  \return always true.
+   */
+  bool TimestampParser::generateOneTimestampInstruction(llvm::BasicBlock *BB)
+  {
+    LLVMContext &ctx = module->getContext();
+    // Each basic block has 2 timestamps, so tm_num is even on entry.
+    const int bbNum = tm_num / 2;
+    // g1Xg2Xg3 * bbNum
+    Value* data_offset = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, bbNum));
+    // g1Xg2Xg3 * bbNum + wg_offset
+    Value* record = builder->CreateAdd(data_offset, wg_offset);
+    // (g1Xg2Xg3 * bbNum + wg_offset) * bytes per record
+    record = builder->CreateMul(record, ConstantInt::get(intTy, sizeof(uint)*6));
+    Value* recordAddr = builder->CreateAdd(tbuf_ptr, record);
+
+    CallInst *read_tm_inst_st = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+                          "__gen_ocl_read_tm", IntegerType::getInt32Ty(ctx),
+                          NULL)));
+    tm_num++;
+
+    // Continue right before the last instruction of the block.
+    llvm::BasicBlock::iterator BI = BB->end();
+    builder->SetInsertPoint(--BI);
+    if (OCL_PROFILING) {
+      // Inherit the debug location of the closest instruction at or before the insertion
+      // point that carries "dbg" metadata; never step past the first instruction.
+      for (llvm::BasicBlock::iterator dbgIt = BI; ; --dbgIt) {
+        if (dbgIt->getMetadata("dbg")) {
+          builder->SetCurrentDebugLocation(dbgIt->getDebugLoc());
+          break;
+        }
+        if (dbgIt == BB->begin()) break;
+      }
+    }
+    CallInst *read_tm_inst_end = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+                          "__gen_ocl_read_tm", IntegerType::getInt32Ty(ctx),
+                          NULL)));
+    tm_num++;
+
+    Value* para[2];  // para[0] is filled in by storeRegionOffsetI
+    std::vector<Type *> ParamTys(2);
+    ParamTys[0]=IntegerType::get(ctx, 16);
+    ParamTys[1]=IntegerType::getInt32Ty(ctx);
+
+    for(int i=0; i<3; i++)
+    {
+      para[1]=read_tm_inst_st;
+      storeRegionOffsetI(recordAddr, i, para, ParamTys, true);
+      para[1]=read_tm_inst_end;
+      storeRegionOffsetI(recordAddr, i, para, ParamTys, false);
+    }
+
+    builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+                              "__gen_ocl_timestamp", Type::getVoidTy(ctx),
+                              NULL)));
+    return true;
+  }
+
+  /*! Insert timestamp instrumentation into every basic block of a kernel function.
+   *
+   *  At the top of the entry block the pass emits calls to the __gen_ocl_get_* helpers and
+   *  derives two values shared by all blocks:
+   *    wg_offset = (local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
+   *              + (local_id1 + local_size1 * group_id1) * global_size0
+   *              + (local_id0 + local_size0 * group_id0)
+   *    g1Xg2Xg3  = global_size2 * global_size1 * global_size0
+   *  Then generateOneTimestampInstruction() is applied to the entry block and to every other
+   *  block (at its first insertion point).
+   *
+   *  \param F function to process; non-kernel functions are left untouched.
+   *  \return true when F was instrumented, false when it was skipped.
+   */
+  bool TimestampParser::runOnFunction(llvm::Function &F)
+  {
+    switch (F.getCallingConv()) {
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+      case CallingConv::PTX_Device:
+        return false;
+      case CallingConv::PTX_Kernel:
+#else
+      case CallingConv::C:
+      case CallingConv::Fast:
+      case CallingConv::SPIR_KERNEL:
+#endif
+        break;
+      default:
+        GBE_ASSERTM(false, "Unsupported calling convention");
+    }
+
+    module = F.getParent();
+    LLVMContext &ctx = module->getContext();
+    intTy = IntegerType::get(ctx, 32);
+
+    // As we inline all function calls, so skip non-kernel functions
+    if (!isKernelFunction(F))
+      return false;
+
+    builder = new IRBuilder<>(ctx);
+
+    // Point the builder's debug location at 'it', or at the first later instruction of the same
+    // block that carries "dbg" metadata; leave it unchanged when the block has none.
+    auto inheritDebugLoc = [this](llvm::BasicBlock::iterator it) {
+      if (!OCL_PROFILING) return;
+      llvm::BasicBlock::iterator end = it->getParent()->end();
+      while (it != end && !it->getMetadata("dbg")) ++it;
+      if (it != end) builder->SetCurrentDebugLocation(it->getDebugLoc());
+    };
+
+    if (!tbuf_ptr) {
+      /* alloc a new buffer ptr to collect the timestamps. */
+      Type *ptrTy = Type::getInt32PtrTy(ctx);
+      llvm::Constant *tBuf = new GlobalVariable(*module, ptrTy, false,
+                                GlobalVariable::ExternalLinkage,
+                                nullptr,
+                                StringRef("__gen_ocl_timestamp_buf"),
+                                nullptr,
+                                GlobalVariable::NotThreadLocal,
+                                1);
+      tbuf_ptr = builder->CreatePtrToInt(tBuf, Type::getInt32Ty(ctx));
+    }
+
+    Value* op0 = NULL;
+    Value* val = NULL;
+
+    builder->SetInsertPoint(F.begin()->begin());// Insert the common var in the begin.
+    inheritDebugLoc(F.begin()->begin());
+
+    /* FIXME: Because the OpenCL language does not support va macros, and we do not want
+       to introduce va_list, va_start and va_end into our code, we just simulate
+       the function calls to calculate the offset here. */
+
+#define BUILD_CALL_INST(name) \
+    CallInst* name = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( \
+                     "__gen_ocl_get_"#name, \
+                     IntegerType::getInt32Ty(ctx), \
+                     NULL)))
+
+    BUILD_CALL_INST(group_id2);
+    BUILD_CALL_INST(group_id1);
+    BUILD_CALL_INST(group_id0);
+    BUILD_CALL_INST(global_size2);
+    BUILD_CALL_INST(global_size1);
+    BUILD_CALL_INST(global_size0);
+    BUILD_CALL_INST(local_id2);
+    BUILD_CALL_INST(local_id1);
+    BUILD_CALL_INST(local_id0);
+    BUILD_CALL_INST(local_size2);
+    BUILD_CALL_INST(local_size1);
+    BUILD_CALL_INST(local_size0);
+
+#undef BUILD_CALL_INST
+
+    // local_size2 * group_id2
+    val = builder->CreateMul(local_size2, group_id2);
+    // local_id2 + local_size2 * group_id2
+    val = builder->CreateAdd(local_id2, val);
+    // global_size1 * global_size0
+    op0 = builder->CreateMul(global_size1, global_size0);
+    // (local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
+    Value* offset1 = builder->CreateMul(val, op0);
+    // local_size1 * group_id1
+    val = builder->CreateMul(local_size1, group_id1);
+    // local_id1 + local_size1 * group_id1
+    val = builder->CreateAdd(local_id1, val);
+    // (local_id1 + local_size1 * group_id1) * global_size_0
+    Value* offset2 = builder->CreateMul(val, global_size0);
+    // local_size0 * group_id0
+    val = builder->CreateMul(local_size0, group_id0);
+    // local_id0 + local_size0 * group_id0
+    val = builder->CreateAdd(local_id0, val);
+    // The total sum
+    val = builder->CreateAdd(val, offset1);
+    wg_offset = builder->CreateAdd(val, offset2);
+
+    // global_size2 * global_size1
+    op0 = builder->CreateMul(global_size2, global_size1);
+    // global_size2 * global_size1 * global_size0
+    g1Xg2Xg3 = builder->CreateMul(op0, global_size0);
+
+    generateOneTimestampInstruction(F.begin());// first basic block
+
+    // Every remaining block gets its stamps at its first insertion point.
+    for (llvm::Function::iterator B = F.begin(), BE = F.end(); B != BE; B++) {
+      if (B == F.begin())
+        continue;
+      builder->SetInsertPoint(B->getFirstInsertionPt());
+      inheritDebugLoc(B->getFirstInsertionPt());
+      generateOneTimestampInstruction(B);
+    }
+
+    delete builder;
+    builder = NULL;
+
+    // The function now contains new instructions, so report it as modified.
+    return true;
+  }
+
+  char TimestampParser::ID = 0;
+
+  /*! Allocate a new TimestampParser pass and hand the raw pointer to the caller. */
+  FunctionPass* createTimestampParserPass()
+  { return new TimestampParser(); }
+
+} // end namespace
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 891f2a1..012b754 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -71,6 +71,8 @@
 #include <fcntl.h>
 #include <memory>
 
+#include <stdlib.h>
+
 namespace gbe
 {
   BVAR(OCL_OUTPUT_CFG, false);
@@ -226,7 +228,7 @@ namespace gbe
   {
     std::string errInfo;
     std::unique_ptr<llvm::raw_fd_ostream> o = NULL;
-    if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN)
+    //if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN)
       o = std::unique_ptr<llvm::raw_fd_ostream>(new llvm::raw_fd_ostream(fileno(stdout), false));
 
     // Get the module from its file
@@ -300,6 +302,12 @@ namespace gbe
     passes.add(createLowerSwitchPass());           // simplify cfg will generate switch-case instruction
     passes.add(createScalarizePass());             // Expand all vector ops
 
+	//setenv("OCL_PROFILING","0",0);
+	if(OCL_PROFILING) {
+		passes.add(createTimestampParserPass());  // by ynbai
+		passes.add(createExpandConstantExprPass());
+	}
+
     if(OCL_OUTPUT_CFG)
       passes.add(createCFGPrinterPass());
     if(OCL_OUTPUT_CFG_ONLY)
-- 
1.9.1



More information about the Beignet mailing list