[Beignet] [PATCH 1/7] add debugloc for inserted llvm instructions

He Junyan junyan.he at inbox.com
Thu Oct 8 07:26:24 PDT 2015


This patch set can basically work, but has some problems.

Common issue:
1. Please pay attention to the code format: lines should not
   begin with a TAB and should not have trailing SPACEs.
2. Some temporary comments should be deleted, and there is no need
   for the author's name in the code.
3. I think the patches should be reorganised by stage of the backend,
   one patch for each stage.

Details are as follows:


On Fri, Sep 18, 2015 at 05:01:51PM +0800, Bai Yannan wrote:
> Date: Fri, 18 Sep 2015 17:01:51 +0800
> From: Bai Yannan <yannan.bai at intel.com>
> To: beignet at lists.freedesktop.org
> Cc: Bai Yannan <yannan.bai at intel.com>
> Subject: [Beignet] [PATCH 1/7] add debugloc for inserted llvm instructions
> X-Mailer: git-send-email 1.9.1
> 
>     add debugloc when llvm instuctions inserted, the debugloc is inherited from the contiguous one.
> 
> Signed-off-by: Bai Yannan <yannan.bai at intel.com>
> ---
>  backend/src/backend/program.cpp                  |   7 +
>  backend/src/llvm/llvm_gen_backend.cpp            |  25 ++
>  backend/src/llvm/llvm_loadstore_optimization.cpp |  18 ++
>  backend/src/llvm/llvm_printf_parser.cpp          |  20 ++
>  backend/src/llvm/llvm_sampler_fix.cpp            |  17 ++
>  backend/src/llvm/llvm_scalarize.cpp              |  18 ++
>  backend/src/llvm/llvm_timestamp.cpp              | 337 +++++++++++++++++++++++
>  backend/src/llvm/llvm_to_gen.cpp                 |  10 +-
>  8 files changed, 451 insertions(+), 1 deletion(-)
>  create mode 100644 backend/src/llvm/llvm_timestamp.cpp
> 
> diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
> index f5865c2..af817de 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -49,6 +49,7 @@
>  #include <iostream>
>  #include <unistd.h>
>  #include <mutex>
> +#include <cstdlib>
>  
>  #ifdef GBE_COMPILER_AVAILABLE
>  /* Not defined for LLVM 3.0 */
> @@ -554,6 +555,12 @@ namespace gbe {
>      args.push_back("stringInput.cl");
>      args.push_back("-ffp-contract=off");
>  
> +	if(getenv("OCL_PROFILING")) {
> +		char * isProfiling =  getenv("OCL_PROFILING");
> +		if(*isProfiling == '1')
> +			args.push_back("-g");
> +	}
I think here we need to use BVAR or IVAR auxiliary functions instead of using
system getenv.

> +
>      // The compiler invocation needs a DiagnosticsEngine so it can report problems
>      std::string ErrorString;
>      llvm::raw_string_ostream ErrorInfo(ErrorString);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 4905415..238370a 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -108,6 +108,8 @@
>  
>  #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5
>  #include "llvm/IR/Mangler.h"
> +#include "llvm/IR/DebugLoc.h"
> +#include "llvm/IR/DebugInfo.h"
>  #else
>  #include "llvm/Target/Mangler.h"
>  #endif
> @@ -178,6 +180,20 @@
>  
>  using namespace llvm;
>  
> +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48)
> +#define SETDEBUGLOCATION(BUILDER, INSN) 				\	
> +	if(OCL_PROFILING) { 								\
> +		llvm::BasicBlock *bb = INSN->getParent();		\
> +		llvm::BasicBlock::iterator iter =bb->begin();	\
> +		while(!(iter++)->isIdenticalTo(INSN))	;		\
> +		llvm::MDNode *N = iter->getMetadata("dbg"); 	\
> +		llvm::DebugLoc dg = iter->getDebugLoc();		\
> +		while(!N)	N = (++iter)->getMetadata("dbg");	\
> +		BUILDER.SetCurrentDebugLocation(dg);			\
> +	}
> +// end define SETDEBUGLOCATION
I notice that all the SETDEBUGLOCATION macros have almost the same logic.
I would prefer to rewrite them as a single function and place it in some common place.

> +
> +
>  namespace gbe
>  {
>    /*! Gen IR manipulates only scalar types */
> @@ -977,6 +993,7 @@ namespace gbe
>            Value *trueVal = getPointerBase((*iter).second[0]);
>            Value *falseVal = getPointerBase((*iter).second[1]);
>            Builder.SetInsertPoint(si);
> +		  SETDEBUGLOCATION(Builder, si);
>            Value *base = Builder.CreateSelect(si->getCondition(), trueVal, falseVal);
>            pointerBaseMap.insert(std::make_pair(ptr, base));
>          return base;
> @@ -984,6 +1001,7 @@ namespace gbe
>            PHINode *phi = dyn_cast<PHINode>(ptr);
>            IRBuilder<> Builder(phi->getParent());
>            Builder.SetInsertPoint(phi);
> +		  SETDEBUGLOCATION(Builder, phi);
>  
>            PHINode *basePhi = Builder.CreatePHI(ptr->getType(), phi->getNumIncomingValues());
>            unsigned srcNum = pointers.size();
> @@ -997,7 +1015,10 @@ namespace gbe
>              IRBuilder<> Builder2(phi->getIncomingBlock(x));
>              BasicBlock *predBB = phi->getIncomingBlock(x);
>              if (predBB->getTerminator())
> +            {
>                Builder2.SetInsertPoint(predBB->getTerminator());
> +			  SETDEBUGLOCATION(Builder2, predBB->getTerminator());
> +            }
>  
>  #if (LLVM_VERSION_MAJOR== 3 && LLVM_VERSION_MINOR < 6)
>    // llvm 3.5 and older version don't have CreateBitOrPointerCast() define
> @@ -1065,6 +1086,7 @@ namespace gbe
>            Value *trueVal = getBtiRegister((*iter).second[0]);
>            Value *falseVal = getBtiRegister((*iter).second[1]);
>            Builder.SetInsertPoint(si);
> +		  SETDEBUGLOCATION(Builder, si);
>            Value *bti = Builder.CreateSelect(si->getCondition(), trueVal, falseVal);
>            BtiValueMap.insert(std::make_pair(Val, bti));
>            return bti;
> @@ -1072,6 +1094,7 @@ namespace gbe
>            PHINode *phi = dyn_cast<PHINode>(Val);
>            IRBuilder<> Builder(phi->getParent());
>            Builder.SetInsertPoint(phi);
> +		  SETDEBUGLOCATION(Builder, phi);
>  
>            PHINode *btiPhi = Builder.CreatePHI(IntegerType::get(Val->getContext(), 32), phi->getNumIncomingValues());
>            PtrOrigMapIter iter = pointerOrigMap.find(Val);
> @@ -1240,6 +1263,7 @@ namespace gbe
>              isLoad = false;
>            }
>            Builder.SetInsertPoint(cast<Instruction>(theUser));
> +		  SETDEBUGLOCATION(Builder, cast<Instruction>(theUser));
>  
>            Type *int32Ty = Type::getInt32Ty(ptr->getContext());
>            Value *v1 = Builder.CreatePtrToInt(pointerOp, int32Ty);
> @@ -1351,6 +1375,7 @@ namespace gbe
>  
>        IRBuilder<> Builder(&entry);
>        Builder.SetInsertPoint(bbIter);
> +	  SETDEBUGLOCATION(Builder, bbIter);
>  
>        PointerType * AITy = cast<AllocaInst>(base)->getType();
>        Value * btiArray = Builder.CreateAlloca(AITy->getElementType(), ArraySize, base->getName() + ".bti");
> diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
> index c6349fa..989b539 100644
> --- a/backend/src/llvm/llvm_loadstore_optimization.cpp
> +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
> @@ -41,6 +41,8 @@
>  #include "llvm/IR/Instructions.h"
>  #include "llvm/IR/IntrinsicInst.h"
>  #include "llvm/IR/Module.h"
> +#include "llvm/IR/DebugLoc.h"
> +#include "llvm/IR/DebugInfo.h"
>  #endif  /* LLVM_VERSION_MINOR <= 2 */
>  #include "llvm/Pass.h"
>  #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 1
> @@ -55,6 +57,20 @@
>  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
>  
>  using namespace llvm;
> +
> +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48)
> +#define SETDEBUGLOCATION(BUILDER, INSN) 				\	
> +	if(OCL_PROFILING) { 								\
> +		llvm::BasicBlock *bb = INSN->getParent();		\
> +		llvm::BasicBlock::iterator iter =bb->begin();	\
> +		while(!(iter++)->isIdenticalTo(INSN))	;		\
> +		llvm::MDNode *N = iter->getMetadata("dbg"); 	\
> +		llvm::DebugLoc dg = iter->getDebugLoc();		\
> +		while(!N)	N = (++iter)->getMetadata("dbg");	\
> +		BUILDER.SetCurrentDebugLocation(dg);			\
> +	}
> +// end define SETDEBUGLOCATION
> +
>  namespace gbe {
>    class GenLoadStoreOptimization : public BasicBlockPass {
>  
> @@ -167,6 +183,7 @@ namespace gbe {
>      unsigned addrSpace = ld->getPointerAddressSpace();
>      // insert before first load
>      Builder.SetInsertPoint(ld);
> +	SETDEBUGLOCATION(Builder, ld);
>      VectorType *vecTy = VectorType::get(ld->getType(), size);
>      Value *vecPtr = Builder.CreateBitCast(ld->getPointerOperand(),
>                                          PointerType::get(vecTy, addrSpace));
> @@ -226,6 +243,7 @@ namespace gbe {
>      unsigned align = st->getAlignment();
>      // insert before the last store
>      Builder.SetInsertPoint(merged[size-1]);
> +	SETDEBUGLOCATION(Builder, merged[size-1]);
>  
>      Type *dataTy = st->getValueOperand()->getType();
>      VectorType *vecTy = VectorType::get(dataTy, size);
> diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
> index 3d84457..3848bfa 100644
> --- a/backend/src/llvm/llvm_printf_parser.cpp
> +++ b/backend/src/llvm/llvm_printf_parser.cpp
> @@ -59,6 +59,9 @@
>  #if LLVM_VERSION_MINOR >= 5
>  #include "llvm/IR/CallSite.h"
>  #include "llvm/IR/CFG.h"
> +#include "llvm/IR/DebugLoc.h"
> +#include "llvm/IR/DebugInfo.h"
> +
>  #else
>  #include "llvm/Support/CallSite.h"
>  #include "llvm/Support/CFG.h"
> @@ -73,6 +76,21 @@
>  
>  using namespace llvm;
>  
> +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48)
> +#define SETDEBUGLOCATION(INSN, ISBEGIN) 					\
> +	if(OCL_PROFILING) {										\
> +		llvm::BasicBlock *bb=INSN->getParent(); 				\
> +		llvm::BasicBlock::iterator iter = bb->begin();			\
> +		if(ISBEGIN)while(!(iter++)->isIdenticalTo(INSN))	;	\
> +		else while(!iter->isIdenticalTo(INSN)) iter++;			\
> +		llvm::MDNode *N = iter->getMetadata("dbg"); 			\
> +		llvm::DebugLoc dg = iter->getDebugLoc();				\
> +		if(ISBEGIN)while(!N) {N = (++iter)->getMetadata("dbg");}		\
> +		else	while(!N) {iter++;N = iter->getMetadata("dbg");}		\
> +		builder->SetCurrentDebugLocation(dg);					\
> +	}
> +// end define SETDEBUGLOCATION
> +
>  namespace gbe
>  {
>    using namespace ir;
> @@ -659,6 +677,7 @@ error:
>        Value* val = NULL;
>  
>        builder->SetInsertPoint(F.begin()->begin());// Insert the common var in the begin.
> +      SETDEBUGLOCATION(F.begin()->begin(), true);
>  
>        /* FIXME: Because the OpenCL language do not support va macro, and we do not want
>           to introduce the va_list, va_start and va_end into our code, we just simulate
> @@ -721,6 +740,7 @@ error:
>      /* Now generate the instructions. */
>      for (auto pInfo : infoVect) {
>        builder->SetInsertPoint(pInfo.call);
> +	  SETDEBUGLOCATION(pInfo.call, false);
>        deadprintfs.push_back(PrintfInst(cast<Instruction>(pInfo.call), generateOnePrintfInstruction(pInfo)));
>      }
>  
> diff --git a/backend/src/llvm/llvm_sampler_fix.cpp b/backend/src/llvm/llvm_sampler_fix.cpp
> index 8c76324..a1146d3 100644
> --- a/backend/src/llvm/llvm_sampler_fix.cpp
> +++ b/backend/src/llvm/llvm_sampler_fix.cpp
> @@ -36,6 +36,8 @@
>  #include "llvm/IR/IRBuilder.h"
>  #if LLVM_VERSION_MINOR >= 5
>  #include "llvm/IR/CFG.h"
> +#include "llvm/IR/DebugLoc.h"
> +#include "llvm/IR/DebugInfo.h"
>  #else
>  #include "llvm/Support/CFG.h"
>  #endif
> @@ -47,6 +49,19 @@
>  
>  using namespace llvm;
>  
> +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48)
> +#define SETDEBUGLOCATION(BUILDER, INSN) 				\	
> +	if(OCL_PROFILING) { 								\
> +		llvm::BasicBlock *bb = INSN->getParent();		\
> +		llvm::BasicBlock::iterator iter =bb->begin();	\
> +		while(!(iter++)->isIdenticalTo(INSN))	;		\
> +		llvm::MDNode *N = iter->getMetadata("dbg"); 	\
> +		llvm::DebugLoc dg = iter->getDebugLoc();		\
> +		while(!N)	N = (++iter)->getMetadata("dbg");	\
> +		BUILDER.SetCurrentDebugLocation(dg);			\
> +	}
> +// end define SETDEBUGLOCATION
> +
>  namespace gbe {
>  
>    class SamplerFix : public FunctionPass {
> @@ -82,6 +97,7 @@ namespace gbe {
>            IRBuilder<> Builder(I->getParent());
>  
>            Builder.SetInsertPoint(I);
> +		  SETDEBUGLOCATION(Builder, I);
>            Value *addressMask = ConstantInt::get(i32Ty, __CLK_ADDRESS_MASK);
>            Value *addressMode = Builder.CreateAnd(I->getOperand(0), addressMask);
>            Value *clampInt =  ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
> @@ -108,6 +124,7 @@ namespace gbe {
>          } else {
>            IRBuilder<> Builder(I->getParent());
>            Builder.SetInsertPoint(I);
> +		  SETDEBUGLOCATION(Builder, I);
>            Value *normalizeMask = ConstantInt::get(i32Ty, CLK_NORMALIZED_COORDS_TRUE);
>            Value *normalizeMode = Builder.CreateAnd(I->getOperand(0), normalizeMask);
>            needFixVal = Builder.CreateICmpEQ(normalizeMode, ConstantInt::get(i32Ty, 0));
> diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
> index bc985c6..d4e87af 100644
> --- a/backend/src/llvm/llvm_scalarize.cpp
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -87,6 +87,8 @@
>  #if LLVM_VERSION_MINOR >= 5
>  #include "llvm/IR/CallSite.h"
>  #include "llvm/IR/CFG.h"
> +#include "llvm/IR/DebugLoc.h"
> +#include "llvm/IR/DebugInfo.h"
>  #else
>  #include "llvm/Support/CallSite.h"
>  #include "llvm/Support/CFG.h"
> @@ -98,6 +100,19 @@
>  
>  using namespace llvm;
>  
> +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48)
> +#define SETDEBUGLOCATION(BUILDER, INSN) 				\	
> +	if(OCL_PROFILING) { 								\
> +		llvm::BasicBlock *bb = INSN->getParent();		\
> +		llvm::BasicBlock::iterator iter =bb->begin();	\
> +		while(!(iter++)->isIdenticalTo(INSN))	;		\
> +		llvm::MDNode *N = iter->getMetadata("dbg"); 	\
> +		llvm::DebugLoc dg = iter->getDebugLoc();		\
> +		while(!N)	N = (++iter)->getMetadata("dbg");	\
> +		BUILDER->SetCurrentDebugLocation(dg);			\
> +	}
> +// end define SETDEBUGLOCATION
> +
>  namespace gbe {
>  
>    struct VectorValues {
> @@ -231,6 +246,7 @@ namespace gbe {
>      void setAppendPoint(Instruction *insn)  {
>        BasicBlock::iterator next(insn);
>        builder->SetInsertPoint(++next);
> +	  SETDEBUGLOCATION(builder, next);
>      }
>  
>      DenseMap<Value*, VectorValues> vectorVals;
> @@ -526,6 +542,7 @@ namespace gbe {
>      assert((canGetComponentArgs(inst) || isa<PHINode>(inst)) &&
>             "Scalarizing an op whose arguments haven't been scalarized ");
>      builder->SetInsertPoint(inst);
> +	SETDEBUGLOCATION(builder, inst);
>  
>      if (IsPerComponentOp(inst))
>        return scalarizePerComponent(inst);
> @@ -793,6 +810,7 @@ namespace gbe {
>      ReversePostOrderTraversal<Function*> rpot(&F);
>      BasicBlock::iterator instI = (*rpot.begin())->begin();
>      builder->SetInsertPoint(instI);
> +	SETDEBUGLOCATION(builder, instI);
>  
>      Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
>  
> diff --git a/backend/src/llvm/llvm_timestamp.cpp b/backend/src/llvm/llvm_timestamp.cpp
> new file mode 100644
> index 0000000..f847d38
> --- /dev/null
> +++ b/backend/src/llvm/llvm_timestamp.cpp
> @@ -0,0 +1,337 @@
> +
> +
> +/**
> + * \file llvm_timestamp.cpp
> + *
> + */
> +
> + 
> +#include <stdio.h>
> +#include <stdlib.h>
> +
> +#include "llvm/Config/llvm-config.h"
> +#if LLVM_VERSION_MINOR <= 2
> +#include "llvm/Function.h"
> +#include "llvm/InstrTypes.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Module.h"
> +#else
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/InstrTypes.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/IR/IntrinsicInst.h"
> +#include "llvm/IR/Module.h"
> +#endif  /* LLVM_VERSION_MINOR <= 2 */
> +#include "llvm/Pass.h"
> +#if LLVM_VERSION_MINOR <= 1
> +#include "llvm/Support/IRBuilder.h"
> +#elif LLVM_VERSION_MINOR == 2
> +#include "llvm/IRBuilder.h"
> +#else
> +#include "llvm/IR/IRBuilder.h"
> +#endif /* LLVM_VERSION_MINOR <= 1 */
> +
> +#if LLVM_VERSION_MINOR >= 5
> +#include "llvm/IR/CallSite.h"
> +#include "llvm/IR/CFG.h"
> +#include "llvm/IR/DebugLoc.h" //ynbai
> +#include "llvm/IR/DebugInfo.h"
> +
> +#else
> +#include "llvm/Support/CallSite.h"
> +#include "llvm/Support/CFG.h"
> +#endif
> +
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/IR/Attributes.h"
> +
> +#include "llvm/llvm_gen_backend.hpp"
> +#include "sys/map.hpp"
> +#include "ir/timestamp.hpp"
> +
> +#include <iostream>
> +#include <vector>
> +#include <fstream>
> +
> +using namespace llvm;
> +using std::vector;
> +
> +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48)
> +
> +namespace gbe
> +{
> +  using namespace ir;
> + 
> +  class TimestampParser : public FunctionPass
> +  {
> +  public:
> +    static char ID;
> +	typedef std::pair<Instruction*, bool> TimestampInst;
> +    Module* module;
> +    IRBuilder<>* builder;
> +    Type* intTy;
> +    Value* tbuf_ptr;
> +	Value* g1Xg2Xg3;
> +    Value* wg_offset;
> +	int tm_num;
> +
> +    TimestampParser(void) : FunctionPass(ID)
> +    {
> +      module = NULL;
> +      builder = NULL;
> +      intTy = NULL;
> +      tbuf_ptr = NULL;
> +	  g1Xg2Xg3 = NULL;
> +      wg_offset = NULL;
> +	  tm_num = 0;
> +    }
> +
> +    ~TimestampParser(void)
> +    {
> +    }
> +
> +	INLINE void storeRegionOffsetI(Value* addr_base, int idx, Value* param[], std::vector<Type *> ParamTys, bool isSt);
> +	bool generateOneTimestampInstruction(llvm::BasicBlock *BB);
> +
> +    virtual const char *getPassName() const
> +    {
> +      return "Timestamp Parser";
> +    }
> +
> +    virtual bool runOnFunction(llvm::Function &F);
> +  };
> +
> +  INLINE void TimestampParser::storeRegionOffsetI(Value* addr_base, int idx, Value* param[], std::vector<Type *> ParamTys, bool isSt){
> +	/* timestamp format in the buffer:
> +		tm_start_region_2, tm_start_region_1, tm_start_region_0, tm_end_region_2, tm_end_region_1, tm_end_region_0
> +		^
> +		|
> +		addr_base
> +	*/
> +	param[0] = ConstantInt::get(IntegerType::get(module->getContext(), 16), idx);
> +	SmallVector<Value *, 2> Args(param, param+2);
> +	Value* region = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(	
> +       "__gen_ocl_region", FunctionType::get(IntegerType::getInt32Ty(module->getContext()), ParamTys, false))),	
> +                              Args);
> +	Value* bi = builder->CreateAdd(addr_base, ConstantInt::get(intTy, ((!isSt)*3 + 2 - idx)*sizeof(uint)));
> +	Value* data_addr = builder->CreateIntToPtr(bi, Type::getInt32PtrTy(module->getContext(), 1));	
> +	builder->CreateStore(region, data_addr);
> +
> +  }
> +  
> +  bool TimestampParser::generateOneTimestampInstruction(llvm::BasicBlock *BB)
> +  {
> +
> +#define SETDEBUGLOCATION(INSN, ASC) 									\	
> +	if(OCL_PROFILING) {										\
> +		llvm::BasicBlock *bb = INSN->getParent();						\
> +		llvm::BasicBlock::iterator iter = ASC?bb->begin():bb->end();	\
> +		while(!(ASC?iter++:iter--)->isIdenticalTo(INSN))	;			\
> +		llvm::MDNode *N = iter->getMetadata("dbg"); 					\
> +		llvm::DebugLoc dg = iter->getDebugLoc();						\
> +		while(!N)	N = (ASC?++iter:--iter)->getMetadata("dbg");		\
> +		builder->SetCurrentDebugLocation(dg);							\
> +	}
> +// end define SETDEBUGLOCATION
> +
> +	Value * op0 = NULL;
> +	Value * val = NULL;
> +		
> +	/////////////////////////////////////////////////////
> +      /* Calculate the data address.
> +      data_addr = (data_offset + tbuf_ptr + wg_offset * sizeof(uint32)) + 
> +      			totalSizeofSize * global_size2 * global_size1 * global_size0 * tm_num 
> +      data_offset = global_size2 * global_size1 * global_size0 * out_buf_sizeof_offset
> +      totalSizeofSize = tm_num * sizeof(uint32)
> +      */
> +    CallInst *read_tm_inst_st = NULL, *read_tm_inst_end = NULL;
> +		
> +	// ecch basic block has 2 timestamps, 
> +	//and tm_num is even when it is at the begin of bb 
> +	int bbNum = (tm_num-tm_num%2)/2; 
> +	Value* data_offset = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, bbNum));
> +    // index_offset + wg_offset
> +    op0 = builder->CreateAdd(data_offset, wg_offset);
> +    // (index_offset + wg_offset)* sizeof(int)
> +    op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(uint)*6));
> +	Value* b = builder->CreateAdd(tbuf_ptr,op0);
> +	
> +	read_tm_inst_st = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                          "__gen_ocl_read_tm", IntegerType::getInt32Ty(module->getContext()),
> +                          NULL)));
> +	tm_num++;
> +	
> +	llvm::BasicBlock::iterator BI = BB->end();
> +	builder->SetInsertPoint(--BI);
> +	SETDEBUGLOCATION(BI, false);
> +	read_tm_inst_end = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                          "__gen_ocl_read_tm", IntegerType::getInt32Ty(module->getContext()),
> +                          NULL)));
> +	tm_num++;
> +	
> +	Value* para[2];
> +	std::vector<Type *> ParamTys(2);
> +	ParamTys[0]=IntegerType::get(module->getContext(), 16);
> +	ParamTys[1]=IntegerType::getInt32Ty(module->getContext());
> +
> +	for(int i=0; i<3; i++)
> +	{
> +		para[1]=read_tm_inst_st;
> +		storeRegionOffsetI(b, i, para, ParamTys, true);
> +		para[1]=read_tm_inst_end;
> +		storeRegionOffsetI(b, i, para, ParamTys, false);
> +	}
> +
> +	CallInst* timestamp_inst = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                              "__gen_ocl_timestamp", Type::getVoidTy(module->getContext()),
> +                              NULL)));
> +	return true;
> +  }
> +
> +  bool TimestampParser::runOnFunction(llvm::Function &F)
> +  {
> +
> +#define SETDEBUGLOCATION(INSN, ISBEGIN) 					\
> +	if(OCL_PROFILING) {										\
> +		llvm::BasicBlock *bb=INSN->getParent();					\
> +		llvm::BasicBlock::iterator iter = bb->begin();			\
> +		if(ISBEGIN)while(!(iter++)->isIdenticalTo(INSN))	;	\
> +		else while(!iter->isIdenticalTo(INSN)) iter++;			\
> +		llvm::MDNode *N = iter->getMetadata("dbg"); 			\
> +		llvm::DebugLoc dg = iter->getDebugLoc();				\
> +		if(ISBEGIN)while(!N) {N = (++iter)->getMetadata("dbg");}	\
> +		else	while(!N) {iter++;N = iter->getMetadata("dbg");}	\
> +		builder->SetCurrentDebugLocation(dg);						\
> +	}
> +// end define SETDEBUGLOCATION
> +
> +    bool hasTimestamp = false;
> +    switch (F.getCallingConv()) {
> +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
> +      case CallingConv::PTX_Device:
> +        return false;
> +      case CallingConv::PTX_Kernel:
> +#else
> +      case CallingConv::C:
> +      case CallingConv::Fast:
> +      case CallingConv::SPIR_KERNEL:
> +#endif
> +        break;
> +      default:
> +        GBE_ASSERTM(false, "Unsupported calling convention");
> +    }
> +
> +    module = F.getParent();
> +    intTy = IntegerType::get(module->getContext(), 32);
> +
> +    // As we inline all function calls, so skip non-kernel functions
> +    bool bKernel = isKernelFunction(F);
> +    if(!bKernel) return false;
> +
> +    builder = new IRBuilder<>(module->getContext());
> +
> +	if (!tbuf_ptr) {
> +      /* alloc a new buffer ptr to collect the timestamps. */
> +      Type *ptrTy = Type::getInt32PtrTy(module->getContext());
> +	  llvm::Constant *tBuf = new GlobalVariable(*module, ptrTy, false,
> +                                GlobalVariable::ExternalLinkage,
> +                                nullptr,
> +                                StringRef("__gen_ocl_timestamp_buf"),
> +                                nullptr,
> +                                GlobalVariable::NotThreadLocal,
> +                                1);
> +      tbuf_ptr = builder->CreatePtrToInt(tBuf, Type::getInt32Ty(module->getContext()));
> +    }
> +
> +	//if (!wg_offset || !g1Xg2Xg3) {
> +      Value* op0 = NULL;
> +      Value* val = NULL;
> +
> +      builder->SetInsertPoint(F.begin()->begin());// Insert the common var in the begin.
> +      SETDEBUGLOCATION(F.begin()->begin(), true);
> +	  
> +      /* FIXME: Because the OpenCL language do not support va macro, and we do not want
> +         to introduce the va_list, va_start and va_end into our code, we just simulate
> +         the function calls to caculate the offset caculation here. */
> +
> +	
> + 
> +#define BUILD_CALL_INST(name) \
> +	CallInst* name = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( \
> +				 "__gen_ocl_get_"#name, 					\
> +				 IntegerType::getInt32Ty(module->getContext()), 		\
> +				 NULL)))
> +
> +      BUILD_CALL_INST(group_id2);
> +      BUILD_CALL_INST(group_id1);
> +      BUILD_CALL_INST(group_id0);
> +      BUILD_CALL_INST(global_size2);
> +      BUILD_CALL_INST(global_size1);
> +      BUILD_CALL_INST(global_size0);
> +      BUILD_CALL_INST(local_id2);
> +      BUILD_CALL_INST(local_id1);
> +      BUILD_CALL_INST(local_id0);
> +      BUILD_CALL_INST(local_size2);
> +      BUILD_CALL_INST(local_size1);
> +      BUILD_CALL_INST(local_size0);
> +
> +#undef BUILD_CALL_INST
> +
> +      /* calculate offset for later usage.
> +         wg_offset = ((local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
> +         + (local_id1 + local_size1 * group_id1) * global_size0
> +         + (local_id0 + local_size0 * group_id0))  */
> +
> +
> +      // local_size2 * group_id2
> +      val = builder->CreateMul(local_size2, group_id2);
> +      // local_id2 + local_size2 * group_id2
> +      val = builder->CreateAdd(local_id2, val);
> +      // global_size1 * global_size0
> +      op0 = builder->CreateMul(global_size1, global_size0);
> +      // (local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
> +      Value* offset1 = builder->CreateMul(val, op0);
> +      // local_size1 * group_id1
> +      val = builder->CreateMul(local_size1, group_id1);
> +      // local_id1 + local_size1 * group_id1
> +      val = builder->CreateAdd(local_id1, val);
> +      // (local_id1 + local_size1 * group_id1) * global_size_0
> +      Value* offset2 = builder->CreateMul(val, global_size0);
> +      // local_size0 * group_id0
> +      val = builder->CreateMul(local_size0, group_id0);
> +      // local_id0 + local_size0 * group_id0
> +      val = builder->CreateAdd(local_id0, val);
> +      // The total sum
> +      val = builder->CreateAdd(val, offset1);
> +      wg_offset = builder->CreateAdd(val, offset2);
> +
> +      // global_size2 * global_size1
> +      op0 = builder->CreateMul(global_size2, global_size1);
> +      // global_size2 * global_size1 * global_size0
> +      g1Xg2Xg3 = builder->CreateMul(op0, global_size0);
> +
> +	  generateOneTimestampInstruction(F.begin());// first basic block
> +
> +   // }
> +
> +	
> +	for (llvm::Function::iterator B = F.begin(), BE = F.end(); B != BE; B++) 
> +		if(B!=F.begin()){
> +			builder->SetInsertPoint(B->getFirstInsertionPt());// insert at the beginning of each basicblock. except first basicblock
> +			SETDEBUGLOCATION(B->getFirstInsertionPt(),false);
> +			generateOneTimestampInstruction(B);
> +	}
> +
> +    delete builder;
> +
> +    return false;
> +  }
> +
> +  FunctionPass* createTimestampParserPass()
> +  {
> +    return new TimestampParser();
> +  }
> +  char TimestampParser::ID = 0;
> +
> +} // end namespace
> diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
> index 891f2a1..012b754 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -71,6 +71,8 @@
>  #include <fcntl.h>
>  #include <memory>
>  
> +#include <stdlib.h>
> +
>  namespace gbe
>  {
>    BVAR(OCL_OUTPUT_CFG, false);
> @@ -226,7 +228,7 @@ namespace gbe
>    {
>      std::string errInfo;
>      std::unique_ptr<llvm::raw_fd_ostream> o = NULL;
> -    if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN)
> +    //if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN)
Is there some mistake here? I think the condition is still needed.

>        o = std::unique_ptr<llvm::raw_fd_ostream>(new llvm::raw_fd_ostream(fileno(stdout), false));
>  
>      // Get the module from its file
> @@ -300,6 +302,12 @@ namespace gbe
>      passes.add(createLowerSwitchPass());           // simplify cfg will generate switch-case instruction
>      passes.add(createScalarizePass());             // Expand all vector ops
>  
> +	//setenv("OCL_PROFILING","0",0);
Something redundant.

> +	if(OCL_PROFILING) {
> +		passes.add(createTimestampParserPass());  // by ynbai
This pass will be replaced by the new profiling pass, so there is no need for it here.

> +		passes.add(createExpandConstantExprPass());
> +	}
> +
>      if(OCL_OUTPUT_CFG)
>        passes.add(createCFGPrinterPass());
>      if(OCL_OUTPUT_CFG_ONLY)
> -- 
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet




More information about the Beignet mailing list