[Beignet] [PATCH 1/7] add debugloc for inserted llvm instructions
Bai Yannan
yannan.bai at intel.com
Fri Sep 18 02:01:51 PDT 2015
add debugloc when llvm instructions are inserted; the debugloc is inherited from the contiguous instruction.
Signed-off-by: Bai Yannan <yannan.bai at intel.com>
---
backend/src/backend/program.cpp | 7 +
backend/src/llvm/llvm_gen_backend.cpp | 25 ++
backend/src/llvm/llvm_loadstore_optimization.cpp | 18 ++
backend/src/llvm/llvm_printf_parser.cpp | 20 ++
backend/src/llvm/llvm_sampler_fix.cpp | 17 ++
backend/src/llvm/llvm_scalarize.cpp | 18 ++
backend/src/llvm/llvm_timestamp.cpp | 337 +++++++++++++++++++++++
backend/src/llvm/llvm_to_gen.cpp | 10 +-
8 files changed, 451 insertions(+), 1 deletion(-)
create mode 100644 backend/src/llvm/llvm_timestamp.cpp
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index f5865c2..af817de 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -49,6 +49,7 @@
#include <iostream>
#include <unistd.h>
#include <mutex>
+#include <cstdlib>
#ifdef GBE_COMPILER_AVAILABLE
/* Not defined for LLVM 3.0 */
@@ -554,6 +555,12 @@ namespace gbe {
args.push_back("stringInput.cl");
args.push_back("-ffp-contract=off");
+ if(getenv("OCL_PROFILING")) {
+ char * isProfiling = getenv("OCL_PROFILING");
+ if(*isProfiling == '1')
+ args.push_back("-g");
+ }
+
// The compiler invocation needs a DiagnosticsEngine so it can report problems
std::string ErrorString;
llvm::raw_string_ostream ErrorInfo(ErrorString);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 4905415..238370a 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -108,6 +108,8 @@
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
#else
#include "llvm/Target/Mangler.h"
#endif
@@ -178,6 +180,20 @@
using namespace llvm;
+// Profiling is on only when OCL_PROFILING is set and its first character is not '0'.
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] != '0')
+// When profiling, give BUILDER the debug location of the first instruction at or
+// after INSN (within INSN's basic block) that carries "dbg" metadata. If no such
+// instruction exists, BUILDER's current debug location is left untouched.
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { if(OCL_PROFILING) { \
+    llvm::BasicBlock::iterator iter(INSN); \
+    llvm::BasicBlock::iterator iterEnd = iter->getParent()->end(); \
+    while(iter != iterEnd && !iter->getMetadata("dbg")) ++iter; \
+    if(iter != iterEnd) BUILDER.SetCurrentDebugLocation(iter->getDebugLoc()); \
+  } } while(0)
+
+
namespace gbe
{
/*! Gen IR manipulates only scalar types */
@@ -977,6 +993,7 @@ namespace gbe
Value *trueVal = getPointerBase((*iter).second[0]);
Value *falseVal = getPointerBase((*iter).second[1]);
Builder.SetInsertPoint(si);
+ SETDEBUGLOCATION(Builder, si);
Value *base = Builder.CreateSelect(si->getCondition(), trueVal, falseVal);
pointerBaseMap.insert(std::make_pair(ptr, base));
return base;
@@ -984,6 +1001,7 @@ namespace gbe
PHINode *phi = dyn_cast<PHINode>(ptr);
IRBuilder<> Builder(phi->getParent());
Builder.SetInsertPoint(phi);
+ SETDEBUGLOCATION(Builder, phi);
PHINode *basePhi = Builder.CreatePHI(ptr->getType(), phi->getNumIncomingValues());
unsigned srcNum = pointers.size();
@@ -997,7 +1015,10 @@ namespace gbe
IRBuilder<> Builder2(phi->getIncomingBlock(x));
BasicBlock *predBB = phi->getIncomingBlock(x);
if (predBB->getTerminator())
+ {
Builder2.SetInsertPoint(predBB->getTerminator());
+ SETDEBUGLOCATION(Builder2, predBB->getTerminator());
+ }
#if (LLVM_VERSION_MAJOR== 3 && LLVM_VERSION_MINOR < 6)
// llvm 3.5 and older version don't have CreateBitOrPointerCast() define
@@ -1065,6 +1086,7 @@ namespace gbe
Value *trueVal = getBtiRegister((*iter).second[0]);
Value *falseVal = getBtiRegister((*iter).second[1]);
Builder.SetInsertPoint(si);
+ SETDEBUGLOCATION(Builder, si);
Value *bti = Builder.CreateSelect(si->getCondition(), trueVal, falseVal);
BtiValueMap.insert(std::make_pair(Val, bti));
return bti;
@@ -1072,6 +1094,7 @@ namespace gbe
PHINode *phi = dyn_cast<PHINode>(Val);
IRBuilder<> Builder(phi->getParent());
Builder.SetInsertPoint(phi);
+ SETDEBUGLOCATION(Builder, phi);
PHINode *btiPhi = Builder.CreatePHI(IntegerType::get(Val->getContext(), 32), phi->getNumIncomingValues());
PtrOrigMapIter iter = pointerOrigMap.find(Val);
@@ -1240,6 +1263,7 @@ namespace gbe
isLoad = false;
}
Builder.SetInsertPoint(cast<Instruction>(theUser));
+ SETDEBUGLOCATION(Builder, cast<Instruction>(theUser));
Type *int32Ty = Type::getInt32Ty(ptr->getContext());
Value *v1 = Builder.CreatePtrToInt(pointerOp, int32Ty);
@@ -1351,6 +1375,7 @@ namespace gbe
IRBuilder<> Builder(&entry);
Builder.SetInsertPoint(bbIter);
+ SETDEBUGLOCATION(Builder, bbIter);
PointerType * AITy = cast<AllocaInst>(base)->getType();
Value * btiArray = Builder.CreateAlloca(AITy->getElementType(), ArraySize, base->getName() + ".bti");
diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
index c6349fa..989b539 100644
--- a/backend/src/llvm/llvm_loadstore_optimization.cpp
+++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
@@ -41,6 +41,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
#endif /* LLVM_VERSION_MINOR <= 2 */
#include "llvm/Pass.h"
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 1
@@ -55,6 +57,20 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
+
+// Profiling is on only when OCL_PROFILING is set and its first character is not '0'.
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] != '0')
+// When profiling, give BUILDER the debug location of the first instruction at or
+// after INSN (within INSN's basic block) that carries "dbg" metadata. If no such
+// instruction exists, BUILDER's current debug location is left untouched.
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { if(OCL_PROFILING) { \
+    llvm::BasicBlock::iterator iter(INSN); \
+    llvm::BasicBlock::iterator iterEnd = iter->getParent()->end(); \
+    while(iter != iterEnd && !iter->getMetadata("dbg")) ++iter; \
+    if(iter != iterEnd) BUILDER.SetCurrentDebugLocation(iter->getDebugLoc()); \
+  } } while(0)
+
namespace gbe {
class GenLoadStoreOptimization : public BasicBlockPass {
@@ -167,6 +183,7 @@ namespace gbe {
unsigned addrSpace = ld->getPointerAddressSpace();
// insert before first load
Builder.SetInsertPoint(ld);
+ SETDEBUGLOCATION(Builder, ld);
VectorType *vecTy = VectorType::get(ld->getType(), size);
Value *vecPtr = Builder.CreateBitCast(ld->getPointerOperand(),
PointerType::get(vecTy, addrSpace));
@@ -226,6 +243,7 @@ namespace gbe {
unsigned align = st->getAlignment();
// insert before the last store
Builder.SetInsertPoint(merged[size-1]);
+ SETDEBUGLOCATION(Builder, merged[size-1]);
Type *dataTy = st->getValueOperand()->getType();
VectorType *vecTy = VectorType::get(dataTy, size);
diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
index 3d84457..3848bfa 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -59,6 +59,9 @@
#if LLVM_VERSION_MINOR >= 5
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
+
#else
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
@@ -73,6 +76,21 @@
using namespace llvm;
+// Profiling is on only when OCL_PROFILING is set and its first character is not '0'.
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] != '0')
+// When profiling, give `builder` the debug location of the first instruction at
+// or after INSN (in INSN's basic block) that carries "dbg" metadata, if any.
+// ISBEGIN is accepted for call-site compatibility; both of its former scan
+// variants collapse into this single bounded forward scan.
+#define SETDEBUGLOCATION(INSN, ISBEGIN) \
+  do { if(OCL_PROFILING) { \
+    (void)(ISBEGIN); \
+    llvm::BasicBlock::iterator iter(INSN); \
+    llvm::BasicBlock::iterator iterEnd = iter->getParent()->end(); \
+    while(iter != iterEnd && !iter->getMetadata("dbg")) ++iter; \
+    if(iter != iterEnd) builder->SetCurrentDebugLocation(iter->getDebugLoc()); \
+  } } while(0)
+
namespace gbe
{
using namespace ir;
@@ -659,6 +677,7 @@ error:
Value* val = NULL;
builder->SetInsertPoint(F.begin()->begin());// Insert the common var in the begin.
+ SETDEBUGLOCATION(F.begin()->begin(), true);
/* FIXME: Because the OpenCL language do not support va macro, and we do not want
to introduce the va_list, va_start and va_end into our code, we just simulate
@@ -721,6 +740,7 @@ error:
/* Now generate the instructions. */
for (auto pInfo : infoVect) {
builder->SetInsertPoint(pInfo.call);
+ SETDEBUGLOCATION(pInfo.call, false);
deadprintfs.push_back(PrintfInst(cast<Instruction>(pInfo.call), generateOnePrintfInstruction(pInfo)));
}
diff --git a/backend/src/llvm/llvm_sampler_fix.cpp b/backend/src/llvm/llvm_sampler_fix.cpp
index 8c76324..a1146d3 100644
--- a/backend/src/llvm/llvm_sampler_fix.cpp
+++ b/backend/src/llvm/llvm_sampler_fix.cpp
@@ -36,6 +36,8 @@
#include "llvm/IR/IRBuilder.h"
#if LLVM_VERSION_MINOR >= 5
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
#else
#include "llvm/Support/CFG.h"
#endif
@@ -47,6 +49,19 @@
using namespace llvm;
+// Profiling is on only when OCL_PROFILING is set and its first character is not '0'.
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] != '0')
+// When profiling, give BUILDER the debug location of the first instruction at or
+// after INSN (within INSN's basic block) that carries "dbg" metadata. If no such
+// instruction exists, BUILDER's current debug location is left untouched.
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { if(OCL_PROFILING) { \
+    llvm::BasicBlock::iterator iter(INSN); \
+    llvm::BasicBlock::iterator iterEnd = iter->getParent()->end(); \
+    while(iter != iterEnd && !iter->getMetadata("dbg")) ++iter; \
+    if(iter != iterEnd) BUILDER.SetCurrentDebugLocation(iter->getDebugLoc()); \
+  } } while(0)
+
namespace gbe {
class SamplerFix : public FunctionPass {
@@ -82,6 +97,7 @@ namespace gbe {
IRBuilder<> Builder(I->getParent());
Builder.SetInsertPoint(I);
+ SETDEBUGLOCATION(Builder, I);
Value *addressMask = ConstantInt::get(i32Ty, __CLK_ADDRESS_MASK);
Value *addressMode = Builder.CreateAnd(I->getOperand(0), addressMask);
Value *clampInt = ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
@@ -108,6 +124,7 @@ namespace gbe {
} else {
IRBuilder<> Builder(I->getParent());
Builder.SetInsertPoint(I);
+ SETDEBUGLOCATION(Builder, I);
Value *normalizeMask = ConstantInt::get(i32Ty, CLK_NORMALIZED_COORDS_TRUE);
Value *normalizeMode = Builder.CreateAnd(I->getOperand(0), normalizeMask);
needFixVal = Builder.CreateICmpEQ(normalizeMode, ConstantInt::get(i32Ty, 0));
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index bc985c6..d4e87af 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -87,6 +87,8 @@
#if LLVM_VERSION_MINOR >= 5
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
#else
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
@@ -98,6 +100,19 @@
using namespace llvm;
+// Profiling is on only when OCL_PROFILING is set and its first character is not '0'.
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] != '0')
+// When profiling, give BUILDER (a pointer to an IRBuilder) the debug location of the
+// first instruction at or after INSN (within INSN's basic block) that carries "dbg"
+// metadata. If no such instruction exists, the debug location is left untouched.
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { if(OCL_PROFILING) { \
+    llvm::BasicBlock::iterator iter(INSN); \
+    llvm::BasicBlock::iterator iterEnd = iter->getParent()->end(); \
+    while(iter != iterEnd && !iter->getMetadata("dbg")) ++iter; \
+    if(iter != iterEnd) BUILDER->SetCurrentDebugLocation(iter->getDebugLoc()); \
+  } } while(0)
+
namespace gbe {
struct VectorValues {
@@ -231,6 +246,7 @@ namespace gbe {
void setAppendPoint(Instruction *insn) {
BasicBlock::iterator next(insn);
builder->SetInsertPoint(++next);
+ SETDEBUGLOCATION(builder, next);
}
DenseMap<Value*, VectorValues> vectorVals;
@@ -526,6 +542,7 @@ namespace gbe {
assert((canGetComponentArgs(inst) || isa<PHINode>(inst)) &&
"Scalarizing an op whose arguments haven't been scalarized ");
builder->SetInsertPoint(inst);
+ SETDEBUGLOCATION(builder, inst);
if (IsPerComponentOp(inst))
return scalarizePerComponent(inst);
@@ -793,6 +810,7 @@ namespace gbe {
ReversePostOrderTraversal<Function*> rpot(&F);
BasicBlock::iterator instI = (*rpot.begin())->begin();
builder->SetInsertPoint(instI);
+ SETDEBUGLOCATION(builder, instI);
Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
diff --git a/backend/src/llvm/llvm_timestamp.cpp b/backend/src/llvm/llvm_timestamp.cpp
new file mode 100644
index 0000000..f847d38
--- /dev/null
+++ b/backend/src/llvm/llvm_timestamp.cpp
@@ -0,0 +1,337 @@
+
+
+/**
+ * \file llvm_timestamp.cpp
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "llvm/Config/llvm-config.h"
+#if LLVM_VERSION_MINOR <= 2
+#include "llvm/Function.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#else
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#endif /* LLVM_VERSION_MINOR <= 2 */
+#include "llvm/Pass.h"
+#if LLVM_VERSION_MINOR <= 1
+#include "llvm/Support/IRBuilder.h"
+#elif LLVM_VERSION_MINOR == 2
+#include "llvm/IRBuilder.h"
+#else
+#include "llvm/IR/IRBuilder.h"
+#endif /* LLVM_VERSION_MINOR <= 1 */
+
+#if LLVM_VERSION_MINOR >= 5
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h" //ynbai
+#include "llvm/IR/DebugInfo.h"
+
+#else
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#endif
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/Attributes.h"
+
+#include "llvm/llvm_gen_backend.hpp"
+#include "sys/map.hpp"
+#include "ir/timestamp.hpp"
+
+#include <iostream>
+#include <vector>
+#include <fstream>
+
+using namespace llvm;
+using std::vector;
+
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] != '0') // unset, or a value starting with '0', means profiling is off
+
+namespace gbe
+{
+ using namespace ir;
+
+ class TimestampParser : public FunctionPass // function pass that inserts timestamp reads into kernel functions
+ {
+ public:
+ static char ID; // pass ID handed to the FunctionPass constructor
+ typedef std::pair<Instruction*, bool> TimestampInst; // not referenced in the visible part of this file
+ Module* module; // parent module of the function being processed; set in runOnFunction
+ IRBuilder<>* builder; // allocated and deleted inside runOnFunction
+ Type* intTy; // 32-bit integer type of the module's context
+ Value* tbuf_ptr; // ptrtoint of the __gen_ocl_timestamp_buf global; created the first time a kernel is processed
+ Value* g1Xg2Xg3; // global_size2 * global_size1 * global_size0
+ Value* wg_offset; // per-work-item offset computed at the kernel entry in runOnFunction
+ int tm_num; // number of timestamp reads emitted so far (two per instrumented basic block)
+ // All pointers start out NULL and tm_num starts at 0.
+ TimestampParser(void) : FunctionPass(ID)
+ {
+ module = NULL;
+ builder = NULL;
+ intTy = NULL;
+ tbuf_ptr = NULL;
+ g1Xg2Xg3 = NULL;
+ wg_offset = NULL;
+ tm_num = 0;
+ }
+ // Nothing to release here: builder is deleted at the end of runOnFunction.
+ ~TimestampParser(void)
+ {
+ }
+ // Helpers that emit the instrumentation; both are defined after the class.
+ INLINE void storeRegionOffsetI(Value* addr_base, int idx, Value* param[], std::vector<Type *> ParamTys, bool isSt);
+ bool generateOneTimestampInstruction(llvm::BasicBlock *BB);
+ // Name reported for this pass.
+ virtual const char *getPassName() const
+ {
+ return "Timestamp Parser";
+ }
+ // FunctionPass entry point; defined after the class.
+ virtual bool runOnFunction(llvm::Function &F);
+ };
+
+ INLINE void TimestampParser::storeRegionOffsetI(Value* addr_base, int idx, Value* param[], std::vector<Type *> ParamTys, bool isSt){
+ /* Emit a call to __gen_ocl_region(idx, param[1]) and store its i32 result into slot
+ ((!isSt)*3 + 2 - idx) of the six-uint record that starts at addr_base. Record layout:
+ tm_start_region_2, tm_start_region_1, tm_start_region_0, tm_end_region_2, tm_end_region_1, tm_end_region_0
+ ^
+ |
+ addr_base   (param[0] is overwritten with idx; param must hold at least 2 entries) */
+ param[0] = ConstantInt::get(IntegerType::get(module->getContext(), 16), idx); // first call argument: idx as an i16 constant
+ SmallVector<Value *, 2> Args(param, param+2); // copies param[0] and param[1]
+ Value* region = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+ "__gen_ocl_region", FunctionType::get(IntegerType::getInt32Ty(module->getContext()), ParamTys, false))),
+ Args);
+ Value* bi = builder->CreateAdd(addr_base, ConstantInt::get(intTy, ((!isSt)*3 + 2 - idx)*sizeof(uint))); // byte address of the destination slot
+ Value* data_addr = builder->CreateIntToPtr(bi, Type::getInt32PtrTy(module->getContext(), 1)); // i32 pointer in address space 1
+ builder->CreateStore(region, data_addr);
+
+ }
+
+  /*! Instrument BB with one start/end timestamp pair. The "start" read is emitted at the
+   *  builder's current insertion point; the "end" read, the six region stores and the
+   *  __gen_ocl_timestamp call go right before BB's last instruction. Always returns true. */
+  bool TimestampParser::generateOneTimestampInstruction(llvm::BasicBlock *BB)
+  {
+// Give `builder` the debug location of the nearest instruction carrying "dbg"
+// metadata, scanning from INSN towards the block end (ASC) or begin (!ASC).
+// The builder is left untouched when no such instruction is found.
+#define SETDEBUGLOCATION(INSN, ASC) \
+  do { if(OCL_PROFILING) { \
+    llvm::BasicBlock::iterator iter(INSN); \
+    llvm::BasicBlock *bb = iter->getParent(); \
+    if(ASC) while(iter != bb->end() && !iter->getMetadata("dbg")) ++iter; \
+    else while(iter != bb->begin() && !iter->getMetadata("dbg")) --iter; \
+    if(iter != bb->end() && iter->getMetadata("dbg")) \
+      builder->SetCurrentDebugLocation(iter->getDebugLoc()); \
+  } } while(0)
+
+    /* Address of this block's record in the timestamp buffer:
+         record_addr = tbuf_ptr + (bbNum * g1Xg2Xg3 + wg_offset) * 6 * sizeof(uint)
+       where a record holds 3 start regions followed by 3 end regions. */
+    CallInst *read_tm_inst_st = NULL, *read_tm_inst_end = NULL;
+
+    // Each basic block consumes 2 timestamps, so tm_num is even on entry and
+    // tm_num/2 is the index of the block being instrumented.
+    int bbNum = (tm_num-tm_num%2)/2;
+    Value* data_offset = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, bbNum));
+    // bbNum * g1Xg2Xg3 + wg_offset
+    Value* op0 = builder->CreateAdd(data_offset, wg_offset);
+    // scale the record index by the record size (6 uints)
+    op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(uint)*6));
+    Value* b = builder->CreateAdd(tbuf_ptr,op0);
+
+    // "start" timestamp, read at the insertion point the caller selected
+    read_tm_inst_st = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+      "__gen_ocl_read_tm", IntegerType::getInt32Ty(module->getContext()),
+      NULL)));
+    tm_num++;
+
+    // "end" timestamp, read right before the last instruction of BB
+    llvm::BasicBlock::iterator BI = BB->end();
+    builder->SetInsertPoint(--BI);
+    SETDEBUGLOCATION(BI, false);
+    read_tm_inst_end = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+      "__gen_ocl_read_tm", IntegerType::getInt32Ty(module->getContext()),
+      NULL)));
+    tm_num++;
+
+    Value* para[2];
+    std::vector<Type *> ParamTys(2);
+    ParamTys[0]=IntegerType::get(module->getContext(), 16);
+    ParamTys[1]=IntegerType::getInt32Ty(module->getContext());
+
+    // store the three regions of both timestamps into the record
+    for(int i=0; i<3; i++)
+    {
+      para[1]=read_tm_inst_st;
+      storeRegionOffsetI(b, i, para, ParamTys, true);
+      para[1]=read_tm_inst_end;
+      storeRegionOffsetI(b, i, para, ParamTys, false);
+    }
+
+    builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+      "__gen_ocl_timestamp", Type::getVoidTy(module->getContext()),
+      NULL)));
+#undef SETDEBUGLOCATION
+    return true;
+  }
+
+  /*! Instrument every basic block of a kernel with timestamp reads. Non-kernel functions
+   *  are left alone (returns false). For a kernel, the per-work-item offsets are computed
+   *  once at the function entry and each basic block then gets a start/end timestamp
+   *  pair. Returns true for kernels, since their IR has been modified. */
+  bool TimestampParser::runOnFunction(llvm::Function &F)
+  {
+// Give `builder` the debug location of the first instruction at or after INSN
+// (within INSN's basic block) that carries "dbg" metadata, if there is one.
+// ISBEGIN is accepted for call-site compatibility and otherwise ignored.
+#undef SETDEBUGLOCATION
+#define SETDEBUGLOCATION(INSN, ISBEGIN) \
+  do { if(OCL_PROFILING) { \
+    (void)(ISBEGIN); \
+    llvm::BasicBlock::iterator iter(INSN); \
+    llvm::BasicBlock::iterator iterEnd = iter->getParent()->end(); \
+    while(iter != iterEnd && !iter->getMetadata("dbg")) ++iter; \
+    if(iter != iterEnd) builder->SetCurrentDebugLocation(iter->getDebugLoc()); \
+  } } while(0)
+
+    // Only the calling conventions listed below are accepted.
+    switch (F.getCallingConv()) {
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+      case CallingConv::PTX_Device:
+        return false;
+      case CallingConv::PTX_Kernel:
+#else
+      case CallingConv::C:
+      case CallingConv::Fast:
+      case CallingConv::SPIR_KERNEL:
+#endif
+        break;
+      default:
+        GBE_ASSERTM(false, "Unsupported calling convention");
+    }
+
+    module = F.getParent();
+    intTy = IntegerType::get(module->getContext(), 32);
+
+    // As we inline all function calls, skip non-kernel functions
+    bool bKernel = isKernelFunction(F);
+    if(!bKernel) return false;
+
+    builder = new IRBuilder<>(module->getContext());
+
+    if (!tbuf_ptr) {
+      /* Declare the external global __gen_ocl_timestamp_buf used to collect the timestamps. */
+      Type *ptrTy = Type::getInt32PtrTy(module->getContext());
+      llvm::Constant *tBuf = new GlobalVariable(*module, ptrTy, false,
+                                                GlobalVariable::ExternalLinkage,
+                                                nullptr,
+                                                StringRef("__gen_ocl_timestamp_buf"),
+                                                nullptr,
+                                                GlobalVariable::NotThreadLocal,
+                                                1);
+      tbuf_ptr = builder->CreatePtrToInt(tBuf, Type::getInt32Ty(module->getContext()));
+    }
+
+    Value* op0 = NULL;
+    Value* val = NULL;
+
+    builder->SetInsertPoint(F.begin()->begin());// Insert the common var in the begin.
+    SETDEBUGLOCATION(F.begin()->begin(), true);
+
+    /* FIXME: Because the OpenCL language does not support va macro, and we do not want
+       to introduce the va_list, va_start and va_end into our code, we just simulate
+       the function calls to calculate the offset here. */
+
+#define BUILD_CALL_INST(name) \
+    CallInst* name = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( \
+                             "__gen_ocl_get_"#name, \
+                             IntegerType::getInt32Ty(module->getContext()), \
+                             NULL)))
+
+    BUILD_CALL_INST(group_id2);
+    BUILD_CALL_INST(group_id1);
+    BUILD_CALL_INST(group_id0);
+    BUILD_CALL_INST(global_size2);
+    BUILD_CALL_INST(global_size1);
+    BUILD_CALL_INST(global_size0);
+    BUILD_CALL_INST(local_id2);
+    BUILD_CALL_INST(local_id1);
+    BUILD_CALL_INST(local_id0);
+    BUILD_CALL_INST(local_size2);
+    BUILD_CALL_INST(local_size1);
+    BUILD_CALL_INST(local_size0);
+
+#undef BUILD_CALL_INST
+
+    /* calculate offset for later usage.
+       wg_offset = ((local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
+       + (local_id1 + local_size1 * group_id1) * global_size0
+       + (local_id0 + local_size0 * group_id0)) */
+
+    // local_size2 * group_id2
+    val = builder->CreateMul(local_size2, group_id2);
+    // local_id2 + local_size2 * group_id2
+    val = builder->CreateAdd(local_id2, val);
+    // global_size1 * global_size0
+    op0 = builder->CreateMul(global_size1, global_size0);
+    // (local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
+    Value* offset1 = builder->CreateMul(val, op0);
+    // local_size1 * group_id1
+    val = builder->CreateMul(local_size1, group_id1);
+    // local_id1 + local_size1 * group_id1
+    val = builder->CreateAdd(local_id1, val);
+    // (local_id1 + local_size1 * group_id1) * global_size_0
+    Value* offset2 = builder->CreateMul(val, global_size0);
+    // local_size0 * group_id0
+    val = builder->CreateMul(local_size0, group_id0);
+    // local_id0 + local_size0 * group_id0
+    val = builder->CreateAdd(local_id0, val);
+    // The total sum
+    val = builder->CreateAdd(val, offset1);
+    wg_offset = builder->CreateAdd(val, offset2);
+
+    // global_size2 * global_size1
+    op0 = builder->CreateMul(global_size2, global_size1);
+    // global_size2 * global_size1 * global_size0
+    g1Xg2Xg3 = builder->CreateMul(op0, global_size0);
+
+    generateOneTimestampInstruction(F.begin());// first basic block
+
+    // Every other basic block: the start timestamp goes at its first insertion point.
+    for (llvm::Function::iterator B = F.begin(), BE = F.end(); B != BE; B++)
+      if(B!=F.begin()){
+        builder->SetInsertPoint(B->getFirstInsertionPt());
+        SETDEBUGLOCATION(B->getFirstInsertionPt(),false);
+        generateOneTimestampInstruction(B);
+      }
+
+    // The builder is only needed while instrumenting; do not leave a dangling pointer.
+    delete builder;
+    builder = NULL;
+
+    // The kernel now contains the instrumentation, so the change must be
+    // reported to the pass manager.
+    return true;
+  }
+
+ FunctionPass* createTimestampParserPass() // factory: returns a newly allocated TimestampParser
+ {
+ return new TimestampParser();
+ }
+ char TimestampParser::ID = 0; // definition of the static pass ID declared in the class
+
+} // end namespace
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 891f2a1..012b754 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -71,6 +71,8 @@
#include <fcntl.h>
#include <memory>
+#include <stdlib.h>
+
namespace gbe
{
BVAR(OCL_OUTPUT_CFG, false);
@@ -226,7 +228,7 @@ namespace gbe
{
std::string errInfo;
std::unique_ptr<llvm::raw_fd_ostream> o = NULL;
- if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN)
+ //if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN)
o = std::unique_ptr<llvm::raw_fd_ostream>(new llvm::raw_fd_ostream(fileno(stdout), false));
// Get the module from its file
@@ -300,6 +302,12 @@ namespace gbe
passes.add(createLowerSwitchPass()); // simplify cfg will generate switch-case instruction
passes.add(createScalarizePass()); // Expand all vector ops
+ //setenv("OCL_PROFILING","0",0);
+ if(OCL_PROFILING) {
+ passes.add(createTimestampParserPass()); // by ynbai
+ passes.add(createExpandConstantExprPass());
+ }
+
if(OCL_OUTPUT_CFG)
passes.add(createCFGPrinterPass());
if(OCL_OUTPUT_CFG_ONLY)
--
1.9.1
More information about the Beignet
mailing list