[Beignet] [PATCH] GBE: Re-implement BTI logic in backend
Ruiling Song
ruiling.song at intel.com
Mon Dec 1 00:56:59 PST 2014
Previously, we searched from the use-points of pointers (load/store
instructions) and tried to find all the possible pointer sources. But
sometimes we meet a ptrtoint/add/inttoptr pattern, and what is worse,
for the operands of the add instruction it is hard to determine which
one comes from the pointer and which one may be an offset.
So what we do in this patch is start the search from the def-points we
care about (GlobalVariable, kernel pointer arguments, AllocaInst) and
traverse all their uses. During the traversal we record the escape
points (i.e. load/store/atomic instructions). Later, when we generate
these kinds of instructions, we can query their possible sources and
get the corresponding BTIs.
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 40 ++---
backend/src/llvm/llvm_gen_backend.cpp | 227 +++++++++++++++++-----------
2 files changed, 158 insertions(+), 109 deletions(-)
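(Illustrative note, not part of the patch.) A hypothetical kernel like the
one below shows the pattern the old search could not handle: the cast-heavy
address computation is typically lowered to a ptrtoint/add/inttoptr sequence
in the LLVM IR, so walking backwards from the store cannot tell which add
operand is the buffer and which is the byte offset. The kernel name and
arguments here are made up for illustration only.

    /* Hypothetical OpenCL C kernel, for illustration only (the kernel name
     * and arguments are made up and are not part of this patch). */
    __kernel void copy_with_offset(__global int *dst,
                                   __global const int *src,
                                   uint byte_offset)
    {
      size_t gid = get_global_id(0);
      /* Mixing an integer cast into the address arithmetic is usually
       * lowered to a ptrtoint/add/inttoptr sequence in the LLVM IR, so a
       * backwards search from the store cannot tell pointer from offset. */
      __global int *p = (__global int *)((uintptr_t)dst + byte_offset);
      p[gid] = src[gid];
    }

With this patch, the traversal instead starts at the dst argument, walks
through its uses (the casts and the add) and records the final store as an
escape point, so code generation can later query that store and emit the
write with the BTI bound to dst.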
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cd968c0..96970c7 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3291,6 +3291,18 @@ namespace gbe
}
}
+ INLINE GenRegister getRelativeAddress(Selection::Opaque &sel, GenRegister address, ir::AddressSpace space, uint8_t bti) const {
+ if(space == ir::MEM_LOCAL || space == ir::MEM_CONSTANT)
+ return address;
+
+ sel.push();
+ sel.curr.noMask = 1;
+ GenRegister temp = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
+ sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti), ir::TYPE_U32)));
+ sel.pop();
+ return temp;
+ }
+
INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn, bool &markChildren) const
{
using namespace ir;
@@ -3303,28 +3315,16 @@ namespace gbe
sel.ADD(temp, address, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
address = temp;
}
- if(space == MEM_LOCAL) {
+
+ BTI bti = insn.getBTI();
+ for (int x = 0; x < bti.count; x++) {
+ GenRegister temp = getRelativeAddress(sel, address, space, bti.bti[x]);
if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
- this->emitWrite64(sel, insn, address, 0xfe);
+ this->emitWrite64(sel, insn, temp, bti.bti[x]);
else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
- this->emitUntypedWrite(sel, insn, address, 0xfe);
- else
- this->emitByteScatter(sel, insn, elemSize, address, 0xfe);
- } else {
- BTI bti = insn.getBTI();
- for (int x = 0; x < bti.count; x++) {
- GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
- sel.push();
- sel.curr.noMask = 1;
- sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti.bti[x]), ir::TYPE_U32)));
- sel.pop();
- if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
- this->emitWrite64(sel, insn, temp, bti.bti[x]);
- else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
- this->emitUntypedWrite(sel, insn, temp, bti.bti[x]);
- else {
- this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
- }
+ this->emitUntypedWrite(sel, insn, temp, bti.bti[x]);
+ else {
+ this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
}
}
return true;
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 939dd63..8826128 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -464,10 +464,16 @@ namespace gbe
*/
set<const Value*> conditionSet;
map<const Value*, int> globalPointer;
+ typedef map<const Value*, int>::iterator GlobalPtrIter;
+
/*!
* <phi,phiCopy> node information for later optimization
*/
map<const ir::Register, const ir::Register> phiMap;
+
+ map<Value *, SmallVector<Value *, 4>> pointerOrigMap;
+ typedef map<Value *, SmallVector<Value *, 4>>::iterator PtrOrigMapIter;
+
/*! We visit each function twice. Once to allocate the registers and once to
* emit the Gen IR instructions
*/
@@ -529,14 +535,22 @@ namespace gbe
bool bKernel = isKernelFunction(F);
if(!bKernel) return false;
+ analyzePointerOrigin(F);
LI = &getAnalysis<LoopInfo>();
emitFunction(F);
phiMap.clear();
globalPointer.clear();
+ pointerOrigMap.clear();
// Reset for next function
btiBase = BTI_RESERVED_NUM;
return false;
}
+ /*! Given a possible pointer value, find the escape points we care about,
+ i.e. load/store or atomic instructions */
+ void findPointerEscape(Value *ptr);
+ /*! For all possible pointer sources (GlobalVariable, kernel pointer argument,
+ alloca instruction), find their escape points */
+ void analyzePointerOrigin(Function &F);
virtual bool doFinalization(Module &M) { return false; }
/*! handle global variable register allocation (local, constant space) */
@@ -647,6 +661,84 @@ namespace gbe
};
char GenWriter::ID = 0;
+
+ void GenWriter::findPointerEscape(Value *ptr) {
+ std::vector<Value*> workList;
+ typedef std::vector<Value*>::iterator workIter;
+ std::set<Value *> visited;
+
+ if (ptr->use_empty()) return;
+
+ workList.push_back(ptr);
+
+ for (unsigned i = 0; i < workList.size(); i++) {
+ Value *work = workList[i];
+ if (work->use_empty()) continue;
+
+ for (Value::use_iterator iter = work->use_begin(); iter != work->use_end(); ++iter) {
+ // After LLVM 3.5, use_iterator points to 'Use' instead of 'User',
+ // which is more straightforward.
+ #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 5)
+ User *theUser = *iter;
+ #else
+ User *theUser = iter->getUser();
+ #endif
+ if (visited.find(theUser) != visited.end()) continue;
+ // a pointer used as the value operand of a store instruction should be skipped
+ if (StoreInst *store = dyn_cast<StoreInst>(theUser)) {
+ if (store->getValueOperand() == work) {
+ continue;
+ }
+ }
+
+ visited.insert(theUser);
+
+ if (isa<LoadInst>(theUser) || isa<StoreInst>(theUser) || isa<CallInst>(theUser)) {
+ if (isa<CallInst>(theUser)) {
+ Function *F = dyn_cast<CallInst>(theUser)->getCalledFunction();
+ if (!F || F->getIntrinsicID() != 0) continue;
+ }
+
+ PtrOrigMapIter ptrIter = pointerOrigMap.find(theUser);
+ if (ptrIter == pointerOrigMap.end()) {
+ // create new one
+ SmallVector<Value *, 4> pointers;
+ pointers.push_back(ptr);
+ pointerOrigMap.insert(std::make_pair(theUser, pointers));
+ } else {
+ // append it
+ (*ptrIter).second.push_back(ptr);
+ }
+ } else {
+ workList.push_back(theUser);
+ }
+ }
+ }
+ }
+
+ void GenWriter::analyzePointerOrigin(Function &F) {
+ // GlobalVariable
+ Module::GlobalListType &globalList = const_cast<Module::GlobalListType &> (TheModule->getGlobalList());
+ for(auto i = globalList.begin(); i != globalList.end(); i ++) {
+ GlobalVariable &v = *i;
+ if(!v.isConstantUsed()) continue;
+ findPointerEscape(&v);
+ }
+ // function argument
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
+ if (I->getType()->isPointerTy()) {
+ findPointerEscape(I);
+ }
+ }
+ // alloca
+ BasicBlock &bb = F.getEntryBlock();
+ for (BasicBlock::iterator iter = bb.begin(), iterE = bb.end(); iter != iterE; ++iter) {
+ if (AllocaInst *ai = dyn_cast<AllocaInst>(iter)) {
+ findPointerEscape(ai);
+ }
+ }
+ }
+
void getSequentialData(const ConstantDataSequential *cda, void *ptr, uint32_t &offset) {
StringRef data = cda->getRawDataValues();
memcpy((char*)ptr+offset, data.data(), data.size());
@@ -2894,7 +2986,7 @@ namespace gbe
const ir::Register dst = this->getRegister(&I);
ir::BTI bti;
- gatherBTI(*AI, bti);
+ gatherBTI(&I, bti);
vector<ir::Register> src;
uint32_t srcNum = 0;
while(AI != AE) {
@@ -3776,96 +3868,54 @@ handle_write_image:
}
// The idea behind is to search along the use-def chain, and find out all
- // possible source of the pointer. Then in later codeGen, we can emit
- // read/store instructions to these btis gathered.
- void GenWriter::gatherBTI(Value *pointer, ir::BTI &bti) {
- typedef map<const Value*, int>::iterator GlobalPtrIter;
- Value *p;
- size_t idx = 0;
- int nBTI = 0;
- std::vector<Value*> candidates;
- candidates.push_back(pointer);
- std::set<Value*> processed;
-
- while (idx < candidates.size()) {
- bool isPrivate = false;
- bool needNewBTI = true;
- p = candidates[idx];
-
- while (dyn_cast<User>(p) && !dyn_cast<GlobalVariable>(p)) {
-
- if (processed.find(p) == processed.end()) {
- processed.insert(p);
- } else {
- // This use-def chain falls into a loop,
- // it does not introduce a new buffer source.
- needNewBTI = false;
- break;
- }
-
- if (dyn_cast<SelectInst>(p)) {
- SelectInst *sel = cast<SelectInst>(p);
- p = sel->getTrueValue();
- candidates.push_back(sel->getFalseValue());
- continue;
- }
-
- if (dyn_cast<PHINode>(p)) {
- PHINode* phi = cast<PHINode>(p);
- int n = phi->getNumIncomingValues();
- for (int j = 1; j < n; j++)
- candidates.push_back(phi->getIncomingValue(j));
- p = phi->getIncomingValue(0);
- continue;
- }
-
- if (dyn_cast<AllocaInst>(p)) {
- isPrivate = true;
- break;
+ // possible sources of the pointer. Then in later codeGen, we can emit
+ // read/store instructions to these BTIs gathered.
+ void GenWriter::gatherBTI(Value *insn, ir::BTI &bti) {
+ PtrOrigMapIter iter = pointerOrigMap.find(insn);
+ if (iter != pointerOrigMap.end()) {
+ SmallVectorImpl<Value *> &origins = iter->second;
+ uint8_t nBTI = 0;
+ for (unsigned i = 0; i < origins.size(); i++) {
+ uint8_t new_bti = 0;
+ Value *origin = origins[i];
+ unsigned space = origin->getType()->getPointerAddressSpace();
+ switch (space) {
+ case 0:
+ new_bti = BTI_PRIVATE;
+ break;
+ case 1:
+ {
+ GlobalPtrIter iter = globalPointer.find(origin);
+ GBE_ASSERT(iter != globalPointer.end());
+ new_bti = iter->second;
+ break;
+ }
+ case 2:
+ new_bti = BTI_CONSTANT;
+ break;
+ case 3:
+ new_bti = 0xfe;
+ break;
+ default:
+ GBE_ASSERT(0 && "address space not handled in gatherBTI()\n");
+ break;
}
- p = cast<User>(p)->getOperand(0);
- }
-
- if (needNewBTI == false) {
- // go to next possible pointer source
- idx++; continue;
- }
- uint8_t new_bti = 0;
- if (isPrivate) {
- new_bti = BTI_PRIVATE;
- } else {
- if(isa<Argument>(p) && dyn_cast<Argument>(p)->hasByValAttr()) {
- // structure value implementation is not complete now,
- // they are now treated as push constant, so, the load/store
- // here is not as meaningful.
- bti.bti[0] = BTI_PRIVATE;
- bti.count = 1;
- break;
- }
- Type *ty = p->getType();
- if(ty->getPointerAddressSpace() == 3) {
- // __local memory
- new_bti = 0xfe;
- } else {
- // __global memory
- GlobalPtrIter iter = globalPointer.find(p);
- GBE_ASSERT(iter != globalPointer.end());
- new_bti = iter->second;
+ // avoid duplicate
+ bool bFound = false;
+ for (int j = 0; j < nBTI; j++) {
+ if (bti.bti[j] == new_bti) {
+ bFound = true; break;
+ }
}
- }
- // avoid duplicate
- bool bFound = false;
- for (int j = 0; j < nBTI; j++) {
- if (bti.bti[j] == new_bti) {
- bFound = true; break;
+ if (bFound == false) {
+ bti.bti[nBTI++] = new_bti;
+ bti.count = nBTI;
}
}
- if (bFound == false) {
- bti.bti[nBTI++] = new_bti;
- bti.count = nBTI;
- }
- idx++;
+ } else {
+ insn->dump();
+ GBE_ASSERT(0 && "could not find where the address comes from!");
}
GBE_ASSERT(bti.count <= MAX_MIXED_POINTER);
}
@@ -3882,9 +3932,8 @@ handle_write_image:
const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
const ir::Register ptr = this->getRegister(llvmPtr);
ir::BTI binding;
- if(addrSpace == ir::MEM_GLOBAL || addrSpace == ir::MEM_PRIVATE) {
- gatherBTI(llvmPtr, binding);
- }
+ gatherBTI(&I, binding);
+
// Scalar is easy. We need not build register tuples
if (isScalarType(llvmType) == true) {
const ir::Type type = getType(ctx, llvmType);
--
1.7.10.4