[Beignet] [PATCH] GBE: Re-implement BTI logic in backend
Zhigang Gong
zhigang.gong at linux.intel.com
Wed Dec 3 00:12:10 PST 2014
LGTM, pushed with slight change at one assertion.
Thanks.
On Mon, Dec 01, 2014 at 04:56:59PM +0800, Ruiling Song wrote:
> Previously, we search from the use-point of pointers, like load/store
> and try to find all the possible pointer sources. But sometimes we may meet
> ptrtoint/add/inttoptr pattern, and what's worse, for the operands of
> add instruction, it is hard to determine which one is from pointer and
> which one maybe a offset.
>
> So what we do in this patch is: let's start the search from the def-point
> (like GlobalVariable, kernel function pointer argument, AllocaInst, which
> we care about) and traversal all their uses. And during the traversal,
> we will record the escape point(i.e. Store/load/atomic instructions).
> So later, when we generate these kinds of instructions, we can query their
> possible sources and get the corresponding BTI.
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/backend/gen_insn_selection.cpp | 40 ++---
> backend/src/llvm/llvm_gen_backend.cpp | 227 +++++++++++++++++-----------
> 2 files changed, 158 insertions(+), 109 deletions(-)
>
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index cd968c0..96970c7 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -3291,6 +3291,18 @@ namespace gbe
> }
> }
>
> + INLINE GenRegister getRelativeAddress(Selection::Opaque &sel, GenRegister address, ir::AddressSpace space, uint8_t bti) const {
> + if(space == ir::MEM_LOCAL || space == ir::MEM_CONSTANT)
> + return address;
> +
> + sel.push();
> + sel.curr.noMask = 1;
> + GenRegister temp = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
> + sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti), ir::TYPE_U32)));
> + sel.pop();
> + return temp;
> + }
> +
> INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn, bool &markChildren) const
> {
> using namespace ir;
> @@ -3303,28 +3315,16 @@ namespace gbe
> sel.ADD(temp, address, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
> address = temp;
> }
> - if(space == MEM_LOCAL) {
> +
> + BTI bti = insn.getBTI();
> + for (int x = 0; x < bti.count; x++) {
> + GenRegister temp = getRelativeAddress(sel, address, space, bti.bti[x]);
> if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
> - this->emitWrite64(sel, insn, address, 0xfe);
> + this->emitWrite64(sel, insn, temp, bti.bti[x]);
> else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
> - this->emitUntypedWrite(sel, insn, address, 0xfe);
> - else
> - this->emitByteScatter(sel, insn, elemSize, address, 0xfe);
> - } else {
> - BTI bti = insn.getBTI();
> - for (int x = 0; x < bti.count; x++) {
> - GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
> - sel.push();
> - sel.curr.noMask = 1;
> - sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti.bti[x]), ir::TYPE_U32)));
> - sel.pop();
> - if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
> - this->emitWrite64(sel, insn, temp, bti.bti[x]);
> - else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
> - this->emitUntypedWrite(sel, insn, temp, bti.bti[x]);
> - else {
> - this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
> - }
> + this->emitUntypedWrite(sel, insn, temp, bti.bti[x]);
> + else {
> + this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
> }
> }
> return true;
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 939dd63..8826128 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -464,10 +464,16 @@ namespace gbe
> */
> set<const Value*> conditionSet;
> map<const Value*, int> globalPointer;
> + typedef map<const Value*, int>::iterator GlobalPtrIter;
> +
> /*!
> * <phi,phiCopy> node information for later optimization
> */
> map<const ir::Register, const ir::Register> phiMap;
> +
> + map<Value *, SmallVector<Value *, 4>> pointerOrigMap;
> + typedef map<Value *, SmallVector<Value *, 4>>::iterator PtrOrigMapIter;
> +
> /*! We visit each function twice. Once to allocate the registers and once to
> * emit the Gen IR instructions
> */
> @@ -529,14 +535,22 @@ namespace gbe
> bool bKernel = isKernelFunction(F);
> if(!bKernel) return false;
>
> + analyzePointerOrigin(F);
> LI = &getAnalysis<LoopInfo>();
> emitFunction(F);
> phiMap.clear();
> globalPointer.clear();
> + pointerOrigMap.clear();
> // Reset for next function
> btiBase = BTI_RESERVED_NUM;
> return false;
> }
> + /*! Given a possible pointer value, find out the interested escape like
> + load/store or atomic instruction */
> + void findPointerEscape(Value *ptr);
> + /*! For all possible pointers, GlobalVariable, function pointer argument,
> + alloca instruction, find their pointer escape points */
> + void analyzePointerOrigin(Function &F);
>
> virtual bool doFinalization(Module &M) { return false; }
> /*! handle global variable register allocation (local, constant space) */
> @@ -647,6 +661,84 @@ namespace gbe
> };
>
> char GenWriter::ID = 0;
> +
> + void GenWriter::findPointerEscape(Value *ptr) {
> + std::vector<Value*> workList;
> + typedef std::vector<Value*>::iterator workIter;
> + std::set<Value *> visited;
> +
> + if (ptr->use_empty()) return;
> +
> + workList.push_back(ptr);
> +
> + for (unsigned i = 0; i < workList.size(); i++) {
> + Value *work = workList[i];
> + if (work->use_empty()) continue;
> +
> + for (Value::use_iterator iter = work->use_begin(); iter != work->use_end(); ++iter) {
> + // After LLVM 3.5, use_iterator points to 'Use' instead of 'User',
> + // which is more straightforward.
> + #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 5)
> + User *theUser = *iter;
> + #else
> + User *theUser = iter->getUser();
> + #endif
> + if (visited.find(theUser) != visited.end()) continue;
> + // pointer address is used as the ValueOperand in store instruction, should be skipped
> + if (StoreInst *load = dyn_cast<StoreInst>(theUser)) {
> + if (load->getValueOperand() == work) {
> + continue;
> + }
> + }
> +
> + visited.insert(theUser);
> +
> + if (isa<LoadInst>(theUser) || isa<StoreInst>(theUser) || isa<CallInst>(theUser)) {
> + if (isa<CallInst>(theUser)) {
> + Function *F = dyn_cast<CallInst>(theUser)->getCalledFunction();
> + if (!F || F->getIntrinsicID() != 0) continue;
> + }
> +
> + PtrOrigMapIter ptrIter = pointerOrigMap.find(theUser);
> + if (ptrIter == pointerOrigMap.end()) {
> + // create new one
> + SmallVector<Value *, 4> pointers;
> + pointers.push_back(ptr);
> + pointerOrigMap.insert(std::make_pair(theUser, pointers));
> + } else {
> + // append it
> + (*ptrIter).second.push_back(ptr);
> + }
> + } else {
> + workList.push_back(theUser);
> + }
> + }
> + }
> + }
> +
> + void GenWriter::analyzePointerOrigin(Function &F) {
> + // GlobalVariable
> + Module::GlobalListType &globalList = const_cast<Module::GlobalListType &> (TheModule->getGlobalList());
> + for(auto i = globalList.begin(); i != globalList.end(); i ++) {
> + GlobalVariable &v = *i;
> + if(!v.isConstantUsed()) continue;
> + findPointerEscape(&v);
> + }
> + // function argument
> + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
> + if (I->getType()->isPointerTy()) {
> + findPointerEscape(I);
> + }
> + }
> + // alloca
> + BasicBlock &bb = F.getEntryBlock();
> + for (BasicBlock::iterator iter = bb.begin(), iterE = bb.end(); iter != iterE; ++iter) {
> + if (AllocaInst *ai = dyn_cast<AllocaInst>(iter)) {
> + findPointerEscape(ai);
> + }
> + }
> + }
> +
> void getSequentialData(const ConstantDataSequential *cda, void *ptr, uint32_t &offset) {
> StringRef data = cda->getRawDataValues();
> memcpy((char*)ptr+offset, data.data(), data.size());
> @@ -2894,7 +2986,7 @@ namespace gbe
> const ir::Register dst = this->getRegister(&I);
>
> ir::BTI bti;
> - gatherBTI(*AI, bti);
> + gatherBTI(&I, bti);
> vector<ir::Register> src;
> uint32_t srcNum = 0;
> while(AI != AE) {
> @@ -3776,96 +3868,54 @@ handle_write_image:
> }
>
> // The idea behind is to search along the use-def chain, and find out all
> - // possible source of the pointer. Then in later codeGen, we can emit
> - // read/store instructions to these btis gathered.
> - void GenWriter::gatherBTI(Value *pointer, ir::BTI &bti) {
> - typedef map<const Value*, int>::iterator GlobalPtrIter;
> - Value *p;
> - size_t idx = 0;
> - int nBTI = 0;
> - std::vector<Value*> candidates;
> - candidates.push_back(pointer);
> - std::set<Value*> processed;
> -
> - while (idx < candidates.size()) {
> - bool isPrivate = false;
> - bool needNewBTI = true;
> - p = candidates[idx];
> -
> - while (dyn_cast<User>(p) && !dyn_cast<GlobalVariable>(p)) {
> -
> - if (processed.find(p) == processed.end()) {
> - processed.insert(p);
> - } else {
> - // This use-def chain falls into a loop,
> - // it does not introduce a new buffer source.
> - needNewBTI = false;
> - break;
> - }
> -
> - if (dyn_cast<SelectInst>(p)) {
> - SelectInst *sel = cast<SelectInst>(p);
> - p = sel->getTrueValue();
> - candidates.push_back(sel->getFalseValue());
> - continue;
> - }
> -
> - if (dyn_cast<PHINode>(p)) {
> - PHINode* phi = cast<PHINode>(p);
> - int n = phi->getNumIncomingValues();
> - for (int j = 1; j < n; j++)
> - candidates.push_back(phi->getIncomingValue(j));
> - p = phi->getIncomingValue(0);
> - continue;
> - }
> -
> - if (dyn_cast<AllocaInst>(p)) {
> - isPrivate = true;
> - break;
> + // possible sources of the pointer. Then in later codeGen, we can emit
> + // read/store instructions to these BTIs gathered.
> + void GenWriter::gatherBTI(Value *insn, ir::BTI &bti) {
> + PtrOrigMapIter iter = pointerOrigMap.find(insn);
> + if (iter != pointerOrigMap.end()) {
> + SmallVectorImpl<Value *> &origins = iter->second;
> + uint8_t nBTI = 0;
> + for (unsigned i = 0; i < origins.size(); i++) {
> + uint8_t new_bti = 0;
> + Value *origin = origins[i];
> + unsigned space = origin->getType()->getPointerAddressSpace();
> + switch (space) {
> + case 0:
> + new_bti = BTI_PRIVATE;
> + break;
> + case 1:
> + {
> + GlobalPtrIter iter = globalPointer.find(origin);
> + GBE_ASSERT(iter != globalPointer.end());
> + new_bti = iter->second;
> + break;
> + }
> + case 2:
> + new_bti = BTI_CONSTANT;
> + break;
> + case 3:
> + new_bti = 0xfe;
> + break;
> + default:
> + GBE_ASSERT(0 && "address space not unhandled in gatherBTI()\n");
> + break;
> }
> - p = cast<User>(p)->getOperand(0);
> - }
> -
> - if (needNewBTI == false) {
> - // go to next possible pointer source
> - idx++; continue;
> - }
>
> - uint8_t new_bti = 0;
> - if (isPrivate) {
> - new_bti = BTI_PRIVATE;
> - } else {
> - if(isa<Argument>(p) && dyn_cast<Argument>(p)->hasByValAttr()) {
> - // structure value implementation is not complete now,
> - // they are now treated as push constant, so, the load/store
> - // here is not as meaningful.
> - bti.bti[0] = BTI_PRIVATE;
> - bti.count = 1;
> - break;
> - }
> - Type *ty = p->getType();
> - if(ty->getPointerAddressSpace() == 3) {
> - // __local memory
> - new_bti = 0xfe;
> - } else {
> - // __global memory
> - GlobalPtrIter iter = globalPointer.find(p);
> - GBE_ASSERT(iter != globalPointer.end());
> - new_bti = iter->second;
> + // avoid duplicate
> + bool bFound = false;
> + for (int j = 0; j < nBTI; j++) {
> + if (bti.bti[j] == new_bti) {
> + bFound = true; break;
> + }
> }
> - }
> - // avoid duplicate
> - bool bFound = false;
> - for (int j = 0; j < nBTI; j++) {
> - if (bti.bti[j] == new_bti) {
> - bFound = true; break;
> + if (bFound == false) {
> + bti.bti[nBTI++] = new_bti;
> + bti.count = nBTI;
> }
> }
> - if (bFound == false) {
> - bti.bti[nBTI++] = new_bti;
> - bti.count = nBTI;
> - }
> - idx++;
> + } else {
> + insn->dump();
> + GBE_ASSERT(0 && "could not find where the address comes from!");
> }
> GBE_ASSERT(bti.count <= MAX_MIXED_POINTER);
> }
> @@ -3882,9 +3932,8 @@ handle_write_image:
> const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
> const ir::Register ptr = this->getRegister(llvmPtr);
> ir::BTI binding;
> - if(addrSpace == ir::MEM_GLOBAL || addrSpace == ir::MEM_PRIVATE) {
> - gatherBTI(llvmPtr, binding);
> - }
> + gatherBTI(&I, binding);
> +
> // Scalar is easy. We neednot build register tuples
> if (isScalarType(llvmType) == true) {
> const ir::Type type = getType(ctx, llvmType);
> --
> 1.7.10.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list