[Beignet] [PATCH] GBE: Re-implement BTI logic in backend

Zhigang Gong zhigang.gong at linux.intel.com
Wed Dec 3 00:12:10 PST 2014


LGTM, pushed with slight change at one assertion.
Thanks.

On Mon, Dec 01, 2014 at 04:56:59PM +0800, Ruiling Song wrote:
> Previously, we search from the use-point of pointers, like load/store
> and try to find all the possible pointer sources. But sometimes we may meet
> ptrtoint/add/inttoptr pattern, and what's worse, for the operands of
> add instruction, it is hard to determine which one is from pointer and
> which one maybe a offset.
> 
> So what we do in this patch is: let's start the search from the def-point
> (like GlobalVariable, kernel function pointer argument, AllocaInst, which
> we care about) and traversal all their uses. And during the traversal,
> we will record the escape point(i.e. Store/load/atomic instructions).
> So later, when we generate these kinds of instructions, we can query their
> possible sources and get the corresponding BTI.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp |   40 ++---
>  backend/src/llvm/llvm_gen_backend.cpp      |  227 +++++++++++++++++-----------
>  2 files changed, 158 insertions(+), 109 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index cd968c0..96970c7 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -3291,6 +3291,18 @@ namespace gbe
>        }
>      }
>  
> +    INLINE GenRegister getRelativeAddress(Selection::Opaque &sel, GenRegister address, ir::AddressSpace space, uint8_t bti) const {
> +      if(space == ir::MEM_LOCAL || space == ir::MEM_CONSTANT)
> +        return address;
> +
> +      sel.push();
> +        sel.curr.noMask = 1;
> +        GenRegister temp = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
> +        sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti), ir::TYPE_U32)));
> +      sel.pop();
> +      return temp;
> +    }
> +
>      INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
> @@ -3303,28 +3315,16 @@ namespace gbe
>          sel.ADD(temp, address, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
>          address = temp;
>        }
> -      if(space == MEM_LOCAL) {
> +
> +      BTI bti = insn.getBTI();
> +      for (int x = 0; x < bti.count; x++) {
> +        GenRegister temp = getRelativeAddress(sel, address, space, bti.bti[x]);
>          if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
> -          this->emitWrite64(sel, insn, address, 0xfe);
> +          this->emitWrite64(sel, insn, temp, bti.bti[x]);
>          else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
> -          this->emitUntypedWrite(sel, insn, address,  0xfe);
> -        else
> -          this->emitByteScatter(sel, insn, elemSize, address, 0xfe);
> -      } else {
> -        BTI bti = insn.getBTI();
> -        for (int x = 0; x < bti.count; x++) {
> -          GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
> -          sel.push();
> -            sel.curr.noMask = 1;
> -            sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti.bti[x]), ir::TYPE_U32)));
> -          sel.pop();
> -          if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
> -            this->emitWrite64(sel, insn, temp, bti.bti[x]);
> -          else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
> -            this->emitUntypedWrite(sel, insn, temp,  bti.bti[x]);
> -          else {
> -            this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
> -          }
> +          this->emitUntypedWrite(sel, insn, temp,  bti.bti[x]);
> +        else {
> +          this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
>          }
>        }
>        return true;
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 939dd63..8826128 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -464,10 +464,16 @@ namespace gbe
>       */
>      set<const Value*> conditionSet;
>      map<const Value*, int> globalPointer;
> +    typedef map<const Value*, int>::iterator GlobalPtrIter;
> +
>      /*!
>       *  <phi,phiCopy> node information for later optimization
>       */
>      map<const ir::Register, const ir::Register> phiMap;
> +
> +    map<Value *, SmallVector<Value *, 4>> pointerOrigMap;
> +    typedef map<Value *, SmallVector<Value *, 4>>::iterator PtrOrigMapIter;
> +
>      /*! We visit each function twice. Once to allocate the registers and once to
>       *  emit the Gen IR instructions
>       */
> @@ -529,14 +535,22 @@ namespace gbe
>        bool bKernel = isKernelFunction(F);
>        if(!bKernel) return false;
>  
> +      analyzePointerOrigin(F);
>        LI = &getAnalysis<LoopInfo>();
>        emitFunction(F);
>        phiMap.clear();
>        globalPointer.clear();
> +      pointerOrigMap.clear();
>        // Reset for next function
>        btiBase = BTI_RESERVED_NUM;
>        return false;
>      }
> +    /*! Given a possible pointer value, find out the interested escape like
> +        load/store or atomic instruction */
> +    void findPointerEscape(Value *ptr);
> +    /*! For all possible pointers, GlobalVariable, function pointer argument,
> +        alloca instruction, find their pointer escape points */
> +    void analyzePointerOrigin(Function &F);
>  
>      virtual bool doFinalization(Module &M) { return false; }
>      /*! handle global variable register allocation (local, constant space) */
> @@ -647,6 +661,84 @@ namespace gbe
>    };
>  
>    char GenWriter::ID = 0;
> +
> +  void GenWriter::findPointerEscape(Value *ptr) {
> +    std::vector<Value*> workList;
> +    typedef std::vector<Value*>::iterator workIter;
> +    std::set<Value *> visited;
> +
> +    if (ptr->use_empty()) return;
> +
> +    workList.push_back(ptr);
> +
> +    for (unsigned i = 0; i < workList.size(); i++) {
> +      Value *work = workList[i];
> +      if (work->use_empty()) continue;
> +
> +      for (Value::use_iterator iter = work->use_begin(); iter != work->use_end(); ++iter) {
> +      // After LLVM 3.5, use_iterator points to 'Use' instead of 'User',
> +      // which is more straightforward.
> +  #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 5)
> +        User *theUser = *iter;
> +  #else
> +        User *theUser = iter->getUser();
> +  #endif
> +        if (visited.find(theUser) != visited.end()) continue;
> +        // pointer address is used as the ValueOperand in store instruction, should be skipped
> +        if (StoreInst *load = dyn_cast<StoreInst>(theUser)) {
> +          if (load->getValueOperand() == work) {
> +            continue;
> +          }
> +        }
> +
> +        visited.insert(theUser);
> +
> +        if (isa<LoadInst>(theUser) || isa<StoreInst>(theUser) || isa<CallInst>(theUser)) {
> +          if (isa<CallInst>(theUser)) {
> +            Function *F = dyn_cast<CallInst>(theUser)->getCalledFunction();
> +            if (!F || F->getIntrinsicID() != 0) continue;
> +          }
> +
> +          PtrOrigMapIter ptrIter = pointerOrigMap.find(theUser);
> +          if (ptrIter == pointerOrigMap.end()) {
> +            // create new one
> +            SmallVector<Value *, 4> pointers;
> +            pointers.push_back(ptr);
> +            pointerOrigMap.insert(std::make_pair(theUser, pointers));
> +          } else {
> +            // append it
> +            (*ptrIter).second.push_back(ptr);
> +          }
> +        } else {
> +          workList.push_back(theUser);
> +        }
> +      }
> +    }
> +  }
> +
> +  void GenWriter::analyzePointerOrigin(Function &F) {
> +    // GlobalVariable
> +    Module::GlobalListType &globalList = const_cast<Module::GlobalListType &> (TheModule->getGlobalList());
> +    for(auto i = globalList.begin(); i != globalList.end(); i ++) {
> +      GlobalVariable &v = *i;
> +      if(!v.isConstantUsed()) continue;
> +      findPointerEscape(&v);
> +    }
> +    // function argument
> +    for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
> +      if (I->getType()->isPointerTy()) {
> +        findPointerEscape(I);
> +      }
> +    }
> +    // alloca
> +    BasicBlock &bb = F.getEntryBlock();
> +    for (BasicBlock::iterator iter = bb.begin(), iterE = bb.end(); iter != iterE; ++iter) {
> +      if (AllocaInst *ai = dyn_cast<AllocaInst>(iter)) {
> +        findPointerEscape(ai);
> +      }
> +    }
> +  }
> +
>    void getSequentialData(const ConstantDataSequential *cda, void *ptr, uint32_t &offset) {
>      StringRef data = cda->getRawDataValues();
>      memcpy((char*)ptr+offset, data.data(), data.size());
> @@ -2894,7 +2986,7 @@ namespace gbe
>      const ir::Register dst = this->getRegister(&I);
>  
>      ir::BTI bti;
> -    gatherBTI(*AI, bti);
> +    gatherBTI(&I, bti);
>      vector<ir::Register> src;
>      uint32_t srcNum = 0;
>      while(AI != AE) {
> @@ -3776,96 +3868,54 @@ handle_write_image:
>    }
>  
>    // The idea behind is to search along the use-def chain, and find out all
> -  // possible source of the pointer. Then in later codeGen, we can emit
> -  // read/store instructions to these btis gathered.
> -  void GenWriter::gatherBTI(Value *pointer, ir::BTI &bti) {
> -    typedef map<const Value*, int>::iterator GlobalPtrIter;
> -    Value *p;
> -    size_t idx = 0;
> -    int nBTI = 0;
> -    std::vector<Value*> candidates;
> -    candidates.push_back(pointer);
> -    std::set<Value*> processed;
> -
> -    while (idx < candidates.size()) {
> -      bool isPrivate = false;
> -      bool needNewBTI = true;
> -      p = candidates[idx];
> -
> -      while (dyn_cast<User>(p) && !dyn_cast<GlobalVariable>(p)) {
> -
> -        if (processed.find(p) == processed.end()) {
> -          processed.insert(p);
> -        } else {
> -          // This use-def chain falls into a loop,
> -          // it does not introduce a new buffer source.
> -          needNewBTI = false;
> -          break;
> -        }
> -
> -        if (dyn_cast<SelectInst>(p)) {
> -          SelectInst *sel = cast<SelectInst>(p);
> -          p = sel->getTrueValue();
> -          candidates.push_back(sel->getFalseValue());
> -          continue;
> -        }
> -
> -        if (dyn_cast<PHINode>(p)) {
> -          PHINode* phi = cast<PHINode>(p);
> -          int n = phi->getNumIncomingValues();
> -          for (int j = 1; j < n; j++)
> -            candidates.push_back(phi->getIncomingValue(j));
> -          p = phi->getIncomingValue(0);
> -          continue;
> -        }
> -
> -        if (dyn_cast<AllocaInst>(p)) {
> -          isPrivate = true;
> -          break;
> +  // possible sources of the pointer. Then in later codeGen, we can emit
> +  // read/store instructions to these BTIs gathered.
> +  void GenWriter::gatherBTI(Value *insn, ir::BTI &bti) {
> +    PtrOrigMapIter iter = pointerOrigMap.find(insn);
> +    if (iter != pointerOrigMap.end()) {
> +      SmallVectorImpl<Value *> &origins = iter->second;
> +      uint8_t nBTI = 0;
> +      for (unsigned i = 0; i < origins.size(); i++) {
> +        uint8_t new_bti = 0;
> +        Value *origin = origins[i];
> +        unsigned space = origin->getType()->getPointerAddressSpace();
> +        switch (space) {
> +          case 0:
> +            new_bti = BTI_PRIVATE;
> +            break;
> +          case 1:
> +          {
> +            GlobalPtrIter iter = globalPointer.find(origin);
> +            GBE_ASSERT(iter != globalPointer.end());
> +            new_bti = iter->second;
> +            break;
> +          }
> +          case 2:
> +            new_bti = BTI_CONSTANT;
> +            break;
> +          case 3:
> +            new_bti = 0xfe;
> +            break;
> +          default:
> +            GBE_ASSERT(0 && "address space not unhandled in gatherBTI()\n");
> +            break;
>          }
> -        p = cast<User>(p)->getOperand(0);
> -      }
> -
> -      if (needNewBTI == false) {
> -        // go to next possible pointer source
> -        idx++; continue;
> -      }
>  
> -      uint8_t new_bti = 0;
> -      if (isPrivate) {
> -        new_bti = BTI_PRIVATE;
> -      } else {
> -        if(isa<Argument>(p) && dyn_cast<Argument>(p)->hasByValAttr()) {
> -          // structure value implementation is not complete now,
> -          // they are now treated as push constant, so, the load/store
> -          // here is not as meaningful.
> -          bti.bti[0] = BTI_PRIVATE;
> -          bti.count = 1;
> -          break;
> -        }
> -        Type *ty = p->getType();
> -        if(ty->getPointerAddressSpace() == 3) {
> -          // __local memory
> -          new_bti = 0xfe;
> -        } else {
> -          // __global memory
> -          GlobalPtrIter iter = globalPointer.find(p);
> -          GBE_ASSERT(iter != globalPointer.end());
> -          new_bti = iter->second;
> +        // avoid duplicate
> +        bool bFound = false;
> +        for (int j = 0; j < nBTI; j++) {
> +          if (bti.bti[j] == new_bti) {
> +            bFound = true; break;
> +          }
>          }
> -      }
> -      // avoid duplicate
> -      bool bFound = false;
> -      for (int j = 0; j < nBTI; j++) {
> -        if (bti.bti[j] == new_bti) {
> -          bFound = true; break;
> +        if (bFound == false) {
> +          bti.bti[nBTI++] = new_bti;
> +          bti.count = nBTI;
>          }
>        }
> -      if (bFound == false) {
> -        bti.bti[nBTI++] = new_bti;
> -        bti.count = nBTI;
> -      }
> -      idx++;
> +    } else {
> +      insn->dump();
> +      GBE_ASSERT(0 && "could not find where the address comes from!");
>      }
>      GBE_ASSERT(bti.count <= MAX_MIXED_POINTER);
>    }
> @@ -3882,9 +3932,8 @@ handle_write_image:
>      const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
>      const ir::Register ptr = this->getRegister(llvmPtr);
>      ir::BTI binding;
> -    if(addrSpace == ir::MEM_GLOBAL || addrSpace == ir::MEM_PRIVATE) {
> -      gatherBTI(llvmPtr, binding);
> -    }
> +    gatherBTI(&I, binding);
> +
>      // Scalar is easy. We neednot build register tuples
>      if (isScalarType(llvmType) == true) {
>        const ir::Type type = getType(ctx, llvmType);
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list