[Beignet] [PATCH] GBE: Re-implement BTI logic in backend

Ruiling Song ruiling.song at intel.com
Mon Dec 1 00:56:59 PST 2014


Previously, we searched from the use points of pointers, such as
load/store, and tried to find all the possible pointer sources. But
sometimes we meet a ptrtoint/add/inttoptr pattern and, what's worse,
for the operands of the add instruction it is hard to determine which
one comes from a pointer and which one may be an offset.
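
As an illustration (a hypothetical snippet, not taken from the patch or
any Beignet test), source like the following is lowered to exactly that
ptrtoint/add/inttoptr shape, and in the resulting add neither operand
is obviously the pointer:

  // Hypothetical example only: integer pointer arithmetic that lowers
  // to ptrtoint -> add -> inttoptr before the final load.
  #include <cstdint>

  int load_with_int_math(int *base, std::intptr_t byte_offset) {
    std::intptr_t p =
        reinterpret_cast<std::intptr_t>(base) + byte_offset; // ptrtoint + add
    return *reinterpret_cast<int *>(p);                      // inttoptr + load
  }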

So what this patch does is start the search from the definition points
we care about (GlobalVariable, pointer arguments of the kernel
function, AllocaInst) and traverse all their uses. During the
traversal, we record the escape points (i.e. store/load/atomic
instructions). Later, when we generate these kinds of instructions, we
can query their possible sources and get the corresponding BTIs.
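
A minimal sketch of that def-to-use walk, assuming LLVM's newer
Value::users() iteration (the real implementation is findPointerEscape()
in the diff below, which also treats non-intrinsic calls as escape
points):

  #include <map>
  #include <set>
  #include <vector>
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Escape point (load/store/atomic) -> pointer origins that may reach it.
  static std::map<Value *, std::vector<Value *>> escapeToOrigins;

  static void recordEscapes(Value *origin) {
    std::vector<Value *> workList{origin};
    std::set<Value *> visited;
    for (unsigned i = 0; i < workList.size(); ++i) {
      Value *work = workList[i];
      for (User *user : work->users()) {
        // A pointer stored as data is not an address use of this origin.
        if (StoreInst *st = dyn_cast<StoreInst>(user))
          if (st->getValueOperand() == work)
            continue;
        if (!visited.insert(user).second)
          continue;
        if (isa<LoadInst>(user) || isa<StoreInst>(user) ||
            isa<AtomicRMWInst>(user) || isa<AtomicCmpXchgInst>(user))
          escapeToOrigins[user].push_back(origin); // record the escape point
        else
          workList.push_back(user); // keep walking through casts/GEPs/phis
      }
    }
  }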

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |   40 ++---
 backend/src/llvm/llvm_gen_backend.cpp      |  227 +++++++++++++++++-----------
 2 files changed, 158 insertions(+), 109 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cd968c0..96970c7 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3291,6 +3291,18 @@ namespace gbe
       }
     }
 
+    INLINE GenRegister getRelativeAddress(Selection::Opaque &sel, GenRegister address, ir::AddressSpace space, uint8_t bti) const {
+      if(space == ir::MEM_LOCAL || space == ir::MEM_CONSTANT)
+        return address;
+
+      sel.push();
+        sel.curr.noMask = 1;
+        GenRegister temp = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
+        sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti), ir::TYPE_U32)));
+      sel.pop();
+      return temp;
+    }
+
     INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
@@ -3303,28 +3315,16 @@ namespace gbe
         sel.ADD(temp, address, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
         address = temp;
       }
-      if(space == MEM_LOCAL) {
+
+      BTI bti = insn.getBTI();
+      for (int x = 0; x < bti.count; x++) {
+        GenRegister temp = getRelativeAddress(sel, address, space, bti.bti[x]);
         if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
-          this->emitWrite64(sel, insn, address, 0xfe);
+          this->emitWrite64(sel, insn, temp, bti.bti[x]);
         else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
-          this->emitUntypedWrite(sel, insn, address,  0xfe);
-        else
-          this->emitByteScatter(sel, insn, elemSize, address, 0xfe);
-      } else {
-        BTI bti = insn.getBTI();
-        for (int x = 0; x < bti.count; x++) {
-          GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
-          sel.push();
-            sel.curr.noMask = 1;
-            sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti.bti[x]), ir::TYPE_U32)));
-          sel.pop();
-          if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
-            this->emitWrite64(sel, insn, temp, bti.bti[x]);
-          else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
-            this->emitUntypedWrite(sel, insn, temp,  bti.bti[x]);
-          else {
-            this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
-          }
+          this->emitUntypedWrite(sel, insn, temp,  bti.bti[x]);
+        else {
+          this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
         }
       }
       return true;
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 939dd63..8826128 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -464,10 +464,16 @@ namespace gbe
      */
     set<const Value*> conditionSet;
     map<const Value*, int> globalPointer;
+    typedef map<const Value*, int>::iterator GlobalPtrIter;
+
     /*!
      *  <phi,phiCopy> node information for later optimization
      */
     map<const ir::Register, const ir::Register> phiMap;
+
+    map<Value *, SmallVector<Value *, 4>> pointerOrigMap;
+    typedef map<Value *, SmallVector<Value *, 4>>::iterator PtrOrigMapIter;
+
     /*! We visit each function twice. Once to allocate the registers and once to
      *  emit the Gen IR instructions
      */
@@ -529,14 +535,22 @@ namespace gbe
       bool bKernel = isKernelFunction(F);
       if(!bKernel) return false;
 
+      analyzePointerOrigin(F);
       LI = &getAnalysis<LoopInfo>();
       emitFunction(F);
       phiMap.clear();
       globalPointer.clear();
+      pointerOrigMap.clear();
       // Reset for next function
       btiBase = BTI_RESERVED_NUM;
       return false;
     }
+    /*! Given a possible pointer value, find the escape points we care
+        about, i.e. load/store or atomic instructions */
+    void findPointerEscape(Value *ptr);
+    /*! For all possible pointer sources (GlobalVariable, pointer function
+        arguments, alloca instructions), find their escape points */
+    void analyzePointerOrigin(Function &F);
 
     virtual bool doFinalization(Module &M) { return false; }
     /*! handle global variable register allocation (local, constant space) */
@@ -647,6 +661,84 @@ namespace gbe
   };
 
   char GenWriter::ID = 0;
+
+  void GenWriter::findPointerEscape(Value *ptr) {
+    std::vector<Value*> workList;
+    typedef std::vector<Value*>::iterator workIter;
+    std::set<Value *> visited;
+
+    if (ptr->use_empty()) return;
+
+    workList.push_back(ptr);
+
+    for (unsigned i = 0; i < workList.size(); i++) {
+      Value *work = workList[i];
+      if (work->use_empty()) continue;
+
+      for (Value::use_iterator iter = work->use_begin(); iter != work->use_end(); ++iter) {
+      // Since LLVM 3.5, use_iterator points to 'Use' instead of 'User',
+      // which is more straightforward.
+  #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 5)
+        User *theUser = *iter;
+  #else
+        User *theUser = iter->getUser();
+  #endif
+        if (visited.find(theUser) != visited.end()) continue;
+        // A pointer used only as the value operand of a store is not an address use; skip it.
+        if (StoreInst *store = dyn_cast<StoreInst>(theUser)) {
+          if (store->getValueOperand() == work) {
+            continue;
+          }
+        }
+
+        visited.insert(theUser);
+
+        if (isa<LoadInst>(theUser) || isa<StoreInst>(theUser) || isa<CallInst>(theUser)) {
+          if (isa<CallInst>(theUser)) {
+            Function *F = dyn_cast<CallInst>(theUser)->getCalledFunction();
+            if (!F || F->getIntrinsicID() != 0) continue;
+          }
+
+          PtrOrigMapIter ptrIter = pointerOrigMap.find(theUser);
+          if (ptrIter == pointerOrigMap.end()) {
+            // create new one
+            SmallVector<Value *, 4> pointers;
+            pointers.push_back(ptr);
+            pointerOrigMap.insert(std::make_pair(theUser, pointers));
+          } else {
+            // append it
+            (*ptrIter).second.push_back(ptr);
+          }
+        } else {
+          workList.push_back(theUser);
+        }
+      }
+    }
+  }
+
+  void GenWriter::analyzePointerOrigin(Function &F) {
+    // GlobalVariable
+    Module::GlobalListType &globalList = const_cast<Module::GlobalListType &> (TheModule->getGlobalList());
+    for(auto i = globalList.begin(); i != globalList.end(); i ++) {
+      GlobalVariable &v = *i;
+      if(!v.isConstantUsed()) continue;
+      findPointerEscape(&v);
+    }
+    // function argument
+    for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
+      if (I->getType()->isPointerTy()) {
+        findPointerEscape(I);
+      }
+    }
+    // alloca
+    BasicBlock &bb = F.getEntryBlock();
+    for (BasicBlock::iterator iter = bb.begin(), iterE = bb.end(); iter != iterE; ++iter) {
+      if (AllocaInst *ai = dyn_cast<AllocaInst>(iter)) {
+        findPointerEscape(ai);
+      }
+    }
+  }
+
   void getSequentialData(const ConstantDataSequential *cda, void *ptr, uint32_t &offset) {
     StringRef data = cda->getRawDataValues();
     memcpy((char*)ptr+offset, data.data(), data.size());
@@ -2894,7 +2986,7 @@ namespace gbe
     const ir::Register dst = this->getRegister(&I);
 
     ir::BTI bti;
-    gatherBTI(*AI, bti);
+    gatherBTI(&I, bti);
     vector<ir::Register> src;
     uint32_t srcNum = 0;
     while(AI != AE) {
@@ -3776,96 +3868,54 @@ handle_write_image:
   }
 
   // The idea behind is to search along the use-def chain, and find out all
-  // possible source of the pointer. Then in later codeGen, we can emit
-  // read/store instructions to these btis gathered.
-  void GenWriter::gatherBTI(Value *pointer, ir::BTI &bti) {
-    typedef map<const Value*, int>::iterator GlobalPtrIter;
-    Value *p;
-    size_t idx = 0;
-    int nBTI = 0;
-    std::vector<Value*> candidates;
-    candidates.push_back(pointer);
-    std::set<Value*> processed;
-
-    while (idx < candidates.size()) {
-      bool isPrivate = false;
-      bool needNewBTI = true;
-      p = candidates[idx];
-
-      while (dyn_cast<User>(p) && !dyn_cast<GlobalVariable>(p)) {
-
-        if (processed.find(p) == processed.end()) {
-          processed.insert(p);
-        } else {
-          // This use-def chain falls into a loop,
-          // it does not introduce a new buffer source.
-          needNewBTI = false;
-          break;
-        }
-
-        if (dyn_cast<SelectInst>(p)) {
-          SelectInst *sel = cast<SelectInst>(p);
-          p = sel->getTrueValue();
-          candidates.push_back(sel->getFalseValue());
-          continue;
-        }
-
-        if (dyn_cast<PHINode>(p)) {
-          PHINode* phi = cast<PHINode>(p);
-          int n = phi->getNumIncomingValues();
-          for (int j = 1; j < n; j++)
-            candidates.push_back(phi->getIncomingValue(j));
-          p = phi->getIncomingValue(0);
-          continue;
-        }
-
-        if (dyn_cast<AllocaInst>(p)) {
-          isPrivate = true;
-          break;
+  // possible sources of the pointer. Then in later codeGen, we can emit
+  // read/store instructions to these BTIs gathered.
+  void GenWriter::gatherBTI(Value *insn, ir::BTI &bti) {
+    PtrOrigMapIter iter = pointerOrigMap.find(insn);
+    if (iter != pointerOrigMap.end()) {
+      SmallVectorImpl<Value *> &origins = iter->second;
+      uint8_t nBTI = 0;
+      for (unsigned i = 0; i < origins.size(); i++) {
+        uint8_t new_bti = 0;
+        Value *origin = origins[i];
+        unsigned space = origin->getType()->getPointerAddressSpace();
+        switch (space) {
+          case 0:
+            new_bti = BTI_PRIVATE;
+            break;
+          case 1:
+          {
+            GlobalPtrIter iter = globalPointer.find(origin);
+            GBE_ASSERT(iter != globalPointer.end());
+            new_bti = iter->second;
+            break;
+          }
+          case 2:
+            new_bti = BTI_CONSTANT;
+            break;
+          case 3:
+            new_bti = 0xfe;
+            break;
+          default:
+            GBE_ASSERT(0 && "address space not unhandled in gatherBTI()\n");
+            break;
         }
-        p = cast<User>(p)->getOperand(0);
-      }
-
-      if (needNewBTI == false) {
-        // go to next possible pointer source
-        idx++; continue;
-      }
 
-      uint8_t new_bti = 0;
-      if (isPrivate) {
-        new_bti = BTI_PRIVATE;
-      } else {
-        if(isa<Argument>(p) && dyn_cast<Argument>(p)->hasByValAttr()) {
-          // structure value implementation is not complete now,
-          // they are now treated as push constant, so, the load/store
-          // here is not as meaningful.
-          bti.bti[0] = BTI_PRIVATE;
-          bti.count = 1;
-          break;
-        }
-        Type *ty = p->getType();
-        if(ty->getPointerAddressSpace() == 3) {
-          // __local memory
-          new_bti = 0xfe;
-        } else {
-          // __global memory
-          GlobalPtrIter iter = globalPointer.find(p);
-          GBE_ASSERT(iter != globalPointer.end());
-          new_bti = iter->second;
+        // avoid duplicate
+        bool bFound = false;
+        for (int j = 0; j < nBTI; j++) {
+          if (bti.bti[j] == new_bti) {
+            bFound = true; break;
+          }
         }
-      }
-      // avoid duplicate
-      bool bFound = false;
-      for (int j = 0; j < nBTI; j++) {
-        if (bti.bti[j] == new_bti) {
-          bFound = true; break;
+        if (bFound == false) {
+          bti.bti[nBTI++] = new_bti;
+          bti.count = nBTI;
         }
       }
-      if (bFound == false) {
-        bti.bti[nBTI++] = new_bti;
-        bti.count = nBTI;
-      }
-      idx++;
+    } else {
+      insn->dump();
+      GBE_ASSERT(0 && "could not find where the address comes from!");
     }
     GBE_ASSERT(bti.count <= MAX_MIXED_POINTER);
   }
@@ -3882,9 +3932,8 @@ handle_write_image:
     const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
     const ir::Register ptr = this->getRegister(llvmPtr);
     ir::BTI binding;
-    if(addrSpace == ir::MEM_GLOBAL || addrSpace == ir::MEM_PRIVATE) {
-      gatherBTI(llvmPtr, binding);
-    }
+    gatherBTI(&I, binding);
+
     // Scalar is easy. We neednot build register tuples
     if (isScalarType(llvmType) == true) {
       const ir::Type type = getType(ctx, llvmType);
-- 
1.7.10.4


