[Beignet] [PATCH V2] backend: refine load/store merging algorithm
Yang, Rong R
rong.r.yang at intel.com
Fri Jun 23 05:28:28 UTC 2017
Pushed, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Song, Ruiling
> Sent: Thursday, June 22, 2017 14:29
> To: Wang, Rander <rander.wang at intel.com>; beignet at freedesktop.org
> Cc: Wang, Rander <rander.wang at intel.com>
> Subject: Re: [Beignet] [PATCH V2] backend: refine load/store merging
> algorithm
>
> LGTM
>
> Ruiling
>
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf
> > Of rander.wang
> > Sent: Friday, June 16, 2017 9:50 AM
> > To: beignet at freedesktop.org
> > Cc: Wang, Rander <rander.wang at intel.com>
> > Subject: [Beignet] [PATCH V2] backend: refine load/store merging
> > algorithm
> >
> > The current merging works for the sequence load(0), load(1), load(2),
> > but not for load(2), load(0), load(1), because it only compares the
> > last merged load with the new one instead of considering all the loads.
> >
> > For the sequence load(0), load(1), load(2): load(0) is the start, and
> > load(1) is found to be its immediate successor with no gap, so it is
> > pushed onto a merge FIFO. The start then moves to the top of the FIFO,
> > load(1), which is compared with load(2), so load(2) can be merged too.
> >
> > For load(2), load(0), load(1): load(2) cannot be merged with load(0)
> > because there is a gap between them, so load(0) is skipped and the
> > search moves on to load(1), which can be merged. But the algorithm
> > never goes back to merge load(0).
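
For illustration only, here is a minimal standalone sketch of that failure mode (not the pass's real code; plain element indices stand in for the LLVM loads, and the adjacency test is simplified): comparing only against the last merged access lets load(1) in but never revisits load(0).

#include <cstdlib>
#include <cstdio>
#include <vector>

int main() {
  // program order of the accesses, as element indices: load(2), load(0), load(1)
  std::vector<int> order = {2, 0, 1};
  std::vector<int> merged = {order[0]};          // the "start" access, load(2)
  for (size_t i = 1; i < order.size(); ++i) {
    // old rule: only compare against the last merged access
    if (std::abs(order[i] - merged.back()) == 1)
      merged.push_back(order[i]);                // load(1) gets in this way
    // load(0) was skipped at i == 1 and is never reconsidered,
    // so the contiguous set {0, 1, 2} is never formed
  }
  std::printf("merged %zu of 3 accesses\n", merged.size());   // prints 2
  return 0;
}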
> >
> > Change the algorithm as follows:
> > (1) Find all loads around the start that may be merged with it, judged
> > by their distance from the start. The distance depends on the data type;
> > for 32-bit data it is 4 bytes. Put these loads in a list.
> >
> > (2) Sort the list by the distance from the start.
> >
> > (3) Search the sorted list for a contiguous sequence that includes the
> > start and merge it.
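
For illustration only, a compact standalone sketch of steps (1)-(3), using plain byte offsets instead of LLVM instructions. The Candidate type and the way step (3) is written are inventions for this example and do not mirror the patch code, which also enforces load/store ordering and a maximum vector width.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Candidate { int offset; };   // byte offset relative to the start access

int main() {
  const int elementSize = 4;        // e.g. 32-bit loads: neighbours are 4 bytes apart
  // step (1): candidates found in the search window, in program order:
  // load(2), load(0) (the start itself, offset 0), load(1)
  std::vector<Candidate> cands = {{2 * elementSize}, {0}, {1 * elementSize}};

  // step (2): sort by distance from the start
  std::sort(cands.begin(), cands.end(),
            [](const Candidate &a, const Candidate &b) { return a.offset < b.offset; });

  // step (3): grow the contiguous run around offset 0 (the start is always a candidate)
  size_t zero = 0;
  while (cands[zero].offset != 0) ++zero;
  size_t lo = zero, hi = zero;
  while (lo > 0 && cands[lo - 1].offset + elementSize == cands[lo].offset) --lo;
  while (hi + 1 < cands.size() && cands[hi].offset + elementSize == cands[hi + 1].offset) ++hi;

  // for the order load(2), load(0), load(1) this reports offsets 0..8,
  // i.e. all three loads end up in one mergeable run
  std::printf("mergeable run: offsets %d..%d (%zu accesses)\n",
              cands[lo].offset, cands[hi].offset, hi - lo + 1);
  return 0;
}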
> >
> > V2: (1) Refine the sort and compare algorithm: first find all the IO
> > with a small offset relative to the start, then call std::sort.
> > (2) Check the number of candidate IO, which helps performance because
> > in most cases there is no chance to merge IO.
> >
> > Signed-off-by: rander.wang <rander.wang at intel.com>
> > ---
> > backend/src/llvm/llvm_loadstore_optimization.cpp | 87 +++++++++++++++++++++---
> > 1 file changed, 78 insertions(+), 9 deletions(-)
> >
> > diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp
> > b/backend/src/llvm/llvm_loadstore_optimization.cpp
> > index 5aa38be..c91c1a0 100644
> > --- a/backend/src/llvm/llvm_loadstore_optimization.cpp
> > +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
> > @@ -67,7 +67,7 @@ namespace gbe {
> > bool isSimpleLoadStore(Value *I);
> > bool optimizeLoadStore(BasicBlock &BB);
> >
> > - bool isLoadStoreCompatible(Value *A, Value *B);
> > + bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize);
> > void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
> > void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
> > bool findConsecutiveAccess(BasicBlock &BB,
> > @@ -109,7 +109,7 @@ namespace gbe {
> > return NULL;
> > }
> >
> > - bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B) {
> > + bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize) {
> > Value *ptrA = getPointerOperand(A);
> > Value *ptrB = getPointerOperand(B);
> > unsigned ASA = getAddressSpace(A);
> > @@ -136,7 +136,11 @@ namespace gbe {
> > // The Instructions are consecutive if the size of the first load/store is
> > // the same as the offset.
> > int64_t sz = TD->getTypeStoreSize(Ty);
> > - return ((-offset) == sz);
> > + *dist = -offset;
> > + *elementSize = sz;
> > +
> > + //a insn with small distance from the search load/store is a candidate one
> > + return (abs(-offset) < sz*maxVecSize);
> > }
> >
> > void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged) {
> > @@ -163,6 +167,25 @@ namespace gbe {
> > values[i]->replaceAllUsesWith(S);
> > }
> > }
> > +
> > + class mergedInfo{
> > + public:
> > + Instruction* mInsn;
> > + int mOffset;
> > +
> > + void init(Instruction* insn, int offset)
> > + {
> > + mInsn = insn;
> > + mOffset = offset;
> > + }
> > + };
> > +
> > + struct offsetSorter {
> > + bool operator()(mergedInfo* m0, mergedInfo* m1) const {
> > + return m0->mOffset < m1->mOffset;
> > + }
> > + };
> > +
> > // When searching for consecutive memory access, we do it in a small window,
> > // if the window is too large, it would take up too much compiling time.
> > // An Important rule we have followed is don't try to change load/store order.
> > @@ -177,7 +200,6 @@ namespace gbe {
> >
> > if(!isSimpleLoadStore(&*start)) return false;
> >
> > - merged.push_back(&*start);
> > unsigned targetAddrSpace = getAddressSpace(&*start);
> >
> > BasicBlock::iterator E = BB.end();
> > @@ -187,11 +209,27 @@ namespace gbe {
> > unsigned maxLimit = maxVecSize * 8;
> > bool reordered = false;
> >
> > - for(unsigned ss = 0; J != E && ss <= maxLimit; ++ss, ++J) {
> > - if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) {
> > - if(isLoadStoreCompatible(merged[merged.size()-1], &*J)) {
> > - merged.push_back(&*J);
> > - }
> > + bool ready = false;
> > + int elementSize;
> > +
> > + SmallVector<mergedInfo *, 16> searchInsnArray;
> > + mergedInfo meInfoArray[16];
> > + int indx = 0;
> > + meInfoArray[indx++].init(&*start, 0);
> > +
> > + searchInsnArray.push_back(&meInfoArray[0]);
> > + BasicBlock::iterator I = start;
> > + ++I;
> > +
> > + for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) {
> > + if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) {
> > + int distance;
> > + if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, &distance, &elementSize, maxVecSize))
> > + {
> > + meInfoArray[indx].init(&*I, distance);
> > + searchInsnArray.push_back(&meInfoArray[indx]);
> > + indx++;
> > + }
> > } else if((isLoad && isa<StoreInst>(*J))) {
> > // simple stop to keep read/write order
> > StoreInst *st = cast<StoreInst>(&*J);
> > @@ -214,6 +252,37 @@ namespace gbe {
> > if(merged.size() >= maxVecSize) break;
> > }
> >
> > + if(indx > 1)
> > + {
> > + //try to sort the load/store by the offset from the start
> > + //the farthest is at the beginning. this is easy to find the
> > + //continuous load/store
> > + std::sort(searchInsnArray.begin(), searchInsnArray.end(), offsetSorter());
> > +
> > + // try to find continuous loadstore insn in the candidate array
> > + for(unsigned i = 0; i < searchInsnArray.size(); i++)
> > + {
> > + unsigned j;
> > + for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); j++)
> > + {
> > + if(searchInsnArray[i+j+1]->mOffset - searchInsnArray[i+j]->mOffset != elementSize)
> > + break;
> > +
> > + //this means the search load/store which offset is 0, is in the sequence
> > + if(searchInsnArray[i+j]->mOffset == 0 || searchInsnArray[i+j+1]->mOffset == 0)
> > + ready = true;
> > + }
> > +
> > + if(j > 0 && ready)
> > + {
> > + for(unsigned k = 0; k < j+1; k++)
> > + merged.push_back(searchInsnArray[i+k]->mInsn);
> > +
> > + break;
> > + }
> > + }
> > + }
> > +
> > return reordered;
> > }
> >
> > --
> > 2.7.4
> >
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet