[Beignet] [PATCH V2] backend: refine load/store merging algorithm
Yang, Rong R
rong.r.yang at intel.com
Fri Jun 23 05:28:28 UTC 2017
Pushed, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Song, Ruiling
> Sent: Thursday, June 22, 2017 14:29
> To: Wang, Rander <rander.wang at intel.com>; beignet at freedesktop.org
> Cc: Wang, Rander <rander.wang at intel.com>
> Subject: Re: [Beignet] [PATCH V2] backend: refine load/store merging
> algorithm
>
> LGTM
>
> Ruiling
>
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf
> > Of rander.wang
> > Sent: Friday, June 16, 2017 9:50 AM
> > To: beignet at freedesktop.org
> > Cc: Wang, Rander <rander.wang at intel.com>
> > Subject: [Beignet] [PATCH V2] backend: refine load/store merging
> > algorithm
> >
> > The current merging works for the sequence load(0), load(1), load(2),
> > but not for load(2), load(0), load(1), because it only compares the
> > last merged load with the new one instead of considering all the loads.
> >
> > For the sequence load(0), load(1), load(2): load(0) is the start, and
> > load(1) is found to be its immediate successor with no gap, so it is
> > pushed onto a merge FIFO. The start then moves to the top of the FIFO,
> > load(1), which is compared with load(2), so load(2) can be merged too.
> >
> > For load(2), load(0), load(1): load(2) cannot be merged with load(0)
> > because there is a gap between them, so load(0) is skipped and the
> > search moves on to load(1), which can be merged. But the algorithm
> > never goes back to merge load(0).
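
For illustration only, here is a minimal standalone sketch of that failure mode (not the pass's real code; plain element indices stand in for the LLVM loads, and the adjacency test is simplified): comparing only against the last merged access lets load(1) in but never revisits load(0).

#include <cstdlib>
#include <cstdio>
#include <vector>

int main() {
  // program order of the accesses, as element indices: load(2), load(0), load(1)
  std::vector<int> order = {2, 0, 1};
  std::vector<int> merged = {order[0]};          // the "start" access, load(2)
  for (size_t i = 1; i < order.size(); ++i) {
    // old rule: only compare against the last merged access
    if (std::abs(order[i] - merged.back()) == 1)
      merged.push_back(order[i]);                // load(1) gets in this way
    // load(0) was skipped at i == 1 and is never reconsidered,
    // so the contiguous set {0, 1, 2} is never formed
  }
  std::printf("merged %zu of 3 accesses\n", merged.size());   // prints 2
  return 0;
}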
> >
> > Change the algorithm as follows:
> > (1) Find all loads around the start that may be merged with it, judged
> > by their distance from the start. The distance depends on the data type;
> > for 32-bit data it is 4 bytes. Put these loads in a list.
> >
> > (2) Sort the list by the distance from the start.
> >
> > (3) Search the sorted list for a contiguous sequence that includes the
> > start and merge it.
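
For illustration only, a compact standalone sketch of steps (1)-(3), using plain byte offsets instead of LLVM instructions. The Candidate type and the way step (3) is written are inventions for this example and do not mirror the patch code, which also enforces load/store ordering and a maximum vector width.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Candidate { int offset; };   // byte offset relative to the start access

int main() {
  const int elementSize = 4;        // e.g. 32-bit loads: neighbours are 4 bytes apart
  // step (1): candidates found in the search window, in program order:
  // load(2), load(0) (the start itself, offset 0), load(1)
  std::vector<Candidate> cands = {{2 * elementSize}, {0}, {1 * elementSize}};

  // step (2): sort by distance from the start
  std::sort(cands.begin(), cands.end(),
            [](const Candidate &a, const Candidate &b) { return a.offset < b.offset; });

  // step (3): grow the contiguous run around offset 0 (the start is always a candidate)
  size_t zero = 0;
  while (cands[zero].offset != 0) ++zero;
  size_t lo = zero, hi = zero;
  while (lo > 0 && cands[lo - 1].offset + elementSize == cands[lo].offset) --lo;
  while (hi + 1 < cands.size() && cands[hi].offset + elementSize == cands[hi + 1].offset) ++hi;

  // for the order load(2), load(0), load(1) this reports offsets 0..8,
  // i.e. all three loads end up in one mergeable run
  std::printf("mergeable run: offsets %d..%d (%zu accesses)\n",
              cands[lo].offset, cands[hi].offset, hi - lo + 1);
  return 0;
}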
> >
> > V2: (1) Refine the sort and compare algorithm: first find all the IO
> > with a small offset relative to the start, then call std::sort.
> > (2) Check the number of candidate IO, which helps performance because
> > in most cases there is no chance to merge IO.
> >
> > Signed-off-by: rander.wang <rander.wang at intel.com>
> > ---
> > backend/src/llvm/llvm_loadstore_optimization.cpp | 87 +++++++++++++++++++++---
> > 1 file changed, 78 insertions(+), 9 deletions(-)
> >
> > diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp
> > b/backend/src/llvm/llvm_loadstore_optimization.cpp
> > index 5aa38be..c91c1a0 100644
> > --- a/backend/src/llvm/llvm_loadstore_optimization.cpp
> > +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
> > @@ -67,7 +67,7 @@ namespace gbe {
> > bool isSimpleLoadStore(Value *I);
> > bool optimizeLoadStore(BasicBlock &BB);
> >
> > - bool isLoadStoreCompatible(Value *A, Value *B);
> > + bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize);
> > void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
> > void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
> > bool findConsecutiveAccess(BasicBlock &BB,
> > @@ -109,7 +109,7 @@ namespace gbe {
> > return NULL;
> > }
> >
> > - bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B) {
> > + bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize) {
> > Value *ptrA = getPointerOperand(A);
> > Value *ptrB = getPointerOperand(B);
> > unsigned ASA = getAddressSpace(A);
> > @@ -136,7 +136,11 @@ namespace gbe {
> > // The Instructions are consecutive if the size of the first load/store is
> > // the same as the offset.
> > int64_t sz = TD->getTypeStoreSize(Ty);
> > - return ((-offset) == sz);
> > + *dist = -offset;
> > + *elementSize = sz;
> > +
> > + //a insn with small distance from the search load/store is a candidate one
> > + return (abs(-offset) < sz*maxVecSize);
> > }
> >
> > void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged) {
> > @@ -163,6 +167,25 @@ namespace gbe {
> > values[i]->replaceAllUsesWith(S);
> > }
> > }
> > +
> > + class mergedInfo{
> > + public:
> > + Instruction* mInsn;
> > + int mOffset;
> > +
> > + void init(Instruction* insn, int offset)
> > + {
> > + mInsn = insn;
> > + mOffset = offset;
> > + }
> > + };
> > +
> > + struct offsetSorter {
> > + bool operator()(mergedInfo* m0, mergedInfo* m1) const {
> > + return m0->mOffset < m1->mOffset;
> > + }
> > + };
> > +
> > // When searching for consecutive memory access, we do it in a small window,
> > // if the window is too large, it would take up too much compiling time.
> > // An Important rule we have followed is don't try to change load/store order.
> > @@ -177,7 +200,6 @@ namespace gbe {
> >
> > if(!isSimpleLoadStore(&*start)) return false;
> >
> > - merged.push_back(&*start);
> > unsigned targetAddrSpace = getAddressSpace(&*start);
> >
> > BasicBlock::iterator E = BB.end();
> > @@ -187,11 +209,27 @@ namespace gbe {
> > unsigned maxLimit = maxVecSize * 8;
> > bool reordered = false;
> >
> > - for(unsigned ss = 0; J != E && ss <= maxLimit; ++ss, ++J) {
> > - if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) {
> > - if(isLoadStoreCompatible(merged[merged.size()-1], &*J)) {
> > - merged.push_back(&*J);
> > - }
> > + bool ready = false;
> > + int elementSize;
> > +
> > + SmallVector<mergedInfo *, 16> searchInsnArray;
> > + mergedInfo meInfoArray[16];
> > + int indx = 0;
> > + meInfoArray[indx++].init(&*start, 0);
> > +
> > + searchInsnArray.push_back(&meInfoArray[0]);
> > + BasicBlock::iterator I = start;
> > + ++I;
> > +
> > + for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) {
> > + if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) {
> > + int distance;
> > + if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, &distance, &elementSize, maxVecSize))
> > + {
> > + meInfoArray[indx].init(&*I, distance);
> > + searchInsnArray.push_back(&meInfoArray[indx]);
> > + indx++;
> > + }
> > } else if((isLoad && isa<StoreInst>(*J))) {
> > // simple stop to keep read/write order
> > StoreInst *st = cast<StoreInst>(&*J);
> > @@ -214,6 +252,37 @@ namespace gbe {
> > if(merged.size() >= maxVecSize) break;
> > }
> >
> > + if(indx > 1)
> > + {
> > + //try to sort the load/store by the offset from the start
> > + //the farthest is at the beginning. this is easy to find the
> > + //continuous load/store
> > + std::sort(searchInsnArray.begin(), searchInsnArray.end(), offsetSorter());
> > +
> > + // try to find continuous loadstore insn in the candidate array
> > + for(unsigned i = 0; i < searchInsnArray.size(); i++)
> > + {
> > + unsigned j;
> > + for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); j++)
> > + {
> > + if(searchInsnArray[i+j+1]->mOffset - searchInsnArray[i+j]->mOffset != elementSize)
> > + break;
> > +
> > + //this means the search load/store which offset is 0, is in the sequence
> > + if(searchInsnArray[i+j]->mOffset == 0 || searchInsnArray[i+j+1]->mOffset == 0)
> > + ready = true;
> > + }
> > +
> > + if(j > 0 && ready)
> > + {
> > + for(unsigned k = 0; k < j+1; k++)
> > + merged.push_back(searchInsnArray[i+k]->mInsn);
> > +
> > + break;
> > + }
> > + }
> > + }
> > +
> > return reordered;
> > }
> >
> > --
> > 2.7.4
> >
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet