[Beignet] [PATCH V2] backend: refine load/store merging algorithm
Song, Ruiling
ruiling.song at intel.com
Thu Jun 22 06:29:26 UTC 2017
LGTM
Ruiling
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> rander.wang
> Sent: Friday, June 16, 2017 9:50 AM
> To: beignet at freedesktop.org
> Cc: Wang, Rander <rander.wang at intel.com>
> Subject: [Beignet] [PATCH V2] backend: refine load/store merging algorithm
>
> Currently the pass handles the sequence load(0), load(1), load(2),
> but it cannot handle load(2), load(0), load(1), because each new load
> is only compared with the last merged load instead of with all the
> loads collected so far.
>
> For the sequence load(0), load(1), load(2): load(0) is the start, and
> load(1) is found to be its immediate successor with no gap, so it is
> pushed into the merge FIFO. The head then moves to load(1), which is
> compared with load(2), so load(2) can be merged as well.
>
> For load(2), load(0), load(1): load(2) cannot be merged with load(0)
> because there is a gap between them, so load(0) is skipped and the
> search moves on to load(1), which can be merged. But the pass never
> goes back to merge load(0).
>
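A standalone toy model of that pre-patch behaviour, for illustration only
(plain integer offsets stand in for the loads; none of this is the driver
code itself):

    #include <cstdlib>
    #include <vector>

    // Pre-patch idea: each candidate is compared only against the last merged
    // access, so an access that was skipped earlier is never revisited.
    int main() {
      std::vector<int> merged = {2};              // start from load(2)
      for (int off : {0, 1}) {                    // remaining loads in program order
        if (std::abs(off - merged.back()) == 1)   // adjacency test against the tail only
          merged.push_back(off);                  // load(1) is accepted here ...
      }                                           // ... but load(0) is never reconsidered
      return merged.size() == 3 ? 0 : 1;          // exits with 1: the full run was missed
    }

Starting from load(2), only load(1) ever passes the tail check, which is
exactly the lost merge described above.
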
> Now change the algorithm:
> (1) Find all loads around the start that might be merged, based on their
> distance to the start. The distance depends on the data type; for 32-bit
> data it is 4 bytes. Put them in a list.
>
> (2) Sort the list by the distance from the start.
>
> (3) Search the sorted list for a continuous sequence that includes the
> start and merge it (a standalone sketch of these steps follows the V2
> notes below).
>
> V2: (1) Refine the sort-and-compare algorithm: first collect all the
> accesses whose offset from the start is small, then call std::sort.
> (2) Check the number of candidate accesses up front, which helps
> performance because in most cases there is nothing to merge.
>
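For reference, here is a minimal standalone sketch of the three steps above,
using plain byte offsets relative to the start instead of real LLVM
instructions (pickRun, and the use of bare ints, are illustrative only, not
the patch code itself):

    #include <algorithm>
    #include <cstdlib>
    #include <vector>

    // Step 1: keep only accesses whose distance to the start is small enough.
    // Step 2: sort the candidates by their offset from the start.
    // Step 3: return the first consecutive run that contains the start (offset 0).
    std::vector<int> pickRun(const std::vector<int> &offsets,
                             int elementSize, int maxVecSize) {
      std::vector<int> cand;
      for (int o : offsets)
        if (std::abs(o) < elementSize * maxVecSize)
          cand.push_back(o);
      std::sort(cand.begin(), cand.end());
      for (size_t i = 0; i < cand.size(); ++i) {
        size_t j = i;
        bool hasStart = (cand[i] == 0);
        while (j + 1 < cand.size() &&
               j - i + 1 < (size_t)maxVecSize &&
               cand[j + 1] - cand[j] == elementSize) {
          ++j;
          hasStart = hasStart || (cand[j] == 0);
        }
        if (j > i && hasStart)                    // a real run that includes the start
          return std::vector<int>(cand.begin() + i, cand.begin() + j + 1);
      }
      return {0};                                 // nothing mergeable, keep the start alone
    }

With load(2), load(0), load(1) and 32-bit data, the offsets relative to the
start load(2) are {0, -8, -4}; pickRun({0, -8, -4}, 4, 4) returns {-8, -4, 0},
so all three loads merge even though load(0) appears after load(2) in the IR.
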
> Signed-off-by: rander.wang <rander.wang at intel.com>
> ---
> backend/src/llvm/llvm_loadstore_optimization.cpp | 87 +++++++++++++++++++++---
> 1 file changed, 78 insertions(+), 9 deletions(-)
>
> diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
> index 5aa38be..c91c1a0 100644
> --- a/backend/src/llvm/llvm_loadstore_optimization.cpp
> +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
> @@ -67,7 +67,7 @@ namespace gbe {
> bool isSimpleLoadStore(Value *I);
> bool optimizeLoadStore(BasicBlock &BB);
>
> - bool isLoadStoreCompatible(Value *A, Value *B);
> + bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize);
> void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
> void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
> bool findConsecutiveAccess(BasicBlock &BB,
> @@ -109,7 +109,7 @@ namespace gbe {
> return NULL;
> }
>
> - bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B) {
> + bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize) {
> Value *ptrA = getPointerOperand(A);
> Value *ptrB = getPointerOperand(B);
> unsigned ASA = getAddressSpace(A);
> @@ -136,7 +136,11 @@ namespace gbe {
> // The Instructions are connsecutive if the size of the first load/store is
> // the same as the offset.
> int64_t sz = TD->getTypeStoreSize(Ty);
> - return ((-offset) == sz);
> + *dist = -offset;
> + *elementSize = sz;
> +
> + // an insn within a small distance of the searched load/store is a merge candidate
> + return (abs(-offset) < sz*maxVecSize);
> }
>
> void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged) {
> @@ -163,6 +167,25 @@ namespace gbe {
> values[i]->replaceAllUsesWith(S);
> }
> }
> +
> + class mergedInfo{
> + public:
> + Instruction* mInsn;
> + int mOffset;
> +
> + void init(Instruction* insn, int offset)
> + {
> + mInsn = insn;
> + mOffset = offset;
> + }
> + };
> +
> + struct offsetSorter {
> + bool operator()(mergedInfo* m0, mergedInfo* m1) const {
> + return m0->mOffset < m1->mOffset;
> + }
> + };
> +
> // When searching for consecutive memory access, we do it in a small window,
> // if the window is too large, it would take up too much compiling time.
> // An Important rule we have followed is don't try to change load/store order.
> @@ -177,7 +200,6 @@ namespace gbe {
>
> if(!isSimpleLoadStore(&*start)) return false;
>
> - merged.push_back(&*start);
> unsigned targetAddrSpace = getAddressSpace(&*start);
>
> BasicBlock::iterator E = BB.end();
> @@ -187,11 +209,27 @@ namespace gbe {
> unsigned maxLimit = maxVecSize * 8;
> bool reordered = false;
>
> - for(unsigned ss = 0; J != E && ss <= maxLimit; ++ss, ++J) {
> - if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) {
> - if(isLoadStoreCompatible(merged[merged.size()-1], &*J)) {
> - merged.push_back(&*J);
> - }
> + bool ready = false;
> + int elementSize;
> +
> + SmallVector<mergedInfo *, 16> searchInsnArray;
> + mergedInfo meInfoArray[16];
> + int indx = 0;
> + meInfoArray[indx++].init(&*start, 0);
> +
> + searchInsnArray.push_back(&meInfoArray[0]);
> + BasicBlock::iterator I = start;
> + ++I;
> +
> + for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) {
> + if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) {
> + int distance;
> + if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, &distance, &elementSize, maxVecSize))
> + {
> + meInfoArray[indx].init(&*I, distance);
> + searchInsnArray.push_back(&meInfoArray[indx]);
> + indx++;
> + }
> } else if((isLoad && isa<StoreInst>(*J))) {
> // simple stop to keep read/write order
> StoreInst *st = cast<StoreInst>(&*J);
> @@ -214,6 +252,37 @@ namespace gbe {
> if(merged.size() >= maxVecSize) break;
> }
>
> + if(indx > 1)
> + {
> + //try to sort the load/store candidates by their offset from the start;
> + //the farthest one comes first, which makes it easy to find a
> + //continuous sequence
> + std::sort(searchInsnArray.begin(), searchInsnArray.end(), offsetSorter());
> +
> + // try to find continuous loadstore insn in the candidate array
> + for(unsigned i = 0; i < searchInsnArray.size(); i++)
> + {
> + unsigned j;
> + for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); j++)
> + {
> + if(searchInsnArray[i+j+1]->mOffset - searchInsnArray[i+j]->mOffset != elementSize)
> + break;
> +
> + //this means the searched load/store, whose offset is 0, is in the sequence
> + if(searchInsnArray[i+j]->mOffset == 0 || searchInsnArray[i+j+1]->mOffset == 0)
> + ready = true;
> + }
> +
> + if(j > 0 && ready)
> + {
> + for(unsigned k = 0; k < j+1; k++)
> + merged.push_back(searchInsnArray[i+k]->mInsn);
> +
> + break;
> + }
> + }
> + }
> +
> return reordered;
> }
>
> --
> 2.7.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet