[Beignet] [PATCH] backend: refine load/store merging algorithm
rander.wang
rander.wang at intel.com
Mon Jun 5 02:16:05 UTC 2017
Now it works for sequence: load(0), load(1), load(2)
but it cant work for load(2), load(0), load(1). because
it compared the last merged load and the new one not all
the loads
for sequence: load(0), load(1), load(2). the load(0) is the
start, can find that load(1) is successor without space, so
put it to a merge fifo. then the start is moving to the top
of fifo load(1), and compared with load(2). Also load(2) can be merged
for load(2), load(0), load(1). load(2) cant be merged with
load(0) for a space between them. So skip load(0) and mov to next
load(1).And this load(1) can be merged. But it never go back merge
load(0)
Now change the algorithm.
(1) find all loads maybe merged arround the start by the distance to
the start. the distance is depended on data type, for 32bit data, the
distance is 4. Put them in a list
(2) sort the list by the distance from the start.
(3) search the continuous sequence including the start to merge
Signed-off-by: rander.wang <rander.wang at intel.com>
---
backend/src/llvm/llvm_loadstore_optimization.cpp | 90 +++++++++++++++++++++---
1 file changed, 82 insertions(+), 8 deletions(-)
diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
index 5aa38be..9076a14 100644
--- a/backend/src/llvm/llvm_loadstore_optimization.cpp
+++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
@@ -67,7 +67,7 @@ namespace gbe {
bool isSimpleLoadStore(Value *I);
bool optimizeLoadStore(BasicBlock &BB);
- bool isLoadStoreCompatible(Value *A, Value *B);
+ bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize);
void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
bool findConsecutiveAccess(BasicBlock &BB,
@@ -109,7 +109,7 @@ namespace gbe {
return NULL;
}
- bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B) {
+ bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize) {
Value *ptrA = getPointerOperand(A);
Value *ptrB = getPointerOperand(B);
unsigned ASA = getAddressSpace(A);
@@ -136,7 +136,11 @@ namespace gbe {
// The Instructions are connsecutive if the size of the first load/store is
// the same as the offset.
int64_t sz = TD->getTypeStoreSize(Ty);
- return ((-offset) == sz);
+ *dist = -offset;
+ *elementSize = sz;
+
+ //a insn with small distance from the search load/store is a candidate one
+ return (abs(-offset) < sz*maxVecSize);
}
void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged) {
@@ -163,6 +167,19 @@ namespace gbe {
values[i]->replaceAllUsesWith(S);
}
}
+
+ class mergedInfo{
+ public:
+ Instruction* mInsn;
+ int mOffset;
+
+ void init(Instruction* insn, int offset)
+ {
+ mInsn = insn;
+ mOffset = offset;
+ }
+ };
+
// When searching for consecutive memory access, we do it in a small window,
// if the window is too large, it would take up too much compiling time.
// An Important rule we have followed is don't try to change load/store order.
@@ -177,7 +194,6 @@ namespace gbe {
if(!isSimpleLoadStore(&*start)) return false;
- merged.push_back(&*start);
unsigned targetAddrSpace = getAddressSpace(&*start);
BasicBlock::iterator E = BB.end();
@@ -187,10 +203,45 @@ namespace gbe {
unsigned maxLimit = maxVecSize * 8;
bool reordered = false;
- for(unsigned ss = 0; J != E && ss <= maxLimit; ++ss, ++J) {
- if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) {
- if(isLoadStoreCompatible(merged[merged.size()-1], &*J)) {
- merged.push_back(&*J);
+ bool ready = false;
+ int elementSize;
+
+ SmallVector<mergedInfo *, 16> searchInsnArray;
+ mergedInfo meInfoArray[16];
+ int indx = 0;
+ meInfoArray[indx++].init(&*start, 0);
+
+ searchInsnArray.push_back(&meInfoArray[0]);
+ BasicBlock::iterator I = start;
+ ++I;
+
+ for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) {
+ if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) {
+ for(unsigned i = 0; i < searchInsnArray.size(); i++)
+ {
+ int distance;
+ if(isLoadStoreCompatible(searchInsnArray[i]->mInsn, &*I, &distance, &elementSize, maxVecSize))
+ {
+ meInfoArray[indx].init(&*I, distance + searchInsnArray[i]->mOffset);
+
+ //try to insert the load/store by the distance from the start
+ //the farthest is at the beginning. this is easy to find the
+ //continuous load/store
+ SmallVector<mergedInfo *, 16>::iterator iA = searchInsnArray.begin();
+ for(; iA != searchInsnArray.end(); ++iA)
+ {
+ if(meInfoArray[indx].mOffset > (*iA)->mOffset)
+ continue;
+
+ searchInsnArray.insert(iA, &meInfoArray[indx]);
+ break;
+ }
+ if(iA == searchInsnArray.end())
+ searchInsnArray.push_back(&meInfoArray[indx]);
+
+ indx++;
+ break;
+ }
}
} else if((isLoad && isa<StoreInst>(*J))) {
// simple stop to keep read/write order
@@ -214,6 +265,29 @@ namespace gbe {
if(merged.size() >= maxVecSize) break;
}
+ // try to find continuous loadstore insn in the candidate array
+ for(unsigned i = 0; i < searchInsnArray.size(); i++)
+ {
+ unsigned j;
+ for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); j++)
+ {
+ if(searchInsnArray[i+j+1]->mOffset - searchInsnArray[i+j]->mOffset != elementSize)
+ break;
+
+ //this means the search load/store which offset is 0, is in the sequence
+ if(searchInsnArray[i+j]->mOffset == 0 || searchInsnArray[i+j+1]->mOffset == 0)
+ ready = true;
+ }
+
+ if(j > 0 && ready)
+ {
+ for(unsigned k = 0; k < j+1; k++)
+ merged.push_back(searchInsnArray[i+k]->mInsn);
+
+ break;
+ }
+ }
+
return reordered;
}
--
2.7.4
More information about the Beignet
mailing list