[Beignet] [PATCH V5] backend: refine load store optimization
Yang, Rong R
rong.r.yang at intel.com
Tue Jul 18 03:17:47 UTC 2017
LGTM, pushed, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Ruiling Song
> Sent: Thursday, July 13, 2017 10:45
> To: beignet at lists.freedesktop.org
> Cc: Song, Ruiling <ruiling.song at intel.com>; Wang, Rander
> <rander.wang at intel.com>
> Subject: [Beignet] [PATCH V5] backend: refine load store optimization
>
> This fixes the basic test in the conformance suite that failed for vec8 of
> char because of an overflow, and it also fixes many OpenCV test items that
> failed because of offset errors.
>
> (1) Enlarge searchInsnArray to 32 entries, the maximum size needed for char,
>     and add a check for overflow when too many instructions are collected.
> (2) Make sure the start insn is the first insn of the searched array,
>     because if it is not the first, the offset may be invalid, and it is
>     complex to modify the offset without error.
>
> V2: refine the search index, using J instead of I.
> V3: drop (2); instead, add the offset to the pointer of the start insn.
>     Passes OpenCV, the conformance basic and compiler tests, and the utests.
> V4: check the pointer type: if it is 64-bit, adjust it with a 64-bit
>     integer, otherwise with a 32-bit one.
> V5: refine findSafeInstruction() and variable naming in
>     findConsecutiveAccess().
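>
> The V3/V4 offset handling amounts to rebasing the start pointer by the byte
> offset of the first merged access, using an integer type that matches the
> pointer width. A minimal sketch of that idea (the helper name and the
> DataLayout parameter are illustrative, not part of this patch):
>
>     // Sketch only; needs llvm/IR/IRBuilder.h and llvm/IR/DataLayout.h,
>     // everything in namespace llvm.
>     static Value *rebasePointer(IRBuilder<> &Builder, const DataLayout &DL,
>                                 Value *ptr, int64_t offset) {
>       if (offset == 0)
>         return ptr;                                 // nothing to rebase
>       // Pick i32 or i64 to match the pointer width (the V4 change).
>       unsigned ptrBytes = DL.getPointerTypeSize(ptr->getType());
>       Type *intTy = (ptrBytes == 4) ? Builder.getInt32Ty()
>                                     : Builder.getInt64Ty();
>       // ptrtoint -> add offset -> inttoptr, keeping the original pointer type.
>       Value *addr = Builder.CreatePtrToInt(ptr, intTy);
>       addr = Builder.CreateAdd(addr, ConstantInt::get(intTy, offset));
>       return Builder.CreateIntToPtr(addr, ptr->getType());
>     }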
>
> Signed-off-by: rander.wang <rander.wang at intel.com>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/llvm/llvm_loadstore_optimization.cpp | 125 ++++++++++++++++-------
> 1 file changed, 88 insertions(+), 37 deletions(-)
>
> diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
> index c91c1a0..bb8dc5f 100644
> --- a/backend/src/llvm/llvm_loadstore_optimization.cpp
> +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
> @@ -68,13 +68,14 @@ namespace gbe {
> bool optimizeLoadStore(BasicBlock &BB);
>
> bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int* elementSize, int maxVecSize);
> - void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
> - void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged);
> + void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged, Instruction *start, int offset);
> + void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged, Instruction *start, int offset);
> bool findConsecutiveAccess(BasicBlock &BB,
> SmallVector<Instruction*, 16> &merged,
> const BasicBlock::iterator &start,
> unsigned maxVecSize,
> - bool isLoad);
> + bool isLoad,
> + int *addrOffset);
> #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
> virtual StringRef getPassName() const
> #else
> @@ -143,7 +144,10 @@ namespace gbe {
> return (abs(-offset) < sz*maxVecSize);
> }
>
> - void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> &merged) {
> + void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB,
> + SmallVector<Instruction*, 16> &merged,
> + Instruction *start,
> + int offset) {
> IRBuilder<> Builder(&BB);
>
> unsigned size = merged.size();
> @@ -151,14 +155,27 @@ namespace gbe {
> for(unsigned i = 0; i < size; i++) {
> values.push_back(merged[i]);
> }
> - LoadInst *ld = cast<LoadInst>(merged[0]);
> + LoadInst *ld = cast<LoadInst>(start);
> unsigned align = ld->getAlignment();
> unsigned addrSpace = ld->getPointerAddressSpace();
> // insert before first load
> Builder.SetInsertPoint(ld);
> +
> + //modify offset
> + Value *newPtr = ld->getPointerOperand();
> + if(offset != 0)
> + {
> + Type *ptype = ld->getPointerOperand()->getType();
> + unsigned typeSize = TD->getPointerTypeSize(ptype);
> + ptype = (typeSize == 4) ? Builder.getInt32Ty():Builder.getInt64Ty();
> + Value *StartAddr = Builder.CreatePtrToInt(ld->getPointerOperand(), ptype);
> + Value *offsetVal = ConstantInt::get(ptype, offset);
> + Value *newAddr = Builder.CreateAdd(StartAddr, offsetVal);
> + newPtr = Builder.CreateIntToPtr(newAddr, ld->getPointerOperand()->getType());
> + }
> +
> VectorType *vecTy = VectorType::get(ld->getType(), size);
> - Value *vecPtr = Builder.CreateBitCast(ld->getPointerOperand(),
> - PointerType::get(vecTy, addrSpace));
> + Value *vecPtr = Builder.CreateBitCast(newPtr,
> + PointerType::get(vecTy, addrSpace));
> LoadInst *vecValue = Builder.CreateLoad(vecPtr);
> vecValue->setAlignment(align);
>
> @@ -196,8 +213,8 @@ namespace gbe {
> SmallVector<Instruction*, 16> &merged,
> const BasicBlock::iterator &start,
> unsigned maxVecSize,
> - bool isLoad) {
> -
> + bool isLoad,
> + int *addrOffset) {
> if(!isSimpleLoadStore(&*start)) return false;
>
> unsigned targetAddrSpace = getAddressSpace(&*start);
> @@ -207,35 +224,40 @@ namespace gbe {
> ++J;
>
> unsigned maxLimit = maxVecSize * 8;
> - bool reordered = false;
> -
> + bool crossAddressSpace = false;
> + // When we are merging loads and some stores to another address space
> + // lie among them, we say that loadStoreReorder happens. The same holds
> + // for merging stores with loads from another address space among them.
> + bool loadStoreReorder = false;
> bool ready = false;
> int elementSize;
>
> - SmallVector<mergedInfo *, 16> searchInsnArray;
> - mergedInfo meInfoArray[16];
> + SmallVector<mergedInfo *, 32> searchInsnArray;
> + mergedInfo meInfoArray[32];
> int indx = 0;
> meInfoArray[indx++].init(&*start, 0);
> -
> searchInsnArray.push_back(&meInfoArray[0]);
> - BasicBlock::iterator I = start;
> - ++I;
>
> - for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) {
> - if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) {
> + for(unsigned ss = 0; J!= E && ss <= maxLimit; ++ss, ++J) {
> + if((isLoad && isa<LoadInst>(*J)) || (!isLoad &&
> + isa<StoreInst>(*J))) {
> int distance;
> - if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, &distance, &elementSize, maxVecSize))
> + if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*J,
> + &distance, &elementSize, maxVecSize))
> {
> - meInfoArray[indx].init(&*I, distance);
> + meInfoArray[indx].init(&*J, distance);
> searchInsnArray.push_back(&meInfoArray[indx]);
> indx++;
> + if (crossAddressSpace)
> + loadStoreReorder = true;
> +
> + if(indx >= 32)
> + break;
> }
> } else if((isLoad && isa<StoreInst>(*J))) {
> // simple stop to keep read/write order
> StoreInst *st = cast<StoreInst>(&*J);
> unsigned addrSpace = st->getPointerAddressSpace();
> if (addrSpace != targetAddrSpace) {
> - reordered = true;
> + crossAddressSpace = true;
> } else {
> break;
> }
> @@ -243,15 +265,14 @@ namespace gbe {
> LoadInst *ld = cast<LoadInst>(&*J);
> unsigned addrSpace = ld->getPointerAddressSpace();
> if (addrSpace != targetAddrSpace) {
> - reordered = true;
> + crossAddressSpace = true;
> } else {
> break;
> }
> }
> -
> - if(merged.size() >= maxVecSize) break;
> }
>
> +
> if(indx > 1)
> {
> //try to sort the load/store by the offset from the start
> @@ -275,18 +296,29 @@ namespace gbe {
>
> if(j > 0 && ready)
> {
> - for(unsigned k = 0; k < j+1; k++)
> + unsigned endIndx = j + 1;
> + *addrOffset = searchInsnArray[i]->mOffset;
> + endIndx = (endIndx >= 16) ? 16 : (endIndx >= 8 ? 8 : (endIndx >= 4 ? 4 : endIndx));
> +
> + for(unsigned k = 0; k < endIndx; k++)
> + {
> merged.push_back(searchInsnArray[i+k]->mInsn);
> + if (k >= maxVecSize)
> + break;
> + }
>
> break;
> }
> }
> }
>
> - return reordered;
> + return loadStoreReorder;
> }
>
> - void GenLoadStoreOptimization::mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> &merged) {
> + void GenLoadStoreOptimization::mergeStore(BasicBlock &BB,
> + SmallVector<Instruction*, 16> &merged,
> + Instruction *start,
> + int offset) {
> IRBuilder<> Builder(&BB);
>
> unsigned size = merged.size();
> @@ -294,7 +326,7 @@ namespace gbe {
> for(unsigned i = 0; i < size; i++) {
> values.push_back(cast<StoreInst>(merged[i])->getValueOperand());
> }
> - StoreInst *st = cast<StoreInst>(merged[0]);
> + StoreInst *st = cast<StoreInst>(start);
> if(!st)
> return;
>
> @@ -314,12 +346,26 @@ namespace gbe {
> Value * stPointer = st->getPointerOperand();
> if(!stPointer)
> return;
> - Value *newPtr = Builder.CreateBitCast(stPointer, PointerType::get(vecTy, addrSpace));
> +
> + //modify offset
> + Value *newSPtr = stPointer;
> + if(offset != 0)
> + {
> + unsigned typeSize = TD->getPointerTypeSize(stPointer->getType());
> + Type *ptype = (typeSize == 4) ? Builder.getInt32Ty() : Builder.getInt64Ty();
> + Value *StartAddr = Builder.CreatePtrToInt(stPointer, ptype);
> + Value *offsetVal = ConstantInt::get(ptype, offset);
> + Value *newAddr = Builder.CreateAdd(StartAddr, offsetVal);
> + newSPtr = Builder.CreateIntToPtr(newAddr, stPointer->getType());
> + }
> +
> + Value *newPtr = Builder.CreateBitCast(newSPtr,
> + PointerType::get(vecTy, addrSpace));
> StoreInst *newST = Builder.CreateStore(parent, newPtr);
> newST->setAlignment(align);
> }
>
> - // Find the safe iterator we can point to. If reorder happens, we need to
> + // Find the safe iterator (will not be deleted after the merge) we can
> + // point to. If reorder happens, we need to
> // point to the instruction after the first of toBeDeleted. If no reorder,
> // we are safe to point to the instruction after the last of toBeDeleted
> static BasicBlock::iterator
> @@ -329,12 +375,15 @@ namespace gbe {
> BasicBlock::iterator safe = current;
> unsigned size = toBeDeleted.size();
> if (reorder) {
> - unsigned i = 0;
> - while (i < size && toBeDeleted[i] == &*safe) {
> - ++i;
> - ++safe;
> + BasicBlock *BB = &*current->getParent();
> + for (; safe != BB->end(); ++safe) {
> + if (std::find(toBeDeleted.begin(), toBeDeleted.end(), &*safe) ==
> + toBeDeleted.end())
> + break;
> }
> } else {
> + // TODO we should use the furthest instruction, so that the outer loop
> + // ends quicker.
> safe = BasicBlock::iterator(toBeDeleted[size - 1]);
> ++safe;
> }
> @@ -355,12 +404,14 @@ namespace gbe {
> ((ty->isIntegerTy(8) || ty->isIntegerTy(16)) && isLoad)))
> continue;
>
> + int addrOffset = 0;
> unsigned maxVecSize = (ty->isFloatTy() || ty->isIntegerTy(32)) ? 4 :
> (ty->isIntegerTy(16) ? 8 : 16);
> - bool reorder = findConsecutiveAccess(BB, merged, BBI, maxVecSize, isLoad);
> + bool reorder = findConsecutiveAccess(BB, merged, BBI,
> + maxVecSize, isLoad, &addrOffset);
> uint32_t size = merged.size();
> uint32_t pos = 0;
> bool doDeleting = size > 1;
> + BasicBlock::iterator startLS = BBI;
> if (doDeleting) {
> // choose next undeleted instruction
> BBI = findSafeInstruction(merged, BBI, reorder);
> @@ -372,9 +423,9 @@ namespace gbe {
> (size >= 4 ? 4 : size));
> SmallVector<Instruction*, 16> mergedVec(merged.begin() + pos,
> merged.begin() + pos + vecSize);
> if(isLoad)
> - mergeLoad(BB, mergedVec);
> + mergeLoad(BB, mergedVec, &*startLS, addrOffset);
> else
> - mergeStore(BB, mergedVec);
> + mergeStore(BB, mergedVec, &*startLS, addrOffset);
> // remove merged insn
> for(uint32_t i = 0; i < mergedVec.size(); i++)
> mergedVec[i]->eraseFromParent();
> --
> 2.4.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet