[Beignet] [Patch v2] GBE: Add a customized loop unrolling handling mechanism.
Zhigang Gong
zhigang.gong at linux.intel.com
Wed Oct 15 00:16:15 PDT 2014
Ping for review including the patch 1/2 in the first version
patch set and this v2 patch.
And please be noted, this patch requires the following patch
[PATCH] GBE: workaround an unsupported wide integer case.
to pass all the unit tests.
Thanks.
On Wed, Oct 08, 2014 at 12:58:59PM +0800, Zhigang Gong wrote:
> By default, the unrolling threshold is relatively small.
> Thus some relative large loops which access private array
> will not be unrolled, thus those private array can't
> be scalarized latter. And the private array is allocated
> in stack which is extreme slow for Gen backend currently.
>
> To increase the unrolling threshold for all loops is not
> a good idea, as most of the loops don't need to do unrolling
> for this purpose and a large unrolling threshold will cause
> a big code size and unecessary big register pressure which
> may lead to register spilling.
>
> So this patch introduce a trade-off pass to identify those
> loops which still have private load/store in the outer most
> of the loop. Then add a metadata to it to indicate aggresive
> unrolling on those loops. Then do another round loop unrolling.
>
> This patch with the previous small patch, can bring significant
> performance improvement for some cases. I just tested with some
> opencv test cases, and observed it can bring 2x to 10x improvement.
>
> v2:
> refine the parent loop unroll analysis method.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
> backend/src/CMakeLists.txt | 1 +
> backend/src/llvm/llvm_gen_backend.hpp | 4 +
> backend/src/llvm/llvm_to_gen.cpp | 12 +-
> backend/src/llvm/llvm_unroll.cpp | 230 ++++++++++++++++++++++++++++++++++
> 4 files changed, 245 insertions(+), 2 deletions(-)
> create mode 100644 backend/src/llvm/llvm_unroll.cpp
>
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index e57227a..e408365 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -84,6 +84,7 @@ set (GBE_SRC
> llvm/llvm_loadstore_optimization.cpp
> llvm/llvm_gen_backend.hpp
> llvm/llvm_gen_ocl_function.hxx
> + llvm/llvm_unroll.cpp
> llvm/llvm_to_gen.hpp
> backend/gen/gen_mesa_disasm.c
> backend/gen_insn_selection.cpp
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
> index f73aafe..76b60df 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -27,6 +27,7 @@
> #define __GBE_LLVM_GEN_BACKEND_HPP__
>
> #include "llvm/Pass.h"
> +#include "llvm/Analysis/LoopPass.h"
> #include "sys/platform.hpp"
> #include "sys/map.hpp"
> #include "sys/hash_map.hpp"
> @@ -98,6 +99,9 @@ namespace gbe
> /*! Passer the printf function call. */
> llvm::FunctionPass* createPrintfParserPass();
>
> + /* customized loop unrolling pass. */
> + llvm::LoopPass *createCustomLoopUnrollPass();
> +
> /*! Add all the function call of ocl to our bitcode. */
> llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath);
>
> diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
> index 26d2a49..6fbf41f 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -152,9 +152,17 @@ namespace gbe
> MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
> MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
> MPM.add(createLoopDeletionPass()); // Delete dead loops
> - MPM.add(createLoopUnrollPass()); // Unroll small loops
> - if(optLevel > 0)
> + MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops
> + if(optLevel > 0) {
> + MPM.add(createSROAPass(/*RequiresDomTree*/ false));
> + MPM.add(createGVNPass()); // Remove redundancies
> + }
> + MPM.add(createCustomLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops
> + MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops
> + if(optLevel > 0) {
> + MPM.add(createSROAPass(/*RequiresDomTree*/ false));
> MPM.add(createGVNPass()); // Remove redundancies
> + }
> MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
> MPM.add(createSCCPPass()); // Constant prop with SCCP
>
> diff --git a/backend/src/llvm/llvm_unroll.cpp b/backend/src/llvm/llvm_unroll.cpp
> new file mode 100644
> index 0000000..a27324b
> --- /dev/null
> +++ b/backend/src/llvm/llvm_unroll.cpp
> @@ -0,0 +1,230 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <set>
> +#include "llvm/Config/llvm-config.h"
> +#if LLVM_VERSION_MINOR <= 2
> +#include "llvm/Function.h"
> +#include "llvm/InstrTypes.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Module.h"
> +#else
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/InstrTypes.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/IR/IntrinsicInst.h"
> +#include "llvm/IR/Module.h"
> +#endif /* LLVM_VERSION_MINOR <= 2 */
> +#include "llvm/Pass.h"
> +#if LLVM_VERSION_MINOR <= 1
> +#include "llvm/Support/IRBuilder.h"
> +#elif LLVM_VERSION_MINOR == 2
> +#include "llvm/IRBuilder.h"
> +#else
> +#include "llvm/IR/IRBuilder.h"
> +#endif /* LLVM_VERSION_MINOR <= 1 */
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/PassManager.h"
> +#include "llvm/Transforms/Scalar.h"
> +#include "llvm/Analysis/ScalarEvolution.h"
> +#include "llvm/Analysis/LoopPass.h"
> +#include "llvm/Analysis/TargetTransformInfo.h"
> +#include "llvm/IR/Dominators.h"
> +#include "llvm/llvm_gen_backend.hpp"
> +#include "sys/map.hpp"
> +
> +
> +using namespace llvm;
> +
> +namespace gbe {
> + class CustomLoopUnroll : public LoopPass
> + {
> + public:
> + static char ID;
> + CustomLoopUnroll() :
> + LoopPass(ID) {}
> +
> + void getAnalysisUsage(AnalysisUsage &AU) const {
> + AU.addRequired<LoopInfo>();
> + AU.addPreserved<LoopInfo>();
> + AU.addRequiredID(LoopSimplifyID);
> + AU.addPreservedID(LoopSimplifyID);
> + AU.addRequiredID(LCSSAID);
> + AU.addPreservedID(LCSSAID);
> + AU.addRequired<ScalarEvolution>();
> + AU.addPreserved<ScalarEvolution>();
> + // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
> + // If loop unroll does not preserve dom info then LCSSA pass on next
> + // loop will receive invalid dom info.
> + // For now, recreate dom info, if loop is unrolled.
> + AU.addPreserved<DominatorTreeWrapperPass>();
> +
> + }
> +
> + // Returns the value associated with the given metadata node name (for
> + // example, "llvm.loop.unroll.count"). If no such named metadata node
> + // exists, then nullptr is returned.
> + static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
> + StringRef Name) {
> + MDNode *LoopID = L->getLoopID();
> + if (!LoopID) return nullptr;
> +
> + // First operand should refer to the loop id itself.
> + assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
> + assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
> +
> + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
> + const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
> + if (!MD) continue;
> +
> + const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
> + if (!S) continue;
> +
> + if (Name.equals(S->getString())) {
> + assert(MD->getNumOperands() == 2 &&
> + "Unroll hint metadata should have two operands.");
> + return cast<ConstantInt>(MD->getOperand(1));
> + }
> + }
> + return nullptr;
> + }
> +
> + void setUnrollID(Loop *L, bool enable) {
> + if (!enable && disabledLoops.find(L) != disabledLoops.end())
> + return;
> + LLVMContext &Context = L->getHeader()->getContext();
> + SmallVector<Value *, 2> forceUnroll;
> + forceUnroll.push_back(MDString::get(Context, "llvm.loop.unroll.enable"));
> + forceUnroll.push_back(ConstantInt::get(Type::getInt1Ty(Context), enable));
> + MDNode *forceUnrollNode = MDNode::get(Context, forceUnroll);
> + SmallVector<Value *, 4> Vals;
> + Vals.push_back(NULL);
> + Vals.push_back(forceUnrollNode);
> + MDNode *NewLoopID = MDNode::get(Context, Vals);
> + // Set operand 0 to refer to the loop id itself.
> + NewLoopID->replaceOperandWith(0, NewLoopID);
> + L->setLoopID(NewLoopID);
> + if (!enable)
> + disabledLoops.insert(L);
> + }
> +
> + static bool hasPrivateLoadStore(Loop *L) {
> + const std::vector<Loop*> subLoops = L->getSubLoops();
> + std::set<BasicBlock*> subBlocks, blocks;
> +
> + for(auto l : subLoops)
> + for(auto bb : l->getBlocks())
> + subBlocks.insert(bb);
> + for(auto bb : L->getBlocks())
> + if (subBlocks.find(bb) == subBlocks.end())
> + blocks.insert(bb);
> + for(auto bb : blocks) {
> + for (BasicBlock::iterator inst = bb->begin(), instE = bb->end(); inst != instE; ++inst) {
> + unsigned addrSpace = 0;
> + if (isa<LoadInst>(*inst)) {
> + LoadInst *ld = cast<LoadInst>(&*inst);
> + addrSpace = ld->getPointerAddressSpace();
> + }
> + else if (isa<StoreInst>(*inst)) {
> + StoreInst *st = cast<StoreInst>(&*inst);
> + addrSpace = st->getPointerAddressSpace();
> + }
> + if (addrSpace == 0)
> + return true;
> + }
> + }
> + return false;
> + }
> + // If one loop has very large self trip count
> + // we don't want to unroll it.
> + // self trip count means trip count divide by the parent's trip count. for example
> + // for (int i = 0; i < 16; i++) {
> + // for (int j = 0; j < 4; j++) {
> + // for (int k = 0; k < 2; k++) {
> + // ...
> + // }
> + // ...
> + // }
> + // The inner loops j and k could be unrolled, but the loop i will not be unrolled.
> + // The return value true means the L could be unrolled, otherwise, it could not
> + // be unrolled.
> + bool handleParentLoops(Loop *L, LPPassManager &LPM) {
> + Loop *currL = L;
> + ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
> + BasicBlock *latchBlock = currL->getLoopLatch();
> + unsigned currTripCount = 0;
> + bool shouldUnroll = true;
> + if (latchBlock)
> + currTripCount = SE->getSmallConstantTripCount(L, latchBlock);
> +
> + while(currL) {
> + Loop *parentL = currL->getParentLoop();
> + unsigned parentTripCount = 0;
> + if (parentL) {
> + BasicBlock *parentLatchBlock = parentL->getLoopLatch();
> + if (parentLatchBlock)
> + parentTripCount = SE->getSmallConstantTripCount(parentL, parentLatchBlock);
> + }
> + if ((parentTripCount != 0 && currTripCount / parentTripCount > 16) ||
> + (currTripCount > 32)) {
> + if (currL == L)
> + shouldUnroll = false;
> + setUnrollID(currL, false);
> + if (currL != L)
> + LPM.deleteLoopFromQueue(currL);
> + }
> + currL = parentL;
> + currTripCount = parentTripCount;
> + }
> + return shouldUnroll;
> + }
> +
> + // Analyze the outermost BBs of this loop, if there are
> + // some private load or store, we change it's loop meta data
> + // to indicate more aggresive unrolling on it.
> + virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
> + const ConstantInt *Enable = GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
> + if (Enable)
> + return false;
> + const ConstantInt *Count = GetUnrollMetadataValue(L, "llvm.loop.unroll.count");
> + if (Count)
> + return false;
> +
> + if (!handleParentLoops(L, LPM))
> + return false;
> +
> + if (!hasPrivateLoadStore(L))
> + return false;
> + setUnrollID(L, true);
> + return true;
> + }
> +
> + virtual const char *getPassName() const {
> + return "SPIR backend: custom loop unrolling pass";
> + }
> + private:
> + std::set<Loop *> disabledLoops;
> +
> + };
> +
> + char CustomLoopUnroll::ID = 0;
> +
> + LoopPass *createCustomLoopUnrollPass() {
> + return new CustomLoopUnroll();
> + }
> +} // end namespace
> --
> 1.8.3.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list