[Beignet] [PATCH 2/2] GBE: Add a customized loop unrolling handling mechanism.
Gong, Zhigang
zhigang.gong at intel.com
Mon Sep 29 22:13:38 PDT 2014
This patchset introduces one regression at utest's compiler_time_stamp.
Not sure what's the root cause, will continue to investigate it when I back from the vacation next week.
> -----Original Message-----
> From: Gong, Zhigang
> Sent: Tuesday, September 30, 2014 12:17 PM
> To: beignet at lists.freedesktop.org
> Cc: Gong, Zhigang
> Subject: [PATCH 2/2] GBE: Add a customized loop unrolling handling
> mechanism.
>
> By default, the unrolling threshold is relatively small.
> Thus some relative large loops which access private array will not be unrolled,
> thus those private array can't be scalarized latter. And the private array is
> allocated in stack which is extreme slow for Gen backend currently.
>
> To increase the unrolling threshold for all loops is not a good idea, as most of
> the loops don't need to do unrolling for this purpose and a large unrolling
> threshold will cause a big code size and unecessary big register pressure which
> may lead to register spilling.
>
> So this patch introduce a trade-off pass to identify those loops which still have
> private load/store in the outer most of the loop. Then add a metadata to it to
> indicate aggresive unrolling on those loops. Then do another round loop
> unrolling.
>
> This patch with the previous small patch, can bring significant performance
> improvement for some cases. I just tested with some opencv test cases, and
> observed it can bring 2x to 10x improvement.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
> backend/src/CMakeLists.txt | 1 +
> backend/src/llvm/llvm_gen_backend.hpp | 4 +
> backend/src/llvm/llvm_to_gen.cpp | 12 +-
> backend/src/llvm/llvm_unroll.cpp | 229
> ++++++++++++++++++++++++++++++++++
> 4 files changed, 244 insertions(+), 2 deletions(-) create mode 100644
> backend/src/llvm/llvm_unroll.cpp
>
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index
> e57227a..e408365 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -84,6 +84,7 @@ set (GBE_SRC
> llvm/llvm_loadstore_optimization.cpp
> llvm/llvm_gen_backend.hpp
> llvm/llvm_gen_ocl_function.hxx
> + llvm/llvm_unroll.cpp
> llvm/llvm_to_gen.hpp
> backend/gen/gen_mesa_disasm.c
> backend/gen_insn_selection.cpp
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp
> b/backend/src/llvm/llvm_gen_backend.hpp
> index f73aafe..76b60df 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -27,6 +27,7 @@
> #define __GBE_LLVM_GEN_BACKEND_HPP__
>
> #include "llvm/Pass.h"
> +#include "llvm/Analysis/LoopPass.h"
> #include "sys/platform.hpp"
> #include "sys/map.hpp"
> #include "sys/hash_map.hpp"
> @@ -98,6 +99,9 @@ namespace gbe
> /*! Passer the printf function call. */
> llvm::FunctionPass* createPrintfParserPass();
>
> + /* customized loop unrolling pass. */ llvm::LoopPass
> + *createCustomLoopUnrollPass();
> +
> /*! Add all the function call of ocl to our bitcode. */
> llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath);
>
> diff --git a/backend/src/llvm/llvm_to_gen.cpp
> b/backend/src/llvm/llvm_to_gen.cpp
> index 26d2a49..6fbf41f 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -152,9 +152,17 @@ namespace gbe
> MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
> MPM.add(createLoopIdiomPass()); // Recognize idioms
> like memset.
> MPM.add(createLoopDeletionPass()); // Delete dead loops
> - MPM.add(createLoopUnrollPass()); // Unroll small loops
> - if(optLevel > 0)
> + MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll
> loops
> + if(optLevel > 0) {
> + MPM.add(createSROAPass(/*RequiresDomTree*/ false));
> + MPM.add(createGVNPass()); // Remove
> redundancies
> + }
> + MPM.add(createCustomLoopUnrollPass()); //1024, 32, 1024, 512));
> //Unroll loops
> + MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll
> loops
> + if(optLevel > 0) {
> + MPM.add(createSROAPass(/*RequiresDomTree*/ false));
> MPM.add(createGVNPass()); // Remove
> redundancies
> + }
> MPM.add(createMemCpyOptPass()); // Remove memcpy
> / form memset
> MPM.add(createSCCPPass()); // Constant prop
> with SCCP
>
> diff --git a/backend/src/llvm/llvm_unroll.cpp
> b/backend/src/llvm/llvm_unroll.cpp
> new file mode 100644
> index 0000000..b71c0ab
> --- /dev/null
> +++ b/backend/src/llvm/llvm_unroll.cpp
> @@ -0,0 +1,229 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <set>
> +#include "llvm/Config/llvm-config.h"
> +#if LLVM_VERSION_MINOR <= 2
> +#include "llvm/Function.h"
> +#include "llvm/InstrTypes.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Module.h"
> +#else
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/InstrTypes.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/IR/IntrinsicInst.h"
> +#include "llvm/IR/Module.h"
> +#endif /* LLVM_VERSION_MINOR <= 2 */
> +#include "llvm/Pass.h"
> +#if LLVM_VERSION_MINOR <= 1
> +#include "llvm/Support/IRBuilder.h"
> +#elif LLVM_VERSION_MINOR == 2
> +#include "llvm/IRBuilder.h"
> +#else
> +#include "llvm/IR/IRBuilder.h"
> +#endif /* LLVM_VERSION_MINOR <= 1 */
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/PassManager.h"
> +#include "llvm/Transforms/Scalar.h"
> +#include "llvm/Analysis/ScalarEvolution.h"
> +#include "llvm/Analysis/LoopPass.h"
> +#include "llvm/Analysis/TargetTransformInfo.h"
> +#include "llvm/IR/Dominators.h"
> +#include "llvm/llvm_gen_backend.hpp"
> +#include "sys/map.hpp"
> +
> +
> +using namespace llvm;
> +
> +namespace gbe {
> + class CustomLoopUnroll : public LoopPass
> + {
> + public:
> + static char ID;
> + CustomLoopUnroll() :
> + LoopPass(ID) {}
> +
> + void getAnalysisUsage(AnalysisUsage &AU) const {
> + AU.addRequired<LoopInfo>();
> + AU.addPreserved<LoopInfo>();
> + AU.addRequiredID(LoopSimplifyID);
> + AU.addPreservedID(LoopSimplifyID);
> + AU.addRequiredID(LCSSAID);
> + AU.addPreservedID(LCSSAID);
> + AU.addRequired<ScalarEvolution>();
> + AU.addPreserved<ScalarEvolution>();
> + // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
> + // If loop unroll does not preserve dom info then LCSSA pass on next
> + // loop will receive invalid dom info.
> + // For now, recreate dom info, if loop is unrolled.
> + AU.addPreserved<DominatorTreeWrapperPass>();
> +
> + }
> +
> + // Returns the value associated with the given metadata node name
> (for
> + // example, "llvm.loop.unroll.count"). If no such named metadata
> node
> + // exists, then nullptr is returned.
> + static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
> + StringRef
> Name) {
> + MDNode *LoopID = L->getLoopID();
> + if (!LoopID) return nullptr;
> +
> + // First operand should refer to the loop id itself.
> + assert(LoopID->getNumOperands() > 0 && "requires at least one
> operand");
> + assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
> +
> + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
> + const MDNode *MD =
> dyn_cast<MDNode>(LoopID->getOperand(i));
> + if (!MD) continue;
> +
> + const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
> + if (!S) continue;
> +
> + if (Name.equals(S->getString())) {
> + assert(MD->getNumOperands() == 2 &&
> + "Unroll hint metadata should have two operands.");
> + return cast<ConstantInt>(MD->getOperand(1));
> + }
> + }
> + return nullptr;
> + }
> +
> + void setUnrollID(Loop *L, bool enable) {
> + if (!enable && disabledLoops.find(L) != disabledLoops.end())
> + return;
> + LLVMContext &Context = L->getHeader()->getContext();
> + SmallVector<Value *, 2> forceUnroll;
> + forceUnroll.push_back(MDString::get(Context,
> "llvm.loop.unroll.enable"));
> + forceUnroll.push_back(ConstantInt::get(Type::getInt1Ty(Context),
> enable));
> + MDNode *forceUnrollNode = MDNode::get(Context, forceUnroll);
> + SmallVector<Value *, 4> Vals;
> + Vals.push_back(NULL);
> + Vals.push_back(forceUnrollNode);
> + MDNode *NewLoopID = MDNode::get(Context, Vals);
> + // Set operand 0 to refer to the loop id itself.
> + NewLoopID->replaceOperandWith(0, NewLoopID);
> + L->setLoopID(NewLoopID);
> + if (!enable)
> + disabledLoops.insert(L);
> + }
> +
> + static bool hasPrivateLoadStore(Loop *L) {
> + const std::vector<Loop*> subLoops = L->getSubLoops();
> + std::set<BasicBlock*> subBlocks, blocks;
> +
> + for(auto l : subLoops)
> + for(auto bb : l->getBlocks())
> + subBlocks.insert(bb);
> + for(auto bb : L->getBlocks())
> + if (subBlocks.find(bb) == subBlocks.end())
> + blocks.insert(bb);
> + for(auto bb : blocks) {
> + for (BasicBlock::iterator inst = bb->begin(), instE = bb->end();
> inst != instE; ++inst) {
> + unsigned addrSpace = 0;
> + if (isa<LoadInst>(*inst)) {
> + LoadInst *ld = cast<LoadInst>(&*inst);
> + addrSpace = ld->getPointerAddressSpace();
> + }
> + else if (isa<StoreInst>(*inst)) {
> + StoreInst *st = cast<StoreInst>(&*inst);
> + addrSpace = st->getPointerAddressSpace();
> + }
> + if (addrSpace == 0)
> + return true;
> + }
> + }
> + return false;
> + }
> + // If one loop has very large self trip count
> + // we don't want to unroll it.
> + // self trip count means trip count divide by the parent's trip count. for
> example
> + // for (int i = 0; i < 16; i++) {
> + // for (int j = 0; j < 4; j++) {
> + // for (int k = 0; k < 2; k++) {
> + // ...
> + // }
> + // ...
> + // }
> + // The inner loops j and k could be unrolled, but the loop i will not be
> unrolled.
> + // The return value true means the L could be unrolled, otherwise, it
> could not
> + // be unrolled.
> + bool handleParentLoops(Loop *L, LPPassManager &LPM) {
> + Loop *currL = L;
> + ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
> + BasicBlock *latchBlock = currL->getLoopLatch();
> + unsigned currTripCount = 0;
> + bool shouldUnroll = true;
> + if (latchBlock)
> + currTripCount = SE->getSmallConstantTripCount(L, latchBlock);
> +
> + while(currL) {
> + Loop *parentL = currL->getParentLoop();
> + unsigned parentTripCount = 0;
> + if (parentL) {
> + BasicBlock *parentLatchBlock = parentL->getLoopLatch();
> + if (parentLatchBlock)
> + parentTripCount = SE->getSmallConstantTripCount(parentL,
> parentLatchBlock);
> + }
> + if (parentTripCount == 0 || currTripCount == 0 || currTripCount /
> parentTripCount > 16) {
> + if (currL == L)
> + shouldUnroll = false;
> + setUnrollID(currL, false);
> + if (currL != L)
> + LPM.deleteLoopFromQueue(currL);
> + }
> + currL = parentL;
> + currTripCount = parentTripCount;
> + }
> + return shouldUnroll;
> + }
> +
> + // Analyze the outermost BBs of this loop, if there are
> + // some private load or store, we change it's loop meta data
> + // to indicate more aggresive unrolling on it.
> + virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
> + const ConstantInt *Enable = GetUnrollMetadataValue(L,
> "llvm.loop.unroll.enable");
> + if (Enable)
> + return false;
> + const ConstantInt *Count = GetUnrollMetadataValue(L,
> "llvm.loop.unroll.count");
> + if (Count)
> + return false;
> +
> + if (!handleParentLoops(L, LPM))
> + return false;
> +
> + if (!hasPrivateLoadStore(L))
> + return false;
> + setUnrollID(L, true);
> + return true;
> + }
> +
> + virtual const char *getPassName() const {
> + return "SPIR backend: custom loop unrolling pass";
> + }
> + private:
> + std::set<Loop *> disabledLoops;
> +
> + };
> +
> + char CustomLoopUnroll::ID = 0;
> +
> + LoopPass *createCustomLoopUnrollPass() {
> + return new CustomLoopUnroll();
> + }
> +} // end namespace
> --
> 1.8.3.2
More information about the Beignet
mailing list