[Beignet] [PATCH] Reduce the compilation time of inline pass in runModulePass().
yan.wang at linux.intel.com
yan.wang at linux.intel.com
Tue Oct 25 07:07:49 UTC 2016
From: Yan Wang <yan.wang at linux.intel.com>
This can greatly reduce compilation time when running LuxMark scenes.
Avoid running the inline pass many times in runModulePass() when the
module is changed by other passes.
Create a separate function to run the inline pass.
In this function, the intrinsic lowering pass and the strict-math-related
passes are also added to keep enough precision.
---
backend/src/llvm/llvm_to_gen.cpp | 53 +++++++++++++++++++++++++++++++++++++---
1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index e108810..0d51ee3 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -106,6 +106,55 @@ namespace gbe
FPM.doFinalization();
}
+ void runInlinePass(Module &mod, TARGETLIBRARY *libraryInfo, const DataLayout &DL, int optLevel, bool strictMath)
+ {
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
+ legacy::PassManager MPM;
+#else
+ PassManager MPM;
+#endif
+
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
+#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+ MPM.add(new DataLayoutPass());
+#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 5
+ MPM.add(new DataLayoutPass(DL));
+#else
+ MPM.add(new DataLayout(DL));
+#endif
+
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
+ MPM.add(new TargetLibraryInfoWrapperPass(*libraryInfo));
+#else
+ MPM.add(new TargetLibraryInfo(*libraryInfo));
+#endif
+ MPM.add(createIntrinsicLoweringPass());
+ MPM.add(createBarrierNodupPass(false)); // remove noduplicate fnAttr before inlining.
+ MPM.add(createFunctionInliningPass(20000));
+ //MPM.add(createAlwaysInlinerPass());
+ MPM.add(createBarrierNodupPass(true)); // restore noduplicate fnAttr after inlining.
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
+ // FIXME Workaround: we find that CustomLoopUnroll may increase register pressure greatly,
+ // and it may even make som cl kernel cannot compile because of limited scratch memory for spill.
+ // As we observe this under strict math. So we disable CustomLoopUnroll if strict math is enabled.
+ if (!strictMath) {
+#if !defined(__ANDROID__)
+ MPM.add(createCustomLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops
+#endif
+ MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops
+ if(optLevel > 0) {
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8
+ MPM.add(createSROAPass());
+#else
+ MPM.add(createSROAPass(/*RequiresDomTree*/ false));
+#endif
+ MPM.add(createGVNPass()); // Remove redundancies
+ }
+ }
+#endif
+ MPM.run(mod);
+ }
+
void runModulePass(Module &mod, TARGETLIBRARY *libraryInfo, const DataLayout &DL, int optLevel, bool strictMath)
{
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
@@ -146,9 +195,6 @@ namespace gbe
MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
MPM.add(createPruneEHPass()); // Remove dead EH info
- MPM.add(createBarrierNodupPass(false)); // remove noduplicate fnAttr before inlining.
- MPM.add(createFunctionInliningPass(20000));
- MPM.add(createBarrierNodupPass(true)); // restore noduplicate fnAttr after inlining.
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
MPM.add(createPostOrderFunctionAttrsLegacyPass());
#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8
@@ -344,6 +390,7 @@ namespace gbe
OUTPUT_BITCODE(AFTER_LINK, mod);
runFuntionPass(mod, libraryInfo, DL);
+ runInlinePass(mod, libraryInfo, DL, optLevel, strictMath);
runModulePass(mod, libraryInfo, DL, optLevel, strictMath);
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
legacy::PassManager passes;
--
2.7.4
More information about the Beignet
mailing list