[Beignet] [PATCH] GBE: work around register allocation failure caused by custom loop unroll.
Ruiling Song
ruiling.song at intel.com
Sun Nov 2 23:52:25 PST 2014
As this issue only occurs under strict math, we disable custom
loop unroll if strict math is enabled.
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/llvm/llvm_to_gen.cpp | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 0c7785b..33aec59 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -107,7 +107,7 @@ namespace gbe
FPM.doFinalization();
}
- void runModulePass(Module &mod, TargetLibraryInfo *libraryInfo, const DataLayout &DL, int optLevel)
+ void runModulePass(Module &mod, TargetLibraryInfo *libraryInfo, const DataLayout &DL, int optLevel, bool strictMath)
{
llvm::PassManager MPM;
@@ -158,11 +158,16 @@ namespace gbe
MPM.add(createGVNPass()); // Remove redundancies
}
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
- MPM.add(createCustomLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops
- MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops
- if(optLevel > 0) {
- MPM.add(createSROAPass(/*RequiresDomTree*/ false));
- MPM.add(createGVNPass()); // Remove redundancies
+ // FIXME Workaround: we find that CustomLoopUnroll may greatly increase register pressure,
+ // and it may even make some CL kernels fail to compile because of limited scratch memory for spills.
+ // As we have only observed this under strict math, we disable CustomLoopUnroll if strict math is enabled.
+ if (!strictMath) {
+ MPM.add(createCustomLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops
+ MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops
+ if(optLevel > 0) {
+ MPM.add(createSROAPass(/*RequiresDomTree*/ false));
+ MPM.add(createGVNPass()); // Remove redundancies
+ }
}
#endif
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
@@ -250,7 +255,7 @@ namespace gbe
OUTPUT_BITCODE(AFTER_LINK, mod);
runFuntionPass(mod, libraryInfo, DL);
- runModulePass(mod, libraryInfo, DL, optLevel);
+ runModulePass(mod, libraryInfo, DL, optLevel, strictMath);
llvm::PassManager passes;
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
passes.add(new DataLayoutPass(DL));
--
1.7.10.4
More information about the Beignet
mailing list