[Mesa-dev] [PATCH 3/3] clover: Inline all functions for drivers that don't support subroutines
Tom Stellard
thomas.stellard at amd.com
Wed Mar 5 18:25:06 PST 2014
---
src/gallium/drivers/radeon/radeon_llvm_util.c | 35 --------------
.../state_trackers/clover/core/compiler.hpp | 3 +-
src/gallium/state_trackers/clover/core/device.cpp | 6 +++
src/gallium/state_trackers/clover/core/device.hpp | 1 +
src/gallium/state_trackers/clover/core/program.cpp | 3 +-
.../state_trackers/clover/llvm/invocation.cpp | 55 +++++++++++++++++-----
6 files changed, 55 insertions(+), 48 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c
index 2ace91f..fe7f9a6 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_util.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
@@ -53,40 +53,6 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
}
-static void radeon_llvm_optimize(LLVMModuleRef mod)
-{
- const char *data_layout = LLVMGetDataLayout(mod);
- LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
- LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
- LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
-
- /* Functions calls are not supported yet, so we need to inline
- * everything. The most efficient way to do this is to add
- * the always_inline attribute to all non-kernel functions
- * and then run the Always Inline pass. The Always Inline
- * pass will automaically inline functions with this attribute
- * and does not perform the expensive cost analysis that the normal
- * inliner does.
- */
-
- LLVMValueRef fn;
- for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) {
- /* All the non-kernel functions have internal linkage */
- if (LLVMGetLinkage(fn) == LLVMInternalLinkage) {
- LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
- }
- }
-
- LLVMAddTargetData(TD, pass_manager);
- LLVMAddAlwaysInlinerPass(pass_manager);
- LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
-
- LLVMRunPassManager(pass_manager, mod);
- LLVMPassManagerBuilderDispose(builder);
- LLVMDisposePassManager(pass_manager);
- LLVMDisposeTargetData(TD);
-}
-
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
const unsigned char *bitcode, unsigned bitcode_len)
{
@@ -109,6 +75,5 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
LLVMDeleteFunction(kernel_function);
}
FREE(kernel_metadata);
- radeon_llvm_optimize(mod);
return mod;
}
diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp
index 49cd022..5035a6b 100644
--- a/src/gallium/state_trackers/clover/core/compiler.hpp
+++ b/src/gallium/state_trackers/clover/core/compiler.hpp
@@ -32,7 +32,8 @@ namespace clover {
module compile_program_llvm(const compat::string &source,
pipe_shader_ir ir,
const compat::string &target,
- const compat::string &opts);
+ const compat::string &opts,
+ bool subroutines_supported);
module compile_program_tgsi(const compat::string &source);
}
diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
index 2c5f9b7..6820f56 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -187,3 +187,9 @@ enum pipe_endian
device::endianness() const {
return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);
}
+
+bool
+device::subroutines_supported() const {
+ return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_SUBROUTINES);
+}
diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp
index 433ac81..b187a93 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -68,6 +68,7 @@ namespace clover {
enum pipe_shader_ir ir_format() const;
std::string ir_target() const;
enum pipe_endian endianness() const;
+ bool subroutines_supported() const;
friend class command_queue;
friend class root_resource;
diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp
index 3aaa652..b547023 100644
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -56,7 +56,8 @@ program::build(const ref_vector<device> &devs, const char *opts) {
auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
compile_program_tgsi(_source) :
compile_program_llvm(_source, dev.ir_format(),
- dev.ir_target(), build_opts(dev)));
+ dev.ir_target(), build_opts(dev),
+ dev.subroutines_supported()));
_binaries.insert({ &dev, module });
} catch (build_error &e) {
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index cdf32b6..c46e85e 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -30,6 +30,7 @@
#include <llvm/Bitcode/BitstreamWriter.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/Linker.h>
+#include <llvm/Target/TargetLibraryInfo.h>
#if HAVE_LLVM < 0x0303
#include <llvm/DerivedTypes.h>
#include <llvm/LLVMContext.h>
@@ -42,6 +43,7 @@
#include <llvm/IRReader/IRReader.h>
#endif
#include <llvm/PassManager.h>
+#include <llvm/Support/CodeGen.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/MemoryBuffer.h>
#if HAVE_LLVM < 0x0303
@@ -212,6 +214,9 @@ namespace {
// that is no executed by all threads) during its optimizaton passes.
c.getCodeGenOpts().LinkBitcodeFile = libclc_path;
+ // Compile at -O0. We will do optimizations later.
+ c.getCodeGenOpts().OptimizationLevel = llvm::CodeGenOpt::None;
+
// Compile the code
if (!c.ExecuteAction(act))
throw build_error(log);
@@ -241,10 +246,39 @@ namespace {
}
void
- internalize_functions(llvm::Module *mod,
- const std::vector<llvm::Function *> &kernels) {
+ optimize(llvm::Module *mod, const std::vector<llvm::Function *> &kernels,
+ bool subroutines_supported) {
+ llvm::PassManagerBuilder builder;
llvm::PassManager PM;
+ std::vector<const char*> export_list;
+
+#if HAVE_LLVM < 0x0305
+ PM.add(new llvm::DataLayout(mod));
+#else
+ PM.add(new llvm::DataLayoutPass(mod));
+#endif
+ // For targets that don't support subroutines, we need to inline
+ // everything. The most efficient way to do this is to add
+ // the always_inline attribute to all non-kernel functions
+ // and then run the Always Inline pass. The Always Inline
+ // pass will automatically inline functions with this attribute
+ // and does not perform the expensive cost analysis that the normal
+ // inliner does.
+ for (llvm::Module::iterator f = mod->begin(), e = mod->end();
+ f != e; ++f) {
+ if (std::find(kernels.begin(), kernels.end(), f) != kernels.end()) {
+ export_list.push_back(f->getName().data());
+ } else if (!subroutines_supported) {
+ f->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
+ }
+
+ if (!subroutines_supported)
+ builder.Inliner = llvm::createAlwaysInlinerPass();
+ else
+ builder.Inliner = llvm::createFunctionInliningPass();
+
// Add a function internalizer pass.
//
// By default, the function internalizer pass will look for a function
@@ -259,14 +293,12 @@ namespace {
// list of kernel functions to the internalizer. The internalizer will
// treat the functions in the list as "main" functions and internalize
// all of the other functions.
- std::vector<const char*> export_list;
- for (std::vector<llvm::Function *>::const_iterator I = kernels.begin(),
- E = kernels.end();
- I != E; ++I) {
- llvm::Function *kernel = *I;
- export_list.push_back(kernel->getName().data());
- }
PM.add(llvm::createInternalizePass(export_list));
+
+ builder.LibraryInfo =
+ new llvm::TargetLibraryInfo(llvm::Triple(mod->getTargetTriple()));
+
+ builder.populateModulePassManager(PM);
PM.run(*mod);
}
@@ -372,7 +404,8 @@ module
clover::compile_program_llvm(const compat::string &source,
enum pipe_shader_ir ir,
const compat::string &target,
- const compat::string &opts) {
+ const compat::string &opts,
+ bool subroutines_supported) {
std::vector<llvm::Function *> kernels;
size_t processor_str_len = std::string(target.begin()).find_first_of("-");
@@ -388,7 +421,7 @@ clover::compile_program_llvm(const compat::string &source,
find_kernels(mod, kernels);
- internalize_functions(mod, kernels);
+ optimize(mod, kernels, subroutines_supported);
// Build the clover::module
switch (ir) {
--
1.8.1.5
More information about the mesa-dev mailing list