[Mesa-dev] [PATCH 3/3] clover: Inline all functions for drivers that don't support subroutines

Tom Stellard thomas.stellard at amd.com
Wed Mar 5 18:25:06 PST 2014


---
 src/gallium/drivers/radeon/radeon_llvm_util.c      | 35 --------------
 .../state_trackers/clover/core/compiler.hpp        |  3 +-
 src/gallium/state_trackers/clover/core/device.cpp  |  6 +++
 src/gallium/state_trackers/clover/core/device.hpp  |  1 +
 src/gallium/state_trackers/clover/core/program.cpp |  3 +-
 .../state_trackers/clover/llvm/invocation.cpp      | 55 +++++++++++++++++-----
 6 files changed, 55 insertions(+), 48 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c
index 2ace91f..fe7f9a6 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_util.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
@@ -53,40 +53,6 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
 	return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
 }
 
-static void radeon_llvm_optimize(LLVMModuleRef mod)
-{
-	const char *data_layout = LLVMGetDataLayout(mod);
-	LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
-	LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
-	LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
-
-	/* Functions calls are not supported yet, so we need to inline
-	 * everything.  The most efficient way to do this is to add
-	 * the always_inline attribute to all non-kernel functions
-	 * and then run the Always Inline pass.  The Always Inline
-	 * pass will automaically inline functions with this attribute
-	 * and does not perform the expensive cost analysis that the normal
-	 * inliner does.
-	 */
-
-	LLVMValueRef fn;
-	for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) {
-		/* All the non-kernel functions have internal linkage */
-		if (LLVMGetLinkage(fn) == LLVMInternalLinkage) {
-			LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
-		}
-	}
-
-	LLVMAddTargetData(TD, pass_manager);
-	LLVMAddAlwaysInlinerPass(pass_manager);
-	LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
-
-	LLVMRunPassManager(pass_manager, mod);
-	LLVMPassManagerBuilderDispose(builder);
-	LLVMDisposePassManager(pass_manager);
-	LLVMDisposeTargetData(TD);
-}
-
 LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
 		const unsigned char *bitcode, unsigned bitcode_len)
 {
@@ -109,6 +75,5 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
 		LLVMDeleteFunction(kernel_function);
 	}
 	FREE(kernel_metadata);
-	radeon_llvm_optimize(mod);
 	return mod;
 }
diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp
index 49cd022..5035a6b 100644
--- a/src/gallium/state_trackers/clover/core/compiler.hpp
+++ b/src/gallium/state_trackers/clover/core/compiler.hpp
@@ -32,7 +32,8 @@ namespace clover {
    module compile_program_llvm(const compat::string &source,
                                pipe_shader_ir ir,
                                const compat::string &target,
-                               const compat::string &opts);
+                               const compat::string &opts,
+                               bool subroutines_supported);
 
    module compile_program_tgsi(const compat::string &source);
 }
diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
index 2c5f9b7..6820f56 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -187,3 +187,9 @@ enum pipe_endian
 device::endianness() const {
    return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);
 }
+
+bool
+device::subroutines_supported() const {
+   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+                                 PIPE_SHADER_CAP_SUBROUTINES);
+}
diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp
index 433ac81..b187a93 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -68,6 +68,7 @@ namespace clover {
       enum pipe_shader_ir ir_format() const;
       std::string ir_target() const;
       enum pipe_endian endianness() const;
+      bool subroutines_supported() const;
 
       friend class command_queue;
       friend class root_resource;
diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp
index 3aaa652..b547023 100644
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -56,7 +56,8 @@ program::build(const ref_vector<device> &devs, const char *opts) {
             auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
                            compile_program_tgsi(_source) :
                            compile_program_llvm(_source, dev.ir_format(),
-                                                dev.ir_target(), build_opts(dev)));
+                                                dev.ir_target(), build_opts(dev),
+                                                dev.subroutines_supported()));
             _binaries.insert({ &dev, module });
 
          } catch (build_error &e) {
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index cdf32b6..c46e85e 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -30,6 +30,7 @@
 #include <llvm/Bitcode/BitstreamWriter.h>
 #include <llvm/Bitcode/ReaderWriter.h>
 #include <llvm/Linker.h>
+#include <llvm/Target/TargetLibraryInfo.h>
 #if HAVE_LLVM < 0x0303
 #include <llvm/DerivedTypes.h>
 #include <llvm/LLVMContext.h>
@@ -42,6 +43,7 @@
 #include <llvm/IRReader/IRReader.h>
 #endif
 #include <llvm/PassManager.h>
+#include <llvm/Support/CodeGen.h>
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Support/MemoryBuffer.h>
 #if HAVE_LLVM < 0x0303
@@ -212,6 +214,9 @@ namespace {
       // that is no executed by all threads) during its optimizaton passes.
       c.getCodeGenOpts().LinkBitcodeFile = libclc_path;
 
+      // Compile at -O0.  We will do optimizations later.
+      c.getCodeGenOpts().OptimizationLevel = llvm::CodeGenOpt::None;
+
       // Compile the code
       if (!c.ExecuteAction(act))
          throw build_error(log);
@@ -241,10 +246,39 @@ namespace {
    }
 
    void
-   internalize_functions(llvm::Module *mod,
-        const std::vector<llvm::Function *> &kernels) {
+   optimize(llvm::Module *mod, const std::vector<llvm::Function *> &kernels,
+            bool subroutines_supported) {
 
+      llvm::PassManagerBuilder builder;
       llvm::PassManager PM;
+      std::vector<const char*> export_list;
+
+#if HAVE_LLVM < 0x0305
+      PM.add(new llvm::DataLayout(mod));
+#else
+      PM.add(new llvm::DataLayoutPass(mod));
+#endif
+       // For targets that don't support subroutines, we need to inline
+       // everything.  The most efficient way to do this is to add
+       // the always_inline attribute to all non-kernel functions
+       // and then run the Always Inline pass.  The Always Inline
+       // pass will automaically inline functions with this attribute
+       // and does not perform the expensive cost analysis that the normal
+       // inliner does.
+      for (llvm::Module::iterator f = mod->begin(), e = mod->end();
+           f != e; ++f) {
+         if (std::find(kernels.begin(), kernels.end(), f) != kernels.end()) {
+            export_list.push_back(f->getName().data());
+         } else if (!subroutines_supported) {
+            f->addFnAttr(llvm::Attribute::AlwaysInline);
+         }
+      }
+
+      if (!subroutines_supported)
+         builder.Inliner = llvm::createAlwaysInlinerPass();
+      else
+         builder.Inliner = llvm::createFunctionInliningPass();
+
       // Add a function internalizer pass.
       //
       // By default, the function internalizer pass will look for a function
@@ -259,14 +293,12 @@ namespace {
       // list of kernel functions to the internalizer.  The internalizer will
       // treat the functions in the list as "main" functions and internalize
       // all of the other functions.
-      std::vector<const char*> export_list;
-      for (std::vector<llvm::Function *>::const_iterator I = kernels.begin(),
-                                                         E = kernels.end();
-                                                         I != E; ++I) {
-         llvm::Function *kernel = *I;
-         export_list.push_back(kernel->getName().data());
-      }
       PM.add(llvm::createInternalizePass(export_list));
+
+      builder.LibraryInfo =
+          new llvm::TargetLibraryInfo(llvm::Triple(mod->getTargetTriple()));
+
+      builder.populateModulePassManager(PM);
       PM.run(*mod);
    }
 
@@ -372,7 +404,8 @@ module
 clover::compile_program_llvm(const compat::string &source,
                              enum pipe_shader_ir ir,
                              const compat::string &target,
-                             const compat::string &opts) {
+                             const compat::string &opts,
+                             bool subroutines_supported) {
 
    std::vector<llvm::Function *> kernels;
    size_t processor_str_len = std::string(target.begin()).find_first_of("-");
@@ -388,7 +421,7 @@ clover::compile_program_llvm(const compat::string &source,
 
    find_kernels(mod, kernels);
 
-   internalize_functions(mod, kernels);
+   optimize(mod, kernels, subroutines_supported);
 
    // Build the clover::module
    switch (ir) {
-- 
1.8.1.5




More information about the mesa-dev mailing list