[Mesa-dev] [PATCH 3/3] clover: Inline all functions for drivers that don't support subroutines

Francisco Jerez currojerez at riseup.net
Thu Mar 6 04:45:36 PST 2014


Tom Stellard <thomas.stellard at amd.com> writes:

> ---
>  src/gallium/drivers/radeon/radeon_llvm_util.c      | 35 --------------
>  .../state_trackers/clover/core/compiler.hpp        |  3 +-
>  src/gallium/state_trackers/clover/core/device.cpp  |  6 +++
>  src/gallium/state_trackers/clover/core/device.hpp  |  1 +
>  src/gallium/state_trackers/clover/core/program.cpp |  3 +-
>  .../state_trackers/clover/llvm/invocation.cpp      | 55 +++++++++++++++++-----
>  6 files changed, 55 insertions(+), 48 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c
> index 2ace91f..fe7f9a6 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm_util.c
> +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
> @@ -53,40 +53,6 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
>  	return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
>  }
>  
> -static void radeon_llvm_optimize(LLVMModuleRef mod)
> -{
> -	const char *data_layout = LLVMGetDataLayout(mod);
> -	LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
> -	LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
> -	LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
> -
> -	/* Functions calls are not supported yet, so we need to inline
> -	 * everything.  The most efficient way to do this is to add
> -	 * the always_inline attribute to all non-kernel functions
> -	 * and then run the Always Inline pass.  The Always Inline
> -	 * pass will automaically inline functions with this attribute
> -	 * and does not perform the expensive cost analysis that the normal
> -	 * inliner does.
> -	 */
> -
> -	LLVMValueRef fn;
> -	for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) {
> -		/* All the non-kernel functions have internal linkage */
> -		if (LLVMGetLinkage(fn) == LLVMInternalLinkage) {
> -			LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
> -		}
> -	}
> -
> -	LLVMAddTargetData(TD, pass_manager);
> -	LLVMAddAlwaysInlinerPass(pass_manager);
> -	LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
> -
> -	LLVMRunPassManager(pass_manager, mod);
> -	LLVMPassManagerBuilderDispose(builder);
> -	LLVMDisposePassManager(pass_manager);
> -	LLVMDisposeTargetData(TD);
> -}
> -
>  LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
>  		const unsigned char *bitcode, unsigned bitcode_len)
>  {
> @@ -109,6 +75,5 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
>  		LLVMDeleteFunction(kernel_function);
>  	}
>  	FREE(kernel_metadata);
> -	radeon_llvm_optimize(mod);
>  	return mod;
>  }
> diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp
> index 49cd022..5035a6b 100644
> --- a/src/gallium/state_trackers/clover/core/compiler.hpp
> +++ b/src/gallium/state_trackers/clover/core/compiler.hpp
> @@ -32,7 +32,8 @@ namespace clover {
>     module compile_program_llvm(const compat::string &source,
>                                 pipe_shader_ir ir,
>                                 const compat::string &target,
> -                               const compat::string &opts);
> +                               const compat::string &opts,
> +                               bool subroutines_supported);
>  
>     module compile_program_tgsi(const compat::string &source);
>  }
> diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
> index 2c5f9b7..6820f56 100644
> --- a/src/gallium/state_trackers/clover/core/device.cpp
> +++ b/src/gallium/state_trackers/clover/core/device.cpp
> @@ -187,3 +187,9 @@ enum pipe_endian
>  device::endianness() const {
>     return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);
>  }
> +
> +bool
> +device::subroutines_supported() const {
> +   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
> +                                 PIPE_SHADER_CAP_SUBROUTINES);
> +}
> diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp
> index 433ac81..b187a93 100644
> --- a/src/gallium/state_trackers/clover/core/device.hpp
> +++ b/src/gallium/state_trackers/clover/core/device.hpp
> @@ -68,6 +68,7 @@ namespace clover {
>        enum pipe_shader_ir ir_format() const;
>        std::string ir_target() const;
>        enum pipe_endian endianness() const;
> +      bool subroutines_supported() const;
>  
>        friend class command_queue;
>        friend class root_resource;
> diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp
> index 3aaa652..b547023 100644
> --- a/src/gallium/state_trackers/clover/core/program.cpp
> +++ b/src/gallium/state_trackers/clover/core/program.cpp
> @@ -56,7 +56,8 @@ program::build(const ref_vector<device> &devs, const char *opts) {
>              auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
>                             compile_program_tgsi(_source) :
>                             compile_program_llvm(_source, dev.ir_format(),
> -                                                dev.ir_target(), build_opts(dev)));
> +                                                dev.ir_target(), build_opts(dev),
> +                                                dev.subroutines_supported()));
>              _binaries.insert({ &dev, module });
>  
>           } catch (build_error &e) {
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> index cdf32b6..c46e85e 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -30,6 +30,7 @@
>  #include <llvm/Bitcode/BitstreamWriter.h>
>  #include <llvm/Bitcode/ReaderWriter.h>
>  #include <llvm/Linker.h>
> +#include <llvm/Target/TargetLibraryInfo.h>
>  #if HAVE_LLVM < 0x0303
>  #include <llvm/DerivedTypes.h>
>  #include <llvm/LLVMContext.h>
> @@ -42,6 +43,7 @@
>  #include <llvm/IRReader/IRReader.h>
>  #endif
>  #include <llvm/PassManager.h>
> +#include <llvm/Support/CodeGen.h>
>  #include <llvm/Support/TargetSelect.h>
>  #include <llvm/Support/MemoryBuffer.h>
>  #if HAVE_LLVM < 0x0303
> @@ -212,6 +214,9 @@ namespace {
>        // that is no executed by all threads) during its optimizaton passes.
>        c.getCodeGenOpts().LinkBitcodeFile = libclc_path;
>  
> +      // Compile at -O0.  We will do optimizations later.
> +      c.getCodeGenOpts().OptimizationLevel = llvm::CodeGenOpt::None;
> +
>        // Compile the code
>        if (!c.ExecuteAction(act))
>           throw build_error(log);
> @@ -241,10 +246,39 @@ namespace {
>     }
>  
>     void
> -   internalize_functions(llvm::Module *mod,
> -        const std::vector<llvm::Function *> &kernels) {
> +   optimize(llvm::Module *mod, const std::vector<llvm::Function *> &kernels,
> +            bool subroutines_supported) {
>  
> +      llvm::PassManagerBuilder builder;
>        llvm::PassManager PM;
> +      std::vector<const char*> export_list;
> +
> +#if HAVE_LLVM < 0x0305
> +      PM.add(new llvm::DataLayout(mod));
> +#else
> +      PM.add(new llvm::DataLayoutPass(mod));
> +#endif
> +       // For targets that don't support subroutines, we need to inline
> +       // everything.  The most efficient way to do this is to add
> +       // the always_inline attribute to all non-kernel functions
> +       // and then run the Always Inline pass.  The Always Inline
> +       // pass will automaically inline functions with this attribute
> +       // and does not perform the expensive cost analysis that the normal
> +       // inliner does.
> +      for (llvm::Module::iterator f = mod->begin(), e = mod->end();
> +           f != e; ++f) {
> +         if (std::find(kernels.begin(), kernels.end(), f) != kernels.end()) {
> +            export_list.push_back(f->getName().data());
> +         } else if (!subroutines_supported) {
> +            f->addFnAttr(llvm::Attribute::AlwaysInline);
> +         }
> +      }
> +
> +      if (!subroutines_supported)
> +         builder.Inliner = llvm::createAlwaysInlinerPass();
> +      else
> +         builder.Inliner = llvm::createFunctionInliningPass();
> +

I have the feeling that this doesn't belong here.  I think, ideally this
would be taken care of by the LLVM back-end (where you could use the
target string to determine whether subroutines are supported or not), or
by the pipe driver as you were previously doing (as it seems unlikely to
me that this will be useful for drivers other than radeon).  If you
don't want the normal function inlining pass to be called for radeon,
how about removing this code altogether?

Thanks.

>        // Add a function internalizer pass.
>        //
>        // By default, the function internalizer pass will look for a function
> @@ -259,14 +293,12 @@ namespace {
>        // list of kernel functions to the internalizer.  The internalizer will
>        // treat the functions in the list as "main" functions and internalize
>        // all of the other functions.
> -      std::vector<const char*> export_list;
> -      for (std::vector<llvm::Function *>::const_iterator I = kernels.begin(),
> -                                                         E = kernels.end();
> -                                                         I != E; ++I) {
> -         llvm::Function *kernel = *I;
> -         export_list.push_back(kernel->getName().data());
> -      }
>        PM.add(llvm::createInternalizePass(export_list));
> +
> +      builder.LibraryInfo =
> +          new llvm::TargetLibraryInfo(llvm::Triple(mod->getTargetTriple()));
> +
> +      builder.populateModulePassManager(PM);
>        PM.run(*mod);
>     }
>  
> @@ -372,7 +404,8 @@ module
>  clover::compile_program_llvm(const compat::string &source,
>                               enum pipe_shader_ir ir,
>                               const compat::string &target,
> -                             const compat::string &opts) {
> +                             const compat::string &opts,
> +                             bool subroutines_supported) {
>  
>     std::vector<llvm::Function *> kernels;
>     size_t processor_str_len = std::string(target.begin()).find_first_of("-");
> @@ -388,7 +421,7 @@ clover::compile_program_llvm(const compat::string &source,
>  
>     find_kernels(mod, kernels);
>  
> -   internalize_functions(mod, kernels);
> +   optimize(mod, kernels, subroutines_supported);
>  
>     // Build the clover::module
>     switch (ir) {
> -- 
> 1.8.1.5
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 229 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140306/7524d4b0/attachment.pgp>


More information about the mesa-dev mailing list