[Mesa-dev] [PATCH 3/3] clover: Inline all functions for drivers that don't support subroutines

Tom Stellard tom at stellard.net
Thu Mar 6 08:23:07 PST 2014


On Thu, Mar 06, 2014 at 01:45:36PM +0100, Francisco Jerez wrote:
> Tom Stellard <thomas.stellard at amd.com> writes:
> 
> > ---
> >  src/gallium/drivers/radeon/radeon_llvm_util.c      | 35 --------------
> >  .../state_trackers/clover/core/compiler.hpp        |  3 +-
> >  src/gallium/state_trackers/clover/core/device.cpp  |  6 +++
> >  src/gallium/state_trackers/clover/core/device.hpp  |  1 +
> >  src/gallium/state_trackers/clover/core/program.cpp |  3 +-
> >  .../state_trackers/clover/llvm/invocation.cpp      | 55 +++++++++++++++++-----
> >  6 files changed, 55 insertions(+), 48 deletions(-)
> >
> > diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c
> > index 2ace91f..fe7f9a6 100644
> > --- a/src/gallium/drivers/radeon/radeon_llvm_util.c
> > +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
> > @@ -53,40 +53,6 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
> >  	return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
> >  }
> >  
> > -static void radeon_llvm_optimize(LLVMModuleRef mod)
> > -{
> > -	const char *data_layout = LLVMGetDataLayout(mod);
> > -	LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
> > -	LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
> > -	LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
> > -
> > -	/* Functions calls are not supported yet, so we need to inline
> > -	 * everything.  The most efficient way to do this is to add
> > -	 * the always_inline attribute to all non-kernel functions
> > -	 * and then run the Always Inline pass.  The Always Inline
> > -	 * pass will automaically inline functions with this attribute
> > -	 * and does not perform the expensive cost analysis that the normal
> > -	 * inliner does.
> > -	 */
> > -
> > -	LLVMValueRef fn;
> > -	for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) {
> > -		/* All the non-kernel functions have internal linkage */
> > -		if (LLVMGetLinkage(fn) == LLVMInternalLinkage) {
> > -			LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
> > -		}
> > -	}
> > -
> > -	LLVMAddTargetData(TD, pass_manager);
> > -	LLVMAddAlwaysInlinerPass(pass_manager);
> > -	LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
> > -
> > -	LLVMRunPassManager(pass_manager, mod);
> > -	LLVMPassManagerBuilderDispose(builder);
> > -	LLVMDisposePassManager(pass_manager);
> > -	LLVMDisposeTargetData(TD);
> > -}
> > -
> >  LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
> >  		const unsigned char *bitcode, unsigned bitcode_len)
> >  {
> > @@ -109,6 +75,5 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
> >  		LLVMDeleteFunction(kernel_function);
> >  	}
> >  	FREE(kernel_metadata);
> > -	radeon_llvm_optimize(mod);
> >  	return mod;
> >  }
> > diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp
> > index 49cd022..5035a6b 100644
> > --- a/src/gallium/state_trackers/clover/core/compiler.hpp
> > +++ b/src/gallium/state_trackers/clover/core/compiler.hpp
> > @@ -32,7 +32,8 @@ namespace clover {
> >     module compile_program_llvm(const compat::string &source,
> >                                 pipe_shader_ir ir,
> >                                 const compat::string &target,
> > -                               const compat::string &opts);
> > +                               const compat::string &opts,
> > +                               bool subroutines_supported);
> >  
> >     module compile_program_tgsi(const compat::string &source);
> >  }
> > diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
> > index 2c5f9b7..6820f56 100644
> > --- a/src/gallium/state_trackers/clover/core/device.cpp
> > +++ b/src/gallium/state_trackers/clover/core/device.cpp
> > @@ -187,3 +187,9 @@ enum pipe_endian
> >  device::endianness() const {
> >     return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);
> >  }
> > +
> > +bool
> > +device::subroutines_supported() const {
> > +   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
> > +                                 PIPE_SHADER_CAP_SUBROUTINES);
> > +}
> > diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp
> > index 433ac81..b187a93 100644
> > --- a/src/gallium/state_trackers/clover/core/device.hpp
> > +++ b/src/gallium/state_trackers/clover/core/device.hpp
> > @@ -68,6 +68,7 @@ namespace clover {
> >        enum pipe_shader_ir ir_format() const;
> >        std::string ir_target() const;
> >        enum pipe_endian endianness() const;
> > +      bool subroutines_supported() const;
> >  
> >        friend class command_queue;
> >        friend class root_resource;
> > diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp
> > index 3aaa652..b547023 100644
> > --- a/src/gallium/state_trackers/clover/core/program.cpp
> > +++ b/src/gallium/state_trackers/clover/core/program.cpp
> > @@ -56,7 +56,8 @@ program::build(const ref_vector<device> &devs, const char *opts) {
> >              auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
> >                             compile_program_tgsi(_source) :
> >                             compile_program_llvm(_source, dev.ir_format(),
> > -                                                dev.ir_target(), build_opts(dev)));
> > +                                                dev.ir_target(), build_opts(dev),
> > +                                                dev.subroutines_supported()));
> >              _binaries.insert({ &dev, module });
> >  
> >           } catch (build_error &e) {
> > diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > index cdf32b6..c46e85e 100644
> > --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > @@ -30,6 +30,7 @@
> >  #include <llvm/Bitcode/BitstreamWriter.h>
> >  #include <llvm/Bitcode/ReaderWriter.h>
> >  #include <llvm/Linker.h>
> > +#include <llvm/Target/TargetLibraryInfo.h>
> >  #if HAVE_LLVM < 0x0303
> >  #include <llvm/DerivedTypes.h>
> >  #include <llvm/LLVMContext.h>
> > @@ -42,6 +43,7 @@
> >  #include <llvm/IRReader/IRReader.h>
> >  #endif
> >  #include <llvm/PassManager.h>
> > +#include <llvm/Support/CodeGen.h>
> >  #include <llvm/Support/TargetSelect.h>
> >  #include <llvm/Support/MemoryBuffer.h>
> >  #if HAVE_LLVM < 0x0303
> > @@ -212,6 +214,9 @@ namespace {
> >        // that is no executed by all threads) during its optimizaton passes.
> >        c.getCodeGenOpts().LinkBitcodeFile = libclc_path;
> >  
> > +      // Compile at -O0.  We will do optimizations later.
> > +      c.getCodeGenOpts().OptimizationLevel = llvm::CodeGenOpt::None;
> > +
> >        // Compile the code
> >        if (!c.ExecuteAction(act))
> >           throw build_error(log);
> > @@ -241,10 +246,39 @@ namespace {
> >     }
> >  
> >     void
> > -   internalize_functions(llvm::Module *mod,
> > -        const std::vector<llvm::Function *> &kernels) {
> > +   optimize(llvm::Module *mod, const std::vector<llvm::Function *> &kernels,
> > +            bool subroutines_supported) {
> >  
> > +      llvm::PassManagerBuilder builder;
> >        llvm::PassManager PM;
> > +      std::vector<const char*> export_list;
> > +
> > +#if HAVE_LLVM < 0x0305
> > +      PM.add(new llvm::DataLayout(mod));
> > +#else
> > +      PM.add(new llvm::DataLayoutPass(mod));
> > +#endif
> > +       // For targets that don't support subroutines, we need to inline
> > +       // everything.  The most efficient way to do this is to add
> > +       // the always_inline attribute to all non-kernel functions
> > +       // and then run the Always Inline pass.  The Always Inline
> > +       // pass will automaically inline functions with this attribute
> > +       // and does not perform the expensive cost analysis that the normal
> > +       // inliner does.
> > +      for (llvm::Module::iterator f = mod->begin(), e = mod->end();
> > +           f != e; ++f) {
> > +         if (std::find(kernels.begin(), kernels.end(), f) != kernels.end()) {
> > +            export_list.push_back(f->getName().data());
> > +         } else if (!subroutines_supported) {
> > +            f->addFnAttr(llvm::Attribute::AlwaysInline);
> > +         }
> > +      }
> > +
> > +      if (!subroutines_supported)
> > +         builder.Inliner = llvm::createAlwaysInlinerPass();
> > +      else
> > +         builder.Inliner = llvm::createFunctionInliningPass();
> > +
> 
> I have the feeling that this doesn't belong here.  I think, ideally this
> would be taken care of by the LLVM back-end (where you could use the
> target string to determine whether subroutines are supported or not), or
> by the pipe driver as you were previously doing (as it seems unlikely to
> me that this will be useful for drivers other than radeon).  If you
> don't want the normal function inlining pass to be called for radeon,
> how about removing this code altogether?
>

If we want to move the entire compilation process into clover (i.e
OpenCL C source to ELF binary) for targets like radeon, which support
ELF generation, then we can't do the function inlining
in the pipe drivers, because the drivers will never get the LLVM IR.

The disadvantage for radeon of inlining in the backend is that
it will miss out on a lot of optimization opportunities by doing
this transformation after the bulk of the optimization passes.

For drivers that want to use LLVM IR without a formal backend and
instead hand lower it to their own IR, they could do this in the pipe
driver, but then we would end up with duplicated code all over the place
that might as well be in clover.  Especially since of all the gallium
drivers, only nvc0 support subroutines.

So, I think there are a lot of advantages to having this code in clover,
but if you are still opposed to it, I will probably leave it in the
driver until someone implements subroutine support.

-Tom

> 
> >        // Add a function internalizer pass.
> >        //
> >        // By default, the function internalizer pass will look for a function
> > @@ -259,14 +293,12 @@ namespace {
> >        // list of kernel functions to the internalizer.  The internalizer will
> >        // treat the functions in the list as "main" functions and internalize
> >        // all of the other functions.
> > -      std::vector<const char*> export_list;
> > -      for (std::vector<llvm::Function *>::const_iterator I = kernels.begin(),
> > -                                                         E = kernels.end();
> > -                                                         I != E; ++I) {
> > -         llvm::Function *kernel = *I;
> > -         export_list.push_back(kernel->getName().data());
> > -      }
> >        PM.add(llvm::createInternalizePass(export_list));
> > +
> > +      builder.LibraryInfo =
> > +          new llvm::TargetLibraryInfo(llvm::Triple(mod->getTargetTriple()));
> > +
> > +      builder.populateModulePassManager(PM);
> >        PM.run(*mod);
> >     }
> >  
> > @@ -372,7 +404,8 @@ module
> >  clover::compile_program_llvm(const compat::string &source,
> >                               enum pipe_shader_ir ir,
> >                               const compat::string &target,
> > -                             const compat::string &opts) {
> > +                             const compat::string &opts,
> > +                             bool subroutines_supported) {
> >  
> >     std::vector<llvm::Function *> kernels;
> >     size_t processor_str_len = std::string(target.begin()).find_first_of("-");
> > @@ -388,7 +421,7 @@ clover::compile_program_llvm(const compat::string &source,
> >  
> >     find_kernels(mod, kernels);
> >  
> > -   internalize_functions(mod, kernels);
> > +   optimize(mod, kernels, subroutines_supported);
> >  
> >     // Build the clover::module
> >     switch (ir) {
> > -- 
> > 1.8.1.5




> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list