[Mesa-dev] [PATCH 3/3] clover: Inline all functions for drivers that don't support subroutines

Fri Mar 7 07:08:07 PST 2014

Tom Stellard <tom at stellard.net> writes:

> On Thu, Mar 06, 2014 at 01:45:36PM +0100, Francisco Jerez wrote:
>> Tom Stellard <thomas.stellard at amd.com> writes:
>> 
>> > ---
>> >  src/gallium/drivers/radeon/radeon_llvm_util.c      | 35 --------------
>> >  .../state_trackers/clover/core/compiler.hpp        |  3 +-
>> >  src/gallium/state_trackers/clover/core/device.cpp  |  6 +++
>> >  src/gallium/state_trackers/clover/core/device.hpp  |  1 +
>> >  src/gallium/state_trackers/clover/core/program.cpp |  3 +-
>> >  .../state_trackers/clover/llvm/invocation.cpp      | 55 +++++++++++++++++-----
>> >  6 files changed, 55 insertions(+), 48 deletions(-)
>> >
>> > diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c
>> > index 2ace91f..fe7f9a6 100644
>> > --- a/src/gallium/drivers/radeon/radeon_llvm_util.c
>> > +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
>> > @@ -53,40 +53,6 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
>> >  	return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
>> >  }
>> >  
>> > -static void radeon_llvm_optimize(LLVMModuleRef mod)
>> > -{
>> > -	const char *data_layout = LLVMGetDataLayout(mod);
>> > -	LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
>> > -	LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
>> > -	LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
>> > -
>> > -	/* Functions calls are not supported yet, so we need to inline
>> > -	 * everything.  The most efficient way to do this is to add
>> > -	 * the always_inline attribute to all non-kernel functions
>> > -	 * and then run the Always Inline pass.  The Always Inline
>> > -	 * pass will automaically inline functions with this attribute
>> > -	 * and does not perform the expensive cost analysis that the normal
>> > -	 * inliner does.
>> > -	 */
>> > -
>> > -	LLVMValueRef fn;
>> > -	for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) {
>> > -		/* All the non-kernel functions have internal linkage */
>> > -		if (LLVMGetLinkage(fn) == LLVMInternalLinkage) {
>> > -			LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
>> > -		}
>> > -	}
>> > -
>> > -	LLVMAddTargetData(TD, pass_manager);
>> > -	LLVMAddAlwaysInlinerPass(pass_manager);
>> > -	LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
>> > -
>> > -	LLVMRunPassManager(pass_manager, mod);
>> > -	LLVMPassManagerBuilderDispose(builder);
>> > -	LLVMDisposePassManager(pass_manager);
>> > -	LLVMDisposeTargetData(TD);
>> > -}
>> > -
>> >  LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
>> >  		const unsigned char *bitcode, unsigned bitcode_len)
>> >  {
>> > @@ -109,6 +75,5 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
>> >  		LLVMDeleteFunction(kernel_function);
>> >  	}
>> >  	FREE(kernel_metadata);
>> > -	radeon_llvm_optimize(mod);
>> >  	return mod;
>> >  }
>> > diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp
>> > index 49cd022..5035a6b 100644
>> > --- a/src/gallium/state_trackers/clover/core/compiler.hpp
>> > +++ b/src/gallium/state_trackers/clover/core/compiler.hpp
>> > @@ -32,7 +32,8 @@ namespace clover {
>> >     module compile_program_llvm(const compat::string &source,
>> >                                 pipe_shader_ir ir,
>> >                                 const compat::string &target,
>> > -                               const compat::string &opts);
>> > +                               const compat::string &opts,
>> > +                               bool subroutines_supported);
>> >  
>> >     module compile_program_tgsi(const compat::string &source);
>> >  }
>> > diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
>> > index 2c5f9b7..6820f56 100644
>> > --- a/src/gallium/state_trackers/clover/core/device.cpp
>> > +++ b/src/gallium/state_trackers/clover/core/device.cpp
>> > @@ -187,3 +187,9 @@ enum pipe_endian
>> >  device::endianness() const {
>> >     return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);
>> >  }
>> > +
>> > +bool
>> > +device::subroutines_supported() const {
>> > +   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
>> > +                                 PIPE_SHADER_CAP_SUBROUTINES);
>> > +}
>> > diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp
>> > index 433ac81..b187a93 100644
>> > --- a/src/gallium/state_trackers/clover/core/device.hpp
>> > +++ b/src/gallium/state_trackers/clover/core/device.hpp
>> > @@ -68,6 +68,7 @@ namespace clover {
>> >        enum pipe_shader_ir ir_format() const;
>> >        std::string ir_target() const;
>> >        enum pipe_endian endianness() const;
>> > +      bool subroutines_supported() const;
>> >  
>> >        friend class command_queue;
>> >        friend class root_resource;
>> > diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp
>> > index 3aaa652..b547023 100644
>> > --- a/src/gallium/state_trackers/clover/core/program.cpp
>> > +++ b/src/gallium/state_trackers/clover/core/program.cpp
>> > @@ -56,7 +56,8 @@ program::build(const ref_vector<device> &devs, const char *opts) {
>> >              auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
>> >                             compile_program_tgsi(_source) :
>> >                             compile_program_llvm(_source, dev.ir_format(),
>> > -                                                dev.ir_target(), build_opts(dev)));
>> > +                                                dev.ir_target(), build_opts(dev),
>> > +                                                dev.subroutines_supported()));
>> >              _binaries.insert({ &dev, module });
>> >  
>> >           } catch (build_error &e) {
>> > diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > index cdf32b6..c46e85e 100644
>> > --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > @@ -30,6 +30,7 @@
>> >  #include <llvm/Bitcode/BitstreamWriter.h>
>> >  #include <llvm/Bitcode/ReaderWriter.h>
>> >  #include <llvm/Linker.h>
>> > +#include <llvm/Target/TargetLibraryInfo.h>
>> >  #if HAVE_LLVM < 0x0303
>> >  #include <llvm/DerivedTypes.h>
>> >  #include <llvm/LLVMContext.h>
>> > @@ -42,6 +43,7 @@
>> >  #include <llvm/IRReader/IRReader.h>
>> >  #endif
>> >  #include <llvm/PassManager.h>
>> > +#include <llvm/Support/CodeGen.h>
>> >  #include <llvm/Support/TargetSelect.h>
>> >  #include <llvm/Support/MemoryBuffer.h>
>> >  #if HAVE_LLVM < 0x0303
>> > @@ -212,6 +214,9 @@ namespace {
>> >        // that is no executed by all threads) during its optimizaton passes.
>> >        c.getCodeGenOpts().LinkBitcodeFile = libclc_path;
>> >  
>> > +      // Compile at -O0.  We will do optimizations later.
>> > +      c.getCodeGenOpts().OptimizationLevel = llvm::CodeGenOpt::None;
>> > +
>> >        // Compile the code
>> >        if (!c.ExecuteAction(act))
>> >           throw build_error(log);
>> > @@ -241,10 +246,39 @@ namespace {
>> >     }
>> >  
>> >     void
>> > -   internalize_functions(llvm::Module *mod,
>> > -        const std::vector<llvm::Function *> &kernels) {
>> > +   optimize(llvm::Module *mod, const std::vector<llvm::Function *> &kernels,
>> > +            bool subroutines_supported) {
>> >  
>> > +      llvm::PassManagerBuilder builder;
>> >        llvm::PassManager PM;
>> > +      std::vector<const char*> export_list;
>> > +
>> > +#if HAVE_LLVM < 0x0305
>> > +      PM.add(new llvm::DataLayout(mod));
>> > +#else
>> > +      PM.add(new llvm::DataLayoutPass(mod));
>> > +#endif
>> > +       // For targets that don't support subroutines, we need to inline
>> > +       // everything.  The most efficient way to do this is to add
>> > +       // the always_inline attribute to all non-kernel functions
>> > +       // and then run the Always Inline pass.  The Always Inline
>> > +       // pass will automaically inline functions with this attribute
>> > +       // and does not perform the expensive cost analysis that the normal
>> > +       // inliner does.
>> > +      for (llvm::Module::iterator f = mod->begin(), e = mod->end();
>> > +           f != e; ++f) {
>> > +         if (std::find(kernels.begin(), kernels.end(), f) != kernels.end()) {
>> > +            export_list.push_back(f->getName().data());
>> > +         } else if (!subroutines_supported) {
>> > +            f->addFnAttr(llvm::Attribute::AlwaysInline);
>> > +         }
>> > +      }
>> > +
>> > +      if (!subroutines_supported)
>> > +         builder.Inliner = llvm::createAlwaysInlinerPass();
>> > +      else
>> > +         builder.Inliner = llvm::createFunctionInliningPass();
>> > +
>> 
>> I have the feeling that this doesn't belong here.  I think, ideally this
>> would be taken care of by the LLVM back-end (where you could use the
>> target string to determine whether subroutines are supported or not), or
>> by the pipe driver as you were previously doing (as it seems unlikely to
>> me that this will be useful for drivers other than radeon).  If you
>> don't want the normal function inlining pass to be called for radeon,
>> how about removing this code altogether?
>>
>
> If we want to move the entire compilation process into clover (i.e
> OpenCL C source to ELF binary) for targets like radeon, which support
> ELF generation, then we can't do the function inlining
> in the pipe drivers, because the drivers will never get the LLVM IR.
>
My idea was to move the entire compilation logic into Clang, not into
Clover, so we can support off-line compilation.  For that to work we
really want the back-end to have taken care of this already.

> The disadvantage for radeon of inlining in the backend is that
> it will miss out on a lot of optimization opportunities by doing
> this transformation after the bulk of the optimization passes.
>

Not necessarily.  IIRC a target can add passes which are run before
optimization.

> For drivers that want to use LLVM IR without a formal backend and
> instead hand lower it to their own IR, they could do this in the pipe
> driver, but then we would end up with duplicated code all over the place
> that might as well be in clover.  Especially since of all the gallium
> drivers, only nvc0 support subroutines.
>

That doesn't seem likely to me.  nv50+ is the only other hardware that
is likely to get compute support anytime soon, and it can do subroutines
just fine -- even if it couldn't, this change wouldn't be useful for it
if it goes the TGSI path.  So it seems to me that radeon is the only
driver likely to benefit from this, and only as a temporary work-around
until you implement proper subroutine support.

Thanks.

> So, I think there are a lot of advantages to having this code in clover,
> but if you are still opposed to it, I will probably leave it in the
> driver until someone implements subroutine support.
>
> -Tom
>
>> 
>> >        // Add a function internalizer pass.
>> >        //
>> >        // By default, the function internalizer pass will look for a function
>> > @@ -259,14 +293,12 @@ namespace {
>> >        // list of kernel functions to the internalizer.  The internalizer will
>> >        // treat the functions in the list as "main" functions and internalize
>> >        // all of the other functions.
>> > -      std::vector<const char*> export_list;
>> > -      for (std::vector<llvm::Function *>::const_iterator I = kernels.begin(),
>> > -                                                         E = kernels.end();
>> > -                                                         I != E; ++I) {
>> > -         llvm::Function *kernel = *I;
>> > -         export_list.push_back(kernel->getName().data());
>> > -      }
>> >        PM.add(llvm::createInternalizePass(export_list));
>> > +
>> > +      builder.LibraryInfo =
>> > +          new llvm::TargetLibraryInfo(llvm::Triple(mod->getTargetTriple()));
>> > +
>> > +      builder.populateModulePassManager(PM);
>> >        PM.run(*mod);
>> >     }
>> >  
>> > @@ -372,7 +404,8 @@ module
>> >  clover::compile_program_llvm(const compat::string &source,
>> >                               enum pipe_shader_ir ir,
>> >                               const compat::string &target,
>> > -                             const compat::string &opts) {
>> > +                             const compat::string &opts,
>> > +                             bool subroutines_supported) {
>> >  
>> >     std::vector<llvm::Function *> kernels;
>> >     size_t processor_str_len = std::string(target.begin()).find_first_of("-");
>> > @@ -388,7 +421,7 @@ clover::compile_program_llvm(const compat::string &source,
>> >  
>> >     find_kernels(mod, kernels);
>> >  
>> > -   internalize_functions(mod, kernels);
>> > +   optimize(mod, kernels, subroutines_supported);
>> >  
>> >     // Build the clover::module
>> >     switch (ir) {
>> > -- 
>> > 1.8.1.5
>
>
>
>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 229 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140307/c96d049f/attachment.pgp>