[Mesa-dev] [PATCH 11/11] ac/radv: using tls to store llvm related info and speed up compiles (v3)

Dave Airlie airlied at gmail.com
Thu Jun 28 20:57:26 UTC 2018


On 28 June 2018 at 18:23, Alex Smith <asmith at feralinteractive.com> wrote:
> Hi Dave,
>
> I did a quick test with this on Rise of the Tomb Raider. It reduced the time
> taken to create all pipelines for the whole game over 8 threads (with
> RADV_DEBUG=nocache) from 12m24s to 11m35s. Nice improvement :)

Oh good to have some real world numbers.

Thanks for testing,
Dave.

>
> Also didn't see any issues, so:
>
> Tested-by: Alex Smith <asmith at feralinteractive.com>
>
> Thanks,
> Alex
>
> On 27 June 2018 at 04:58, Dave Airlie <airlied at gmail.com> wrote:
>>
>> From: Dave Airlie <airlied at redhat.com>
>>
>> I'd like to encourage people to test this to see if it helps (like
>> does it make app startup better or less hitching in dxvk).
>>
>> The basic idea is to store a bunch of LLVM related data structs
>> in thread local storage so we can avoid reinitializing them every time
>> we compile a shader. Since we know llvm objects aren't thread safe
>> they have to be stored using TLS to avoid any collisions.
>>
>> This should remove all the fixed overhead and setup costs of creating
>> the pass manager each time.
>>
>> This takes the time for a demo app to compile the radv meta shaders
>> with nocache and exit from 1.7s down to 1s.
>>
>> TODO: this doesn't work for radeonsi yet, but I'm not sure how TLS
>> works if you have radeonsi and radv loaded at the same time, if
>> they'll magically try and use the same tls stuff, in which case
>> this might explode all over the place.
>>
>> v2: fix llvm6 build, inline emit function, handle multiple targets
>> in one thread
>> v3: rebase and port onto new structure
>> ---
>>  src/amd/common/ac_llvm_helper.cpp | 120 ++++++++++++++++++++++++++++--
>>  src/amd/common/ac_llvm_util.c     |  10 +--
>>  src/amd/common/ac_llvm_util.h     |   9 +++
>>  src/amd/vulkan/radv_debug.h       |   1 +
>>  src/amd/vulkan/radv_device.c      |   1 +
>>  src/amd/vulkan/radv_shader.c      |   2 +
>>  6 files changed, 132 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/amd/common/ac_llvm_helper.cpp
>> b/src/amd/common/ac_llvm_helper.cpp
>> index 27403dbe085..f1f1399b3fb 100644
>> --- a/src/amd/common/ac_llvm_helper.cpp
>> +++ b/src/amd/common/ac_llvm_helper.cpp
>> @@ -31,12 +31,21 @@
>>
>>  #include "ac_llvm_util.h"
>>  #include <llvm-c/Core.h>
>> -#include <llvm/Target/TargetOptions.h>
>> -#include <llvm/ExecutionEngine/ExecutionEngine.h>
>> -#include <llvm/IR/Attributes.h>
>> -#include <llvm/IR/CallSite.h>
>> +#include <llvm/Target/TargetMachine.h>
>>  #include <llvm/IR/IRBuilder.h>
>>  #include <llvm/Analysis/TargetLibraryInfo.h>
>> +#include <llvm/IR/LegacyPassManager.h>
>> +
>> +#include <llvm-c/Transforms/IPO.h>
>> +#include <llvm-c/Transforms/Scalar.h>
>> +#if HAVE_LLVM >= 0x0700
>> +#include <llvm-c/Transforms/Utils.h>
>> +#endif
>> +
>> +#if HAVE_LLVM < 0x0700
>> +#include "llvm/Support/raw_ostream.h"
>> +#endif
>> +#include <list>
>>
>>  void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
>>  {
>> @@ -101,11 +110,110 @@
>> ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
>>         delete reinterpret_cast<llvm::TargetLibraryInfoImpl
>> *>(library_info);
>>  }
>>
>> +class ac_llvm_per_thread_info {
>> +public:
>> +       ac_llvm_per_thread_info(enum radeon_family arg_family,
>> +                               enum ac_target_machine_options
>> arg_tm_options)
>> +               : family(arg_family), tm_options(arg_tm_options),
>> +                 OStream(CodeString) {}
>> +       ~ac_llvm_per_thread_info() {
>> +               ac_llvm_compiler_dispose_internal(&llvm_info);
>> +       }
>> +
>> +       struct ac_llvm_compiler_info llvm_info;
>> +       enum radeon_family family;
>> +       enum ac_target_machine_options tm_options;
>> +       llvm::SmallString<0> CodeString;
>> +       llvm::raw_svector_ostream OStream;
>> +       llvm::legacy::PassManager pass;
>> +};
>> +
>> +/* we have to store a linked list per thread due to the possibility of
>> multiple gpus being required */
>> +static thread_local std::list<ac_llvm_per_thread_info>
>> ac_llvm_per_thread_list;
>> +
>>  bool ac_compile_to_memory_buffer(struct ac_llvm_compiler_info *info,
>>                                  LLVMModuleRef M,
>>                                  char **ErrorMessage,
>>                                  LLVMMemoryBufferRef *OutMemBuf)
>>  {
>> -       return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M,
>> LLVMObjectFile,
>> -                                                  ErrorMessage,
>> OutMemBuf);
>> +       ac_llvm_per_thread_info *thread_info = nullptr;
>> +       if (info->thread_stored) {
>> +               for (auto &I : ac_llvm_per_thread_list) {
>> +                       if (I.llvm_info.tm == info->tm) {
>> +                               thread_info = &I;
>> +                               break;
>> +                       }
>> +               }
>> +
>> +               if (!thread_info) {
>> +                       assert(0);
>> +                       return false;
>> +               }
>> +       } else {
>> +               return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M,
>> LLVMObjectFile,
>> +                                                          ErrorMessage,
>> OutMemBuf);
>> +       }
>> +
>> +       llvm::TargetMachine *TM =
>> reinterpret_cast<llvm::TargetMachine*>(thread_info->llvm_info.tm);
>> +       llvm::Module *Mod = llvm::unwrap(M);
>> +       llvm::StringRef Data;
>> +
>> +       Mod->setDataLayout(TM->createDataLayout());
>> +
>> +       thread_info->pass.run(*Mod);
>> +
>> +       Data = thread_info->OStream.str();
>> +       *OutMemBuf =
>> LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), "");
>> +       thread_info->CodeString = "";
>> +       return false;
>> +}
>> +
>> +bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
>> +                          bool add_target_library_info,
>> +                          enum radeon_family family,
>> +                          enum ac_target_machine_options tm_options)
>> +{
>> +       if (tm_options & AC_TM_THREAD_LLVM) {
>> +               for (auto &I : ac_llvm_per_thread_list) {
>> +                       if (I.family == family &&
>> +                           I.tm_options == tm_options) {
>> +                               *info = I.llvm_info;
>> +                               return true;
>> +                       }
>> +               }
>> +
>> +               ac_llvm_per_thread_list.emplace_back(family, tm_options);
>> +               ac_llvm_per_thread_info &tinfo =
>> ac_llvm_per_thread_list.back();
>> +               if (!ac_llvm_compiler_init_internal(&tinfo.llvm_info,
>> +                                                   true,
>> +                                                   family,
>> +                                                   tm_options))
>> +                       return false;
>> +
>> +               tinfo.llvm_info.thread_stored = true;
>> +               *info = tinfo.llvm_info;
>> +
>> +               llvm::TargetMachine *TM =
>> reinterpret_cast<llvm::TargetMachine*>(tinfo.llvm_info.tm);
>> +               if (TM->addPassesToEmitFile(tinfo.pass, tinfo.OStream,
>> +#if HAVE_LLVM >= 0x0700
>> +                                           nullptr,
>> +#endif
>> +
>> llvm::TargetMachine::CGFT_ObjectFile)) {
>> +                       assert(0);
>> +                       return false;
>> +               }
>> +       } else {
>> +               if (!ac_llvm_compiler_init_internal(info,
>> +
>> add_target_library_info,
>> +                                                   family,
>> +                                                   tm_options))
>> +                       return false;
>> +       }
>> +       return true;
>> +}
>> +
>> +void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info)
>> +{
>> +       if (!info->thread_stored)
>> +               ac_llvm_compiler_dispose_internal(info);
>>  }
>> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
>> index 7f8c5746b37..d8ec4ee0092 100644
>> --- a/src/amd/common/ac_llvm_util.c
>> +++ b/src/amd/common/ac_llvm_util.c
>> @@ -188,10 +188,10 @@ static LLVMPassManagerRef
>> ac_init_passmgr(LLVMTargetLibraryInfoRef target_librar
>>         return passmgr;
>>  }
>>
>> -bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
>> -                          bool add_target_library_info,
>> -                          enum radeon_family family,
>> -                          enum ac_target_machine_options tm_options)
>> +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info,
>> +                                   bool add_target_library_info,
>> +                                   enum radeon_family family,
>> +                                   enum ac_target_machine_options
>> tm_options)
>>  {
>>         memset(info, 0, sizeof(*info));
>>         info->tm = ac_create_target_machine(family, tm_options,
>> &info->triple);
>> @@ -223,7 +223,7 @@ fail:
>>         return false;
>>  }
>>
>> -void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info)
>> +void ac_llvm_compiler_dispose_internal(struct ac_llvm_compiler_info
>> *info)
>>  {
>>         if (info->data_layout)
>>                 LLVMDisposeMessage((char*)info->data_layout);
>> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
>> index 1ba972dab87..c579e6a9ec3 100644
>> --- a/src/amd/common/ac_llvm_util.h
>> +++ b/src/amd/common/ac_llvm_util.h
>> @@ -60,6 +60,7 @@ enum ac_target_machine_options {
>>         AC_TM_FORCE_DISABLE_XNACK = (1 << 3),
>>         AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
>>         AC_TM_CHECK_IR = (1 << 5),
>> +       AC_TM_THREAD_LLVM = (1 << 6),
>>  };
>>
>>  enum ac_float_mode {
>> @@ -74,6 +75,7 @@ struct ac_llvm_compiler_info {
>>         LLVMTargetLibraryInfoRef target_library_info;
>>         const char *triple;
>>         const char *data_layout;
>> +       bool thread_stored;
>>  };
>>
>>  const char *ac_get_llvm_processor_name(enum radeon_family family);
>> @@ -114,6 +116,12 @@ ac_get_store_intr_attribs(bool writeonly_memory)
>>  unsigned
>>  ac_count_scratch_private_memory(LLVMValueRef function);
>>
>> +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info,
>> +                                   bool add_target_library_info, /* crash
>> workaround */
>> +                                   enum radeon_family family,
>> +                                   enum ac_target_machine_options
>> tm_options);
>> +void ac_llvm_compiler_dispose_internal(struct ac_llvm_compiler_info
>> *info);
>> +
>>  bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
>>                            bool add_target_library_info, /* crash
>> workaround */
>>                            enum radeon_family family,
>> @@ -127,6 +135,7 @@ bool ac_compile_to_memory_buffer(struct
>> ac_llvm_compiler_info *info,
>>                                  LLVMModuleRef M,
>>                                  char **ErrorMessage,
>>                                  LLVMMemoryBufferRef *OutMemBuf);
>> +
>>  #ifdef __cplusplus
>>  }
>>  #endif
>> diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
>> index f1b0dc26a63..9fe4c3b7404 100644
>> --- a/src/amd/vulkan/radv_debug.h
>> +++ b/src/amd/vulkan/radv_debug.h
>> @@ -49,6 +49,7 @@ enum {
>>         RADV_DEBUG_ERRORS            = 0x80000,
>>         RADV_DEBUG_STARTUP           = 0x100000,
>>         RADV_DEBUG_CHECKIR           = 0x200000,
>> +       RADV_DEBUG_NOTHREADLLVM      = 0x400000,
>>  };
>>
>>  enum {
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index ad3465f594e..73c48cef1f0 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -436,6 +436,7 @@ static const struct debug_control radv_debug_options[]
>> = {
>>         {"errors", RADV_DEBUG_ERRORS},
>>         {"startup", RADV_DEBUG_STARTUP},
>>         {"checkir", RADV_DEBUG_CHECKIR},
>> +       {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
>>         {NULL, 0}
>>  };
>>
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 3bbb7867640..f7033aff771 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -547,6 +547,8 @@ shader_variant_create(struct radv_device *device,
>>                 tm_options |= AC_TM_SISCHED;
>>         if (options->check_ir)
>>                 tm_options |= AC_TM_CHECK_IR;
>> +       if (!(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM))
>> +               tm_options |= AC_TM_THREAD_LLVM;
>>
>>         radv_init_llvm_once();
>>
>> --
>> 2.17.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>


More information about the mesa-dev mailing list