[Mesa-dev] [PATCH 11/11] ac/radv: using tls to store llvm related info and speed up compiles (v3)
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Wed Jun 27 23:25:35 UTC 2018
On Wed, Jun 27, 2018 at 5:58 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> I'd like to encourage people to test this to see if it helps (e.g.
> does it make app startup faster or reduce hitching in dxvk).
>
> The basic idea is to store a bunch of LLVM related data structs
> in thread local storage so we can avoid reiniting them every time
> we compile a shader. Since we know llvm objects aren't thread safe,
> they have to be stored using TLS to avoid any collisions.
>
> This should remove all the fixed setup overhead of creating
> the pass manager each time.
>
> This reduces the time a demo app takes to compile the radv meta shaders
> with nocache and exit from 1.7s to 1s.
>
> TODO: this doesn't work for radeonsi yet. I'm also not sure how TLS
> works if you have radeonsi and radv loaded at the same time: if
> they magically try to use the same TLS data, then
> this might explode all over the place.
>
> v2: fix llvm6 build, inline emit function, handle multiple targets
> in one thread
> v3: rebase and port onto new structure
> ---
> src/amd/common/ac_llvm_helper.cpp | 120 ++++++++++++++++++++++++++++--
> src/amd/common/ac_llvm_util.c | 10 +--
> src/amd/common/ac_llvm_util.h | 9 +++
> src/amd/vulkan/radv_debug.h | 1 +
> src/amd/vulkan/radv_device.c | 1 +
> src/amd/vulkan/radv_shader.c | 2 +
> 6 files changed, 132 insertions(+), 11 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp
> index 27403dbe085..f1f1399b3fb 100644
> --- a/src/amd/common/ac_llvm_helper.cpp
> +++ b/src/amd/common/ac_llvm_helper.cpp
> @@ -31,12 +31,21 @@
>
> #include "ac_llvm_util.h"
> #include <llvm-c/Core.h>
> -#include <llvm/Target/TargetOptions.h>
> -#include <llvm/ExecutionEngine/ExecutionEngine.h>
> -#include <llvm/IR/Attributes.h>
> -#include <llvm/IR/CallSite.h>
> +#include <llvm/Target/TargetMachine.h>
> #include <llvm/IR/IRBuilder.h>
> #include <llvm/Analysis/TargetLibraryInfo.h>
> +#include <llvm/IR/LegacyPassManager.h>
> +
> +#include <llvm-c/Transforms/IPO.h>
> +#include <llvm-c/Transforms/Scalar.h>
> +#if HAVE_LLVM >= 0x0700
> +#include <llvm-c/Transforms/Utils.h>
> +#endif
> +
> +#if HAVE_LLVM < 0x0700
> +#include "llvm/Support/raw_ostream.h"
> +#endif
> +#include <list>
>
> void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
> {
> @@ -101,11 +110,110 @@ ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
> delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
> }
>
> +class ac_llvm_per_thread_info {
> +public:
> + ac_llvm_per_thread_info(enum radeon_family arg_family,
> + enum ac_target_machine_options arg_tm_options)
> + : family(arg_family), tm_options(arg_tm_options),
> + OStream(CodeString) {}
> + ~ac_llvm_per_thread_info() {
> + ac_llvm_compiler_dispose_internal(&llvm_info);
> + }
> +
> + struct ac_llvm_compiler_info llvm_info;
> + enum radeon_family family;
> + enum ac_target_machine_options tm_options;
> + llvm::SmallString<0> CodeString;
> + llvm::raw_svector_ostream OStream;
Can we keep the naming style for variables consistent?
Otherwise this series is
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Thanks!
> + llvm::legacy::PassManager pass;
> +};
> +
> +/* we have to store a linked list per thread due to the possibility of multiple gpus being required */
> +static thread_local std::list<ac_llvm_per_thread_info> ac_llvm_per_thread_list;
> +
> bool ac_compile_to_memory_buffer(struct ac_llvm_compiler_info *info,
> LLVMModuleRef M,
> char **ErrorMessage,
> LLVMMemoryBufferRef *OutMemBuf)
> {
> - return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, LLVMObjectFile,
> - ErrorMessage, OutMemBuf);
> + ac_llvm_per_thread_info *thread_info = nullptr;
> + if (info->thread_stored) {
> + for (auto &I : ac_llvm_per_thread_list) {
> + if (I.llvm_info.tm == info->tm) {
> + thread_info = &I;
> + break;
> + }
> + }
> +
> + if (!thread_info) {
> + assert(0);
> + return false;
> + }
> + } else {
> + return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, LLVMObjectFile,
> + ErrorMessage, OutMemBuf);
> + }
> +
> + llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(thread_info->llvm_info.tm);
> + llvm::Module *Mod = llvm::unwrap(M);
> + llvm::StringRef Data;
> +
> + Mod->setDataLayout(TM->createDataLayout());
> +
> + thread_info->pass.run(*Mod);
> +
> + Data = thread_info->OStream.str();
> + *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), "");
> + thread_info->CodeString = "";
> + return false;
> +}
> +
> +bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
> + bool add_target_library_info,
> + enum radeon_family family,
> + enum ac_target_machine_options tm_options)
> +{
> + if (tm_options & AC_TM_THREAD_LLVM) {
> + for (auto &I : ac_llvm_per_thread_list) {
> + if (I.family == family &&
> + I.tm_options == tm_options) {
> + *info = I.llvm_info;
> + return true;
> + }
> + }
> +
> + ac_llvm_per_thread_list.emplace_back(family, tm_options);
> + ac_llvm_per_thread_info &tinfo = ac_llvm_per_thread_list.back();
> + if (!ac_llvm_compiler_init_internal(&tinfo.llvm_info,
> + true,
> + family,
> + tm_options))
> + return false;
> +
> + tinfo.llvm_info.thread_stored = true;
> + *info = tinfo.llvm_info;
> +
> + llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tinfo.llvm_info.tm);
> + if (TM->addPassesToEmitFile(tinfo.pass, tinfo.OStream,
> +#if HAVE_LLVM >= 0x0700
> + nullptr,
> +#endif
> + llvm::TargetMachine::CGFT_ObjectFile)) {
> + assert(0);
> + return false;
> + }
> + } else {
> + if (!ac_llvm_compiler_init_internal(info,
> + add_target_library_info,
> + family,
> + tm_options))
> + return false;
> + }
> + return true;
> +}
> +
> +void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info)
> +{
> + if (!info->thread_stored)
> + ac_llvm_compiler_dispose_internal(info);
> }
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index 7f8c5746b37..d8ec4ee0092 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -188,10 +188,10 @@ static LLVMPassManagerRef ac_init_passmgr(LLVMTargetLibraryInfoRef target_librar
> return passmgr;
> }
>
> -bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
> - bool add_target_library_info,
> - enum radeon_family family,
> - enum ac_target_machine_options tm_options)
> +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info,
> + bool add_target_library_info,
> + enum radeon_family family,
> + enum ac_target_machine_options tm_options)
> {
> memset(info, 0, sizeof(*info));
> info->tm = ac_create_target_machine(family, tm_options, &info->triple);
> @@ -223,7 +223,7 @@ fail:
> return false;
> }
>
> -void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info)
> +void ac_llvm_compiler_dispose_internal(struct ac_llvm_compiler_info *info)
> {
> if (info->data_layout)
> LLVMDisposeMessage((char*)info->data_layout);
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index 1ba972dab87..c579e6a9ec3 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -60,6 +60,7 @@ enum ac_target_machine_options {
> AC_TM_FORCE_DISABLE_XNACK = (1 << 3),
> AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
> AC_TM_CHECK_IR = (1 << 5),
> + AC_TM_THREAD_LLVM = (1 << 6),
> };
>
> enum ac_float_mode {
> @@ -74,6 +75,7 @@ struct ac_llvm_compiler_info {
> LLVMTargetLibraryInfoRef target_library_info;
> const char *triple;
> const char *data_layout;
> + bool thread_stored;
> };
>
> const char *ac_get_llvm_processor_name(enum radeon_family family);
> @@ -114,6 +116,12 @@ ac_get_store_intr_attribs(bool writeonly_memory)
> unsigned
> ac_count_scratch_private_memory(LLVMValueRef function);
>
> +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info,
> + bool add_target_library_info, /* crash workaround */
> + enum radeon_family family,
> + enum ac_target_machine_options tm_options);
> +void ac_llvm_compiler_dispose_internal(struct ac_llvm_compiler_info *info);
> +
> bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
> bool add_target_library_info, /* crash workaround */
> enum radeon_family family,
> @@ -127,6 +135,7 @@ bool ac_compile_to_memory_buffer(struct ac_llvm_compiler_info *info,
> LLVMModuleRef M,
> char **ErrorMessage,
> LLVMMemoryBufferRef *OutMemBuf);
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
> index f1b0dc26a63..9fe4c3b7404 100644
> --- a/src/amd/vulkan/radv_debug.h
> +++ b/src/amd/vulkan/radv_debug.h
> @@ -49,6 +49,7 @@ enum {
> RADV_DEBUG_ERRORS = 0x80000,
> RADV_DEBUG_STARTUP = 0x100000,
> RADV_DEBUG_CHECKIR = 0x200000,
> + RADV_DEBUG_NOTHREADLLVM = 0x400000,
> };
>
> enum {
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index ad3465f594e..73c48cef1f0 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -436,6 +436,7 @@ static const struct debug_control radv_debug_options[] = {
> {"errors", RADV_DEBUG_ERRORS},
> {"startup", RADV_DEBUG_STARTUP},
> {"checkir", RADV_DEBUG_CHECKIR},
> + {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
> {NULL, 0}
> };
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 3bbb7867640..f7033aff771 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -547,6 +547,8 @@ shader_variant_create(struct radv_device *device,
> tm_options |= AC_TM_SISCHED;
> if (options->check_ir)
> tm_options |= AC_TM_CHECK_IR;
> + if (!(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM))
> + tm_options |= AC_TM_THREAD_LLVM;
>
> radv_init_llvm_once();
>
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list