<div dir="ltr">Hi Dave,<div><br></div><div>I did a quick test with this on Rise of the Tomb Raider. It reduced the time taken to create all pipelines for the whole game <span style="font-size:small;background-color:rgb(255,255,255);text-decoration-style:initial;text-decoration-color:initial;float:none;display:inline">over 8 threads </span>(with RADV_DEBUG=nocache) from 12m24s to 11m35s. Nice improvement :)</div><div><br></div><div>Also didn't see any issues, so:</div><div><br></div><div>Tested-by: Alex Smith <<a href="mailto:asmith@feralinteractive.com">asmith@feralinteractive.com</a>></div><div><br></div><div>Thanks,</div><div>Alex</div></div><div class="gmail_extra"><br><div class="gmail_quote">On 27 June 2018 at 04:58, Dave Airlie <span dir="ltr"><<a href="mailto:airlied@gmail.com" target="_blank">airlied@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Dave Airlie <<a href="mailto:airlied@redhat.com">airlied@redhat.com</a>><br>
<br>
I'd like to encourage people to test this to see if it helps (e.g.<br>
does it improve app startup or reduce hitching in dxvk).<br>
<br>
The basic idea is to store a bunch of LLVM-related data structs<br>
in thread-local storage so we can avoid reinitializing them every time<br>
we compile a shader. Since we know LLVM objects aren't thread safe,<br>
they have to be stored using TLS to avoid any collisions.<br>
<br>
This should remove the fixed setup overhead of creating<br>
the pass manager each time.<br>
<br>
This reduces the time a demo app takes to compile the radv meta shaders<br>
with nocache and exit from 1.7s to 1s.<br>
<br>
TODO: this doesn't work for radeonsi yet, and I'm not sure how TLS<br>
behaves if you have radeonsi and radv loaded at the same time. If<br>
they end up trying to use the same TLS data,<br>
this might explode all over the place.<br>
<br>
v2: fix llvm6 build, inline emit function, handle multiple targets<br>
in one thread<br>
v3: rebase and port onto new structure<br>
---<br>
 src/amd/common/ac_llvm_helper.<wbr>cpp | 120 ++++++++++++++++++++++++++++--<br>
 src/amd/common/ac_llvm_util.c     |  10 +--<br>
 src/amd/common/ac_llvm_util.h     |   9 +++<br>
 src/amd/vulkan/radv_debug.h       |   1 +<br>
 src/amd/vulkan/radv_device.c      |   1 +<br>
 src/amd/vulkan/radv_shader.c      |   2 +<br>
 6 files changed, 132 insertions(+), 11 deletions(-)<br>
<br>
diff --git a/src/amd/common/ac_llvm_<wbr>helper.cpp b/src/amd/common/ac_llvm_<wbr>helper.cpp<br>
index 27403dbe085..f1f1399b3fb 100644<br>
--- a/src/amd/common/ac_llvm_<wbr>helper.cpp<br>
+++ b/src/amd/common/ac_llvm_<wbr>helper.cpp<br>
@@ -31,12 +31,21 @@<br>
<br>
 #include "ac_llvm_util.h"<br>
 #include <llvm-c/Core.h><br>
-#include <llvm/Target/TargetOptions.h><br>
-#include <llvm/ExecutionEngine/<wbr>ExecutionEngine.h><br>
-#include <llvm/IR/Attributes.h><br>
-#include <llvm/IR/CallSite.h><br>
+#include <llvm/Target/TargetMachine.h><br>
 #include <llvm/IR/IRBuilder.h><br>
 #include <llvm/Analysis/<wbr>TargetLibraryInfo.h><br>
+#include <llvm/IR/LegacyPassManager.h><br>
+<br>
+#include <llvm-c/Transforms/IPO.h><br>
+#include <llvm-c/Transforms/Scalar.h><br>
+#if HAVE_LLVM >= 0x0700<br>
+#include <llvm-c/Transforms/Utils.h><br>
+#endif<br>
+<br>
+#if HAVE_LLVM < 0x0700<br>
+#include "llvm/Support/raw_ostream.h"<br>
+#endif<br>
+#include <list><br>
<br>
 void ac_add_attr_dereferenceable(<wbr>LLVMValueRef val, uint64_t bytes)<br>
 {<br>
@@ -101,11 +110,110 @@ ac_dispose_target_library_<wbr>info(LLVMTargetLibraryInfoRef library_info)<br>
        delete reinterpret_cast<llvm::<wbr>TargetLibraryInfoImpl *>(library_info);<br>
 }<br>
<br>
+class ac_llvm_per_thread_info {<br>
+public:<br>
+       ac_llvm_per_thread_info(enum radeon_family arg_family,<br>
+                               enum ac_target_machine_options arg_tm_options)<br>
+               : family(arg_family), tm_options(arg_tm_options),<br>
+                 OStream(CodeString) {}<br>
+       ~ac_llvm_per_thread_info() {<br>
+               ac_llvm_compiler_dispose_<wbr>internal(&llvm_info);<br>
+       }<br>
+<br>
+       struct ac_llvm_compiler_info llvm_info;<br>
+       enum radeon_family family;<br>
+       enum ac_target_machine_options tm_options;<br>
+       llvm::SmallString<0> CodeString;<br>
+       llvm::raw_svector_ostream OStream;<br>
+       llvm::legacy::PassManager pass;<br>
+};<br>
+<br>
+/* we have to store a linked list per thread due to the possibility of multiple gpus being required */<br>
+static thread_local std::list<ac_llvm_per_thread_<wbr>info> ac_llvm_per_thread_list;<br>
+<br>
 bool ac_compile_to_memory_buffer(<wbr>struct ac_llvm_compiler_info *info,<br>
                                 LLVMModuleRef M,<br>
                                 char **ErrorMessage,<br>
                                 LLVMMemoryBufferRef *OutMemBuf)<br>
 {<br>
-       return LLVMTargetMachineEmitToMemoryB<wbr>uffer(info->tm, M, LLVMObjectFile,<br>
-                                                  ErrorMessage, OutMemBuf);<br>
+       ac_llvm_per_thread_info *thread_info = nullptr;<br>
+       if (info->thread_stored) {<br>
+               for (auto &I : ac_llvm_per_thread_list) {<br>
+                       if (<a href="http://I.llvm_info.tm" rel="noreferrer" target="_blank">I.llvm_info.tm</a> == info->tm) {<br>
+                               thread_info = &I;<br>
+                               break;<br>
+                       }<br>
+               }<br>
+<br>
+               if (!thread_info) {<br>
+                       assert(0);<br>
+                       return false;<br>
+               }<br>
+       } else {<br>
+               return LLVMTargetMachineEmitToMemoryB<wbr>uffer(info->tm, M, LLVMObjectFile,<br>
+                                                          ErrorMessage, OutMemBuf);<br>
+       }<br>
+<br>
+       llvm::TargetMachine *TM = reinterpret_cast<llvm::<wbr>TargetMachine*>(thread_info-><a href="http://llvm_info.tm" rel="noreferrer" target="_blank">l<wbr>lvm_info.tm</a>);<br>
+       llvm::Module *Mod = llvm::unwrap(M);<br>
+       llvm::StringRef Data;<br>
+<br>
+       Mod->setDataLayout(TM-><wbr>createDataLayout());<br>
+<br>
+       thread_info->pass.run(*Mod);<br>
+<br>
+       Data = thread_info->OStream.str();<br>
+       *OutMemBuf = LLVMCreateMemoryBufferWithMemo<wbr>ryRangeCopy(Data.data(), Data.size(), "");<br>
+       thread_info->CodeString = "";<br>
+       return false;<br>
+}<br>
+<br>
+bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,<br>
+                          bool add_target_library_info,<br>
+                          enum radeon_family family,<br>
+                          enum ac_target_machine_options tm_options)<br>
+{<br>
+       if (tm_options & AC_TM_THREAD_LLVM) {<br>
+               for (auto &I : ac_llvm_per_thread_list) {<br>
+                       if (I.family == family &&<br>
+                           I.tm_options == tm_options) {<br>
+                               *info = I.llvm_info;<br>
+                               return true;<br>
+                       }<br>
+               }<br>
+<br>
+               ac_llvm_per_thread_list.<wbr>emplace_back(family, tm_options);<br>
+               ac_llvm_per_thread_info &tinfo = ac_llvm_per_thread_list.back()<wbr>;<br>
+               if (!ac_llvm_compiler_init_<wbr>internal(&tinfo.llvm_info,<br>
+                                                   true,<br>
+                                                   family,<br>
+                                                   tm_options))<br>
+                       return false;<br>
+<br>
+               tinfo.llvm_info.thread_stored = true;<br>
+               *info = tinfo.llvm_info;<br>
+<br>
+               llvm::TargetMachine *TM = reinterpret_cast<llvm::<wbr>TargetMachine*>(<a href="http://tinfo.llvm_info.tm" rel="noreferrer" target="_blank">tinfo.llvm_<wbr>info.tm</a>);<br>
+               if (TM->addPassesToEmitFile(<wbr>tinfo.pass, tinfo.OStream,<br>
+#if HAVE_LLVM >= 0x0700<br>
+                                           nullptr,<br>
+#endif<br>
+                                       llvm::TargetMachine::CGFT_<wbr>ObjectFile)) {<br>
+                       assert(0);<br>
+                       return false;<br>
+               }<br>
+       } else {<br>
+               if (!ac_llvm_compiler_init_<wbr>internal(info,<br>
+                                                   add_target_library_info,<br>
+                                                   family,<br>
+                                                   tm_options))<br>
+                       return false;<br>
+       }<br>
+       return true;<br>
+}<br>
+<br>
+void ac_llvm_compiler_dispose(<wbr>struct ac_llvm_compiler_info *info)<br>
+{<br>
+       if (!info->thread_stored)<br>
+               ac_llvm_compiler_dispose_<wbr>internal(info);<br>
 }<br>
diff --git a/src/amd/common/ac_llvm_util.<wbr>c b/src/amd/common/ac_llvm_util.<wbr>c<br>
index 7f8c5746b37..d8ec4ee0092 100644<br>
--- a/src/amd/common/ac_llvm_util.<wbr>c<br>
+++ b/src/amd/common/ac_llvm_util.<wbr>c<br>
@@ -188,10 +188,10 @@ static LLVMPassManagerRef ac_init_passmgr(<wbr>LLVMTargetLibraryInfoRef target_librar<br>
        return passmgr;<br>
 }<br>
<br>
-bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,<br>
-                          bool add_target_library_info,<br>
-                          enum radeon_family family,<br>
-                          enum ac_target_machine_options tm_options)<br>
+bool ac_llvm_compiler_init_<wbr>internal(struct ac_llvm_compiler_info *info,<br>
+                                   bool add_target_library_info,<br>
+                                   enum radeon_family family,<br>
+                                   enum ac_target_machine_options tm_options)<br>
 {<br>
        memset(info, 0, sizeof(*info));<br>
        info->tm = ac_create_target_machine(<wbr>family, tm_options, &info->triple);<br>
@@ -223,7 +223,7 @@ fail:<br>
        return false;<br>
 }<br>
<br>
-void ac_llvm_compiler_dispose(<wbr>struct ac_llvm_compiler_info *info)<br>
+void ac_llvm_compiler_dispose_<wbr>internal(struct ac_llvm_compiler_info *info)<br>
 {<br>
        if (info->data_layout)<br>
                LLVMDisposeMessage((char*)<wbr>info->data_layout);<br>
diff --git a/src/amd/common/ac_llvm_util.<wbr>h b/src/amd/common/ac_llvm_util.<wbr>h<br>
index 1ba972dab87..c579e6a9ec3 100644<br>
--- a/src/amd/common/ac_llvm_util.<wbr>h<br>
+++ b/src/amd/common/ac_llvm_util.<wbr>h<br>
@@ -60,6 +60,7 @@ enum ac_target_machine_options {<br>
        AC_TM_FORCE_DISABLE_XNACK = (1 << 3),<br>
        AC_TM_PROMOTE_ALLOCA_TO_<wbr>SCRATCH = (1 << 4),<br>
        AC_TM_CHECK_IR = (1 << 5),<br>
+       AC_TM_THREAD_LLVM = (1 << 6),<br>
 };<br>
<br>
 enum ac_float_mode {<br>
@@ -74,6 +75,7 @@ struct ac_llvm_compiler_info {<br>
        LLVMTargetLibraryInfoRef target_library_info;<br>
        const char *triple;<br>
        const char *data_layout;<br>
+       bool thread_stored;<br>
 };<br>
<br>
 const char *ac_get_llvm_processor_name(<wbr>enum radeon_family family);<br>
@@ -114,6 +116,12 @@ ac_get_store_intr_attribs(bool writeonly_memory)<br>
 unsigned<br>
 ac_count_scratch_private_<wbr>memory(LLVMValueRef function);<br>
<br>
+bool ac_llvm_compiler_init_<wbr>internal(struct ac_llvm_compiler_info *info,<br>
+                                   bool add_target_library_info, /* crash workaround */<br>
+                                   enum radeon_family family,<br>
+                                   enum ac_target_machine_options tm_options);<br>
+void ac_llvm_compiler_dispose_<wbr>internal(struct ac_llvm_compiler_info *info);<br>
+<br>
 bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,<br>
                           bool add_target_library_info, /* crash workaround */<br>
                           enum radeon_family family,<br>
@@ -127,6 +135,7 @@ bool ac_compile_to_memory_buffer(<wbr>struct ac_llvm_compiler_info *info,<br>
                                 LLVMModuleRef M,<br>
                                 char **ErrorMessage,<br>
                                 LLVMMemoryBufferRef *OutMemBuf);<br>
+<br>
 #ifdef __cplusplus<br>
 }<br>
 #endif<br>
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h<br>
index f1b0dc26a63..9fe4c3b7404 100644<br>
--- a/src/amd/vulkan/radv_debug.h<br>
+++ b/src/amd/vulkan/radv_debug.h<br>
@@ -49,6 +49,7 @@ enum {<br>
        RADV_DEBUG_ERRORS            = 0x80000,<br>
        RADV_DEBUG_STARTUP           = 0x100000,<br>
        RADV_DEBUG_CHECKIR           = 0x200000,<br>
+       RADV_DEBUG_NOTHREADLLVM      = 0x400000,<br>
 };<br>
<br>
 enum {<br>
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c<br>
index ad3465f594e..73c48cef1f0 100644<br>
--- a/src/amd/vulkan/radv_device.c<br>
+++ b/src/amd/vulkan/radv_device.c<br>
@@ -436,6 +436,7 @@ static const struct debug_control radv_debug_options[] = {<br>
        {"errors", RADV_DEBUG_ERRORS},<br>
        {"startup", RADV_DEBUG_STARTUP},<br>
        {"checkir", RADV_DEBUG_CHECKIR},<br>
+       {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},<br>
        {NULL, 0}<br>
 };<br>
<br>
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c<br>
index 3bbb7867640..f7033aff771 100644<br>
--- a/src/amd/vulkan/radv_shader.c<br>
+++ b/src/amd/vulkan/radv_shader.c<br>
@@ -547,6 +547,8 @@ shader_variant_create(struct radv_device *device,<br>
                tm_options |= AC_TM_SISCHED;<br>
        if (options->check_ir)<br>
                tm_options |= AC_TM_CHECK_IR;<br>
+       if (!(device->instance->debug_<wbr>flags & RADV_DEBUG_NOTHREADLLVM))<br>
+               tm_options |= AC_TM_THREAD_LLVM;<br>
<br>
        radv_init_llvm_once();<br>
<span class="HOEnZb"><font color="#888888"> <br>
-- <br>
2.17.1<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>