<div dir="ltr">Hi Dave,<div><br></div><div>I did a quick test with this on Rise of the Tomb Raider. It reduced the time taken to create all pipelines for the whole game <span style="font-size:small;background-color:rgb(255,255,255);text-decoration-style:initial;text-decoration-color:initial;float:none;display:inline">over 8 threads </span>(with RADV_DEBUG=nocache) from 12m24s to 11m35s. Nice improvement :)</div><div><br></div><div>Also didn't see any issues, so:</div><div><br></div><div>Tested-by: Alex Smith <<a href="mailto:asmith@feralinteractive.com">asmith@feralinteractive.com</a>></div><div><br></div><div>Thanks,</div><div>Alex</div></div><div class="gmail_extra"><br><div class="gmail_quote">On 27 June 2018 at 04:58, Dave Airlie <span dir="ltr"><<a href="mailto:airlied@gmail.com" target="_blank">airlied@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Dave Airlie <<a href="mailto:airlied@redhat.com">airlied@redhat.com</a>><br>
<br>
I'd like to encourage people to test this to see if it helps (like<br>
does it make app startup better or less hitching in dxvk).<br>
<br>
The basic idea is to store a bunch of LLVM related data structs<br>
in thread local storage so we can avoid reiniting them every time<br>
we compile a shader. Since we know llvm objects aren't thread safe<br>
it has to be stored using TLS to avoid any collisions.<br>
<br>
This should remove all the fixed setup overhead of creating<br>
the pass manager each time.<br>
<br>
This reduces the time a demo app takes to compile the radv meta shaders<br>
with nocache and exit from 1.7s to 1s.<br>
<br>
TODO: this doesn't work for radeonsi yet. I'm also not sure how TLS<br>
works if you have radeonsi and radv loaded at the same time: if<br>
they magically try to use the same TLS data, then<br>
this might explode all over the place.<br>
<br>
v2: fix llvm6 build, inline emit function, handle multiple targets<br>
in one thread<br>
v3: rebase and port onto new structure<br>
---<br>
src/amd/common/ac_llvm_helper.<wbr>cpp | 120 ++++++++++++++++++++++++++++--<br>
src/amd/common/ac_llvm_util.c | 10 +--<br>
src/amd/common/ac_llvm_util.h | 9 +++<br>
src/amd/vulkan/radv_debug.h | 1 +<br>
src/amd/vulkan/radv_device.c | 1 +<br>
src/amd/vulkan/radv_shader.c | 2 +<br>
6 files changed, 132 insertions(+), 11 deletions(-)<br>
<br>
diff --git a/src/amd/common/ac_llvm_<wbr>helper.cpp b/src/amd/common/ac_llvm_<wbr>helper.cpp<br>
index 27403dbe085..f1f1399b3fb 100644<br>
--- a/src/amd/common/ac_llvm_<wbr>helper.cpp<br>
+++ b/src/amd/common/ac_llvm_<wbr>helper.cpp<br>
@@ -31,12 +31,21 @@<br>
<br>
#include "ac_llvm_util.h"<br>
#include <llvm-c/Core.h><br>
-#include <llvm/Target/TargetOptions.h><br>
-#include <llvm/ExecutionEngine/<wbr>ExecutionEngine.h><br>
-#include <llvm/IR/Attributes.h><br>
-#include <llvm/IR/CallSite.h><br>
+#include <llvm/Target/TargetMachine.h><br>
#include <llvm/IR/IRBuilder.h><br>
#include <llvm/Analysis/<wbr>TargetLibraryInfo.h><br>
+#include <llvm/IR/LegacyPassManager.h><br>
+<br>
+#include <llvm-c/Transforms/IPO.h><br>
+#include <llvm-c/Transforms/Scalar.h><br>
+#if HAVE_LLVM >= 0x0700<br>
+#include <llvm-c/Transforms/Utils.h><br>
+#endif<br>
+<br>
+#if HAVE_LLVM < 0x0700<br>
+#include "llvm/Support/raw_ostream.h"<br>
+#endif<br>
+#include <list><br>
<br>
void ac_add_attr_dereferenceable(<wbr>LLVMValueRef val, uint64_t bytes)<br>
{<br>
@@ -101,11 +110,110 @@ ac_dispose_target_library_<wbr>info(LLVMTargetLibraryInfoRef library_info)<br>
delete reinterpret_cast<llvm::<wbr>TargetLibraryInfoImpl *>(library_info);<br>
}<br>
<br>
+class ac_llvm_per_thread_info {<br>
+public:<br>
+ ac_llvm_per_thread_info(enum radeon_family arg_family,<br>
+ enum ac_target_machine_options arg_tm_options)<br>
+ : family(arg_family), tm_options(arg_tm_options),<br>
+ OStream(CodeString) {}<br>
+ ~ac_llvm_per_thread_info() {<br>
+ ac_llvm_compiler_dispose_<wbr>internal(&llvm_info);<br>
+ }<br>
+<br>
+ struct ac_llvm_compiler_info llvm_info;<br>
+ enum radeon_family family;<br>
+ enum ac_target_machine_options tm_options;<br>
+ llvm::SmallString<0> CodeString;<br>
+ llvm::raw_svector_ostream OStream;<br>
+ llvm::legacy::PassManager pass;<br>
+};<br>
+<br>
+/* we have to store a linked list per thread due to the possibility of multiple gpus being required */<br>
+static thread_local std::list<ac_llvm_per_thread_<wbr>info> ac_llvm_per_thread_list;<br>
+<br>
bool ac_compile_to_memory_buffer(<wbr>struct ac_llvm_compiler_info *info,<br>
LLVMModuleRef M,<br>
char **ErrorMessage,<br>
LLVMMemoryBufferRef *OutMemBuf)<br>
{<br>
- return LLVMTargetMachineEmitToMemoryB<wbr>uffer(info->tm, M, LLVMObjectFile,<br>
- ErrorMessage, OutMemBuf);<br>
+ ac_llvm_per_thread_info *thread_info = nullptr;<br>
+ if (info->thread_stored) {<br>
+ for (auto &I : ac_llvm_per_thread_list) {<br>
+ if (I.llvm_info.tm == info->tm) {<br>
+ thread_info = &I;<br>
+ break;<br>
+ }<br>
+ }<br>
+<br>
+ if (!thread_info) {<br>
+ assert(0);<br>
+ return false;<br>
+ }<br>
+ } else {<br>
+ return LLVMTargetMachineEmitToMemoryB<wbr>uffer(info->tm, M, LLVMObjectFile,<br>
+ ErrorMessage, OutMemBuf);<br>
+ }<br>
+<br>
+ llvm::TargetMachine *TM = reinterpret_cast<llvm::<wbr>TargetMachine*>(thread_info->l<wbr>lvm_info.tm);<br>
+ llvm::Module *Mod = llvm::unwrap(M);<br>
+ llvm::StringRef Data;<br>
+<br>
+ Mod->setDataLayout(TM-><wbr>createDataLayout());<br>
+<br>
+ thread_info->pass.run(*Mod);<br>
+<br>
+ Data = thread_info->OStream.str();<br>
+ *OutMemBuf = LLVMCreateMemoryBufferWithMemo<wbr>ryRangeCopy(Data.data(), Data.size(), "");<br>
+ thread_info->CodeString = "";<br>
+ return false;<br>
+}<br>
+<br>
+bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,<br>
+ bool add_target_library_info,<br>
+ enum radeon_family family,<br>
+ enum ac_target_machine_options tm_options)<br>
+{<br>
+ if (tm_options & AC_TM_THREAD_LLVM) {<br>
+ for (auto &I : ac_llvm_per_thread_list) {<br>
+ if (I.family == family &&<br>
+ I.tm_options == tm_options) {<br>
+ *info = I.llvm_info;<br>
+ return true;<br>
+ }<br>
+ }<br>
+<br>
+ ac_llvm_per_thread_list.<wbr>emplace_back(family, tm_options);<br>
+ ac_llvm_per_thread_info &tinfo = ac_llvm_per_thread_list.back()<wbr>;<br>
+ if (!ac_llvm_compiler_init_<wbr>internal(&tinfo.llvm_info,<br>
+ true,<br>
+ family,<br>
+ tm_options))<br>
+ return false;<br>
+<br>
+ tinfo.llvm_info.thread_stored = true;<br>
+ *info = tinfo.llvm_info;<br>
+<br>
+ llvm::TargetMachine *TM = reinterpret_cast<llvm::<wbr>TargetMachine*>(tinfo.llvm_<wbr>info.tm);<br>
+ if (TM->addPassesToEmitFile(<wbr>tinfo.pass, tinfo.OStream,<br>
+#if HAVE_LLVM >= 0x0700<br>
+ nullptr,<br>
+#endif<br>
+ llvm::TargetMachine::CGFT_<wbr>ObjectFile)) {<br>
+ assert(0);<br>
+ return false;<br>
+ }<br>
+ } else {<br>
+ if (!ac_llvm_compiler_init_<wbr>internal(info,<br>
+ add_target_library_info,<br>
+ family,<br>
+ tm_options))<br>
+ return false;<br>
+ }<br>
+ return true;<br>
+}<br>
+<br>
+void ac_llvm_compiler_dispose(<wbr>struct ac_llvm_compiler_info *info)<br>
+{<br>
+ if (!info->thread_stored)<br>
+ ac_llvm_compiler_dispose_<wbr>internal(info);<br>
}<br>
diff --git a/src/amd/common/ac_llvm_util.<wbr>c b/src/amd/common/ac_llvm_util.<wbr>c<br>
index 7f8c5746b37..d8ec4ee0092 100644<br>
--- a/src/amd/common/ac_llvm_util.<wbr>c<br>
+++ b/src/amd/common/ac_llvm_util.<wbr>c<br>
@@ -188,10 +188,10 @@ static LLVMPassManagerRef ac_init_passmgr(<wbr>LLVMTargetLibraryInfoRef target_librar<br>
return passmgr;<br>
}<br>
<br>
-bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,<br>
- bool add_target_library_info,<br>
- enum radeon_family family,<br>
- enum ac_target_machine_options tm_options)<br>
+bool ac_llvm_compiler_init_<wbr>internal(struct ac_llvm_compiler_info *info,<br>
+ bool add_target_library_info,<br>
+ enum radeon_family family,<br>
+ enum ac_target_machine_options tm_options)<br>
{<br>
memset(info, 0, sizeof(*info));<br>
info->tm = ac_create_target_machine(<wbr>family, tm_options, &info->triple);<br>
@@ -223,7 +223,7 @@ fail:<br>
return false;<br>
}<br>
<br>
-void ac_llvm_compiler_dispose(<wbr>struct ac_llvm_compiler_info *info)<br>
+void ac_llvm_compiler_dispose_<wbr>internal(struct ac_llvm_compiler_info *info)<br>
{<br>
if (info->data_layout)<br>
LLVMDisposeMessage((char*)<wbr>info->data_layout);<br>
diff --git a/src/amd/common/ac_llvm_util.<wbr>h b/src/amd/common/ac_llvm_util.<wbr>h<br>
index 1ba972dab87..c579e6a9ec3 100644<br>
--- a/src/amd/common/ac_llvm_util.<wbr>h<br>
+++ b/src/amd/common/ac_llvm_util.<wbr>h<br>
@@ -60,6 +60,7 @@ enum ac_target_machine_options {<br>
AC_TM_FORCE_DISABLE_XNACK = (1 << 3),<br>
AC_TM_PROMOTE_ALLOCA_TO_<wbr>SCRATCH = (1 << 4),<br>
AC_TM_CHECK_IR = (1 << 5),<br>
+ AC_TM_THREAD_LLVM = (1 << 6),<br>
};<br>
<br>
enum ac_float_mode {<br>
@@ -74,6 +75,7 @@ struct ac_llvm_compiler_info {<br>
LLVMTargetLibraryInfoRef target_library_info;<br>
const char *triple;<br>
const char *data_layout;<br>
+ bool thread_stored;<br>
};<br>
<br>
const char *ac_get_llvm_processor_name(<wbr>enum radeon_family family);<br>
@@ -114,6 +116,12 @@ ac_get_store_intr_attribs(bool writeonly_memory)<br>
unsigned<br>
ac_count_scratch_private_<wbr>memory(LLVMValueRef function);<br>
<br>
+bool ac_llvm_compiler_init_<wbr>internal(struct ac_llvm_compiler_info *info,<br>
+ bool add_target_library_info, /* crash workaround */<br>
+ enum radeon_family family,<br>
+ enum ac_target_machine_options tm_options);<br>
+void ac_llvm_compiler_dispose_<wbr>internal(struct ac_llvm_compiler_info *info);<br>
+<br>
bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,<br>
bool add_target_library_info, /* crash workaround */<br>
enum radeon_family family,<br>
@@ -127,6 +135,7 @@ bool ac_compile_to_memory_buffer(<wbr>struct ac_llvm_compiler_info *info,<br>
LLVMModuleRef M,<br>
char **ErrorMessage,<br>
LLVMMemoryBufferRef *OutMemBuf);<br>
+<br>
#ifdef __cplusplus<br>
}<br>
#endif<br>
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h<br>
index f1b0dc26a63..9fe4c3b7404 100644<br>
--- a/src/amd/vulkan/radv_debug.h<br>
+++ b/src/amd/vulkan/radv_debug.h<br>
@@ -49,6 +49,7 @@ enum {<br>
RADV_DEBUG_ERRORS = 0x80000,<br>
RADV_DEBUG_STARTUP = 0x100000,<br>
RADV_DEBUG_CHECKIR = 0x200000,<br>
+ RADV_DEBUG_NOTHREADLLVM = 0x400000,<br>
};<br>
<br>
enum {<br>
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c<br>
index ad3465f594e..73c48cef1f0 100644<br>
--- a/src/amd/vulkan/radv_device.c<br>
+++ b/src/amd/vulkan/radv_device.c<br>
@@ -436,6 +436,7 @@ static const struct debug_control radv_debug_options[] = {<br>
{"errors", RADV_DEBUG_ERRORS},<br>
{"startup", RADV_DEBUG_STARTUP},<br>
{"checkir", RADV_DEBUG_CHECKIR},<br>
+ {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},<br>
{NULL, 0}<br>
};<br>
<br>
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c<br>
index 3bbb7867640..f7033aff771 100644<br>
--- a/src/amd/vulkan/radv_shader.c<br>
+++ b/src/amd/vulkan/radv_shader.c<br>
@@ -547,6 +547,8 @@ shader_variant_create(struct radv_device *device,<br>
tm_options |= AC_TM_SISCHED;<br>
if (options->check_ir)<br>
tm_options |= AC_TM_CHECK_IR;<br>
+ if (!(device->instance->debug_<wbr>flags & RADV_DEBUG_NOTHREADLLVM))<br>
+ tm_options |= AC_TM_THREAD_LLVM;<br>
<br>
radv_init_llvm_once();<br>
<span class="HOEnZb"><font color="#888888"> <br>
-- <br>
2.17.1<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>