[Mesa-dev] [PATCH] [rfc] ac/radv: using tls to store llvm related info and speed up compiles

Dave Airlie airlied at gmail.com
Tue Jun 26 20:04:29 UTC 2018


From: Dave Airlie <airlied at redhat.com>

I'd like to encourage people to test this to see if it helps (e.g.
does it improve app startup time or reduce hitching in dxvk?).

The basic idea is to store a bunch of LLVM-related data structures
in thread-local storage so we can avoid reinitializing them every time
we compile a shader. Since we know LLVM objects aren't thread safe,
they have to be stored using TLS to avoid any collisions.

This should remove the fixed setup cost of creating the pass
manager each time.

This reduces the time a demo app takes to compile the radv meta
shaders with nocache and exit from 1.7s to 1s.

TODO: this doesn't work for radeonsi yet. I'm also not sure how TLS
behaves if radeonsi and radv are loaded at the same time: if they
end up sharing the same TLS objects, this might explode all over
the place.
---
 src/amd/common/ac_llvm_helper.cpp | 112 ++++++++++++++++++++++++++++--
 src/amd/common/ac_llvm_util.h     |  12 ++++
 src/amd/vulkan/radv_nir_to_llvm.c |  41 +++--------
 src/amd/vulkan/radv_shader.c      |  18 ++---
 4 files changed, 134 insertions(+), 49 deletions(-)

diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp
index 1a2aee3bc9a..010ec6152b7 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -31,11 +31,15 @@
 
 #include "ac_llvm_util.h"
 #include <llvm-c/Core.h>
-#include <llvm/Target/TargetOptions.h>
-#include <llvm/ExecutionEngine/ExecutionEngine.h>
-#include <llvm/IR/Attributes.h>
-#include <llvm/IR/CallSite.h>
+#include <llvm/Target/TargetMachine.h>
 #include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/LegacyPassManager.h>
+
+#include <llvm-c/Transforms/IPO.h>
+#include <llvm-c/Transforms/Scalar.h>
+#if HAVE_LLVM >= 0x0700
+#include <llvm-c/Transforms/Utils.h>
+#endif
 
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
 {
@@ -87,3 +91,103 @@ LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
 
 	return builder;
 }
+
+class ac_llvm_per_thread_info {
+public:
+	ac_llvm_per_thread_info() : OStream(CodeString) {}
+	~ac_llvm_per_thread_info() {
+		if (passmgr)
+			LLVMDisposePassManager(passmgr);
+
+		if (tm)
+			LLVMDisposeTargetMachine(tm);
+	}
+
+	void init_passmgr(bool check_ir) {
+		passmgr = LLVMCreatePassManager();
+		if (check_ir)
+			LLVMAddVerifierPass(passmgr);
+		/* This pass should eliminate all the load and store instructions */
+		LLVMAddPromoteMemoryToRegisterPass(passmgr);
+		/* Add some optimization passes */
+		LLVMAddScalarReplAggregatesPass(passmgr);
+		LLVMAddLICMPass(passmgr);
+		LLVMAddAggressiveDCEPass(passmgr);
+		LLVMAddCFGSimplificationPass(passmgr);
+		/* This is recommended by the instruction combining pass. */
+		LLVMAddEarlyCSEMemSSAPass(passmgr);
+		LLVMAddInstructionCombiningPass(passmgr);
+	}
+
+	LLVMTargetMachineRef tm;
+	llvm::SmallString<0> CodeString;
+	llvm::raw_svector_ostream OStream;
+	llvm::legacy::PassManager pass;
+	LLVMPassManagerRef passmgr;
+};
+
+static thread_local ac_llvm_per_thread_info ac_llvm_per_thread;
+
+static LLVMBool ac_target_machine_emit(LLVMModuleRef M,
+				       char **ErrorMessage,
+				       LLVMMemoryBufferRef *OutMemBuf)
+{
+	llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(ac_llvm_per_thread.tm);
+	llvm::Module *Mod = llvm::unwrap(M);
+	llvm::StringRef Data;
+
+	Mod->setDataLayout(TM->createDataLayout());
+
+	ac_llvm_per_thread.pass.run(*Mod);
+
+	Data = ac_llvm_per_thread.OStream.str();
+	*OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), "");
+	ac_llvm_per_thread.CodeString = "";
+	return false;
+}
+
+bool ac_compile_to_memory_buffer(LLVMModuleRef M,
+				 char **ErrorMessage,
+				 LLVMMemoryBufferRef *OutMemBuf)
+{
+	return ac_target_machine_emit(M, ErrorMessage, OutMemBuf);
+}
+
+bool ac_llvm_init_per_thread_info(enum radeon_family family,
+				  enum ac_target_machine_options tm_options,
+				  LLVMTargetRef target,
+				  const char *triple,
+				  const char *features,
+				  bool check_ir,
+				  LLVMTargetMachineRef *tm)
+{
+	if (ac_llvm_per_thread.tm) {
+		*tm = ac_llvm_per_thread.tm;
+		return true;
+	}
+
+	ac_llvm_per_thread.tm = LLVMCreateTargetMachine(
+	                             target,
+	                             triple,
+	                             ac_get_llvm_processor_name(family),
+				     features,
+	                             LLVMCodeGenLevelDefault,
+	                             LLVMRelocDefault,
+	                             LLVMCodeModelDefault);
+
+	*tm = ac_llvm_per_thread.tm;
+
+	llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(ac_llvm_per_thread.tm);
+	if (TM->addPassesToEmitFile(ac_llvm_per_thread.pass, ac_llvm_per_thread.OStream, nullptr, llvm::TargetMachine::CGFT_ObjectFile)) {
+		assert(0);
+		return false;
+	}
+
+	ac_llvm_per_thread.init_passmgr(check_ir);
+	return true;
+}
+
+LLVMPassManagerRef ac_llvm_get_passmgr(void)
+{
+	return ac_llvm_per_thread.passmgr;
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 0aa803c5bc1..fd9fcb4ec8c 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -110,6 +110,18 @@ ac_get_store_intr_attribs(bool writeonly_memory)
 unsigned
 ac_count_scratch_private_memory(LLVMValueRef function);
 
+bool ac_compile_to_memory_buffer(LLVMModuleRef M,
+				 char **ErrorMessage,
+				 LLVMMemoryBufferRef *OutMemBuf);
+
+bool ac_llvm_init_per_thread_info(enum radeon_family family,
+				  enum ac_target_machine_options tm_options,
+				  LLVMTargetRef target,
+				  const char *triple,
+				  const char *features,
+				  bool check_ir,
+				  LLVMTargetMachineRef *tm);
+LLVMPassManagerRef ac_llvm_get_passmgr(void);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index cd8d86603bc..6a7a36b7ca5 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2995,34 +2995,12 @@ handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
 static void ac_llvm_finalize_module(struct radv_shader_context *ctx,
 				    const struct radv_nir_compiler_options *options)
 {
-	LLVMPassManagerRef passmgr;
-	/* Create the pass manager */
-	passmgr = LLVMCreateFunctionPassManagerForModule(
-							ctx->ac.module);
-
-	if (options->check_ir)
-		LLVMAddVerifierPass(passmgr);
-
-	/* This pass should eliminate all the load and store instructions */
-	LLVMAddPromoteMemoryToRegisterPass(passmgr);
-
-	/* Add some optimization passes */
-	LLVMAddScalarReplAggregatesPass(passmgr);
-	LLVMAddLICMPass(passmgr);
-	LLVMAddAggressiveDCEPass(passmgr);
-	LLVMAddCFGSimplificationPass(passmgr);
-	/* This is recommended by the instruction combining pass. */
-	LLVMAddEarlyCSEMemSSAPass(passmgr);
-	LLVMAddInstructionCombiningPass(passmgr);
+	LLVMPassManagerRef passmgr = ac_llvm_get_passmgr();
 
 	/* Run the pass */
-	LLVMInitializeFunctionPassManager(passmgr);
-	LLVMRunFunctionPassManager(passmgr, ctx->main_function);
-	LLVMFinalizeFunctionPassManager(passmgr);
+	LLVMRunPassManager(passmgr, ctx->ac.module);
 
 	LLVMDisposeBuilder(ctx->ac.builder);
-	LLVMDisposePassManager(passmgr);
-
 	ac_llvm_context_dispose(&ctx->ac);
 }
 
@@ -3356,8 +3334,7 @@ static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
 }
 
 static unsigned ac_llvm_compile(LLVMModuleRef M,
-                                struct ac_shader_binary *binary,
-                                LLVMTargetMachineRef tm)
+                                struct ac_shader_binary *binary)
 {
 	unsigned retval = 0;
 	char *err;
@@ -3374,8 +3351,7 @@ static unsigned ac_llvm_compile(LLVMModuleRef M,
 	                                &retval);
 
 	/* Compile IR*/
-	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
-	                                              &err, &out_buffer);
+	mem_err = ac_compile_to_memory_buffer(M, &err, &out_buffer);
 
 	/* Process Errors/Warnings */
 	if (mem_err) {
@@ -3398,8 +3374,7 @@ out:
 	return retval;
 }
 
-static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
-				   LLVMModuleRef llvm_module,
+static void ac_compile_llvm_module(LLVMModuleRef llvm_module,
 				   struct ac_shader_binary *binary,
 				   struct ac_shader_config *config,
 				   struct radv_shader_variant_info *shader_info,
@@ -3417,7 +3392,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
 		LLVMDisposeMessage(llvm_ir);
 	}
 
-	int v = ac_llvm_compile(llvm_module, binary, tm);
+	int v = ac_llvm_compile(llvm_module, binary);
 	if (v) {
 		fprintf(stderr, "compile failed\n");
 	}
@@ -3541,7 +3516,7 @@ radv_compile_nir_shader(LLVMTargetMachineRef tm,
 	llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
 	                                       options);
 
-	ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info,
+	ac_compile_llvm_module(llvm_module, binary, config, shader_info,
 			       nir[0]->info.stage, options);
 
 	for (int i = 0; i < nir_count; ++i)
@@ -3647,6 +3622,6 @@ radv_compile_gs_copy_shader(LLVMTargetMachineRef tm,
 
 	ac_llvm_finalize_module(&ctx, options);
 
-	ac_compile_llvm_module(tm, ctx.ac.module, binary, config, shader_info,
+	ac_compile_llvm_module(ctx.ac.module, binary, config, shader_info,
 			       MESA_SHADER_VERTEX, options);
 }
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 03185de304a..ab826761007 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -524,6 +524,7 @@ static LLVMTargetRef radv_get_llvm_target(const char *triple)
 
 static LLVMTargetMachineRef radv_create_target_machine(enum radeon_family family,
 						       enum ac_target_machine_options tm_options,
+						       bool check_ir,
 						       const char **out_triple)
 {
 	assert(family >= CHIP_TAHITI);
@@ -538,15 +539,9 @@ static LLVMTargetMachineRef radv_create_target_machine(enum radeon_family family
 		 tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
 		 tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
 
-	LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
-	                             target,
-	                             triple,
-	                             ac_get_llvm_processor_name(family),
-				     features,
-	                             LLVMCodeGenLevelDefault,
-	                             LLVMRelocDefault,
-	                             LLVMCodeModelDefault);
-
+	LLVMTargetMachineRef tm;
+	ac_llvm_init_per_thread_info(family, tm_options, target, triple, features, check_ir,
+				     &tm);
 	if (out_triple)
 		*out_triple = triple;
 	return tm;
@@ -587,7 +582,8 @@ shader_variant_create(struct radv_device *device,
 		tm_options |= AC_TM_SUPPORTS_SPILL;
 	if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
 		tm_options |= AC_TM_SISCHED;
-	tm = radv_create_target_machine(chip_family, tm_options, NULL);
+
+	tm = radv_create_target_machine(chip_family, tm_options, options->check_ir, NULL);
 
 	if (gs_copy_shader) {
 		assert(shader_count == 1);
@@ -600,8 +596,6 @@ shader_variant_create(struct radv_device *device,
 					options);
 	}
 
-	LLVMDisposeTargetMachine(tm);
-
 	radv_fill_shader_variant(device, variant, &binary, stage);
 
 	if (code_out) {
-- 
2.17.1



More information about the mesa-dev mailing list