[Mesa-dev] [PATCH] ac: use amdgpu-flat-work-group-size

Marek Olšák maraeo at gmail.com
Fri May 31 20:08:17 UTC 2019


From: Marek Olšák <marek.olsak at amd.com>

---
 src/amd/common/ac_llvm_util.c            | 10 ++++++++++
 src/amd/common/ac_llvm_util.h            |  1 +
 src/amd/vulkan/radv_nir_to_llvm.c        |  7 ++-----
 src/gallium/drivers/radeonsi/si_shader.c |  7 ++-----
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 5b701603ebb..c8a8bf146fe 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -262,20 +262,30 @@ ac_dump_module(LLVMModuleRef module)
 void
 ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
 				     const char *name, unsigned value)
 {
 	char str[16];
 
 	snprintf(str, sizeof(str), "0x%x", value);
 	LLVMAddTargetDependentFunctionAttr(F, name, str);
 }
 
+void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
+{
+	if (!size)
+		return;
+
+	char str[32];
+	snprintf(str, sizeof(str), "%u,%u", size, size);
+	LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
+}
+
 unsigned
 ac_count_scratch_private_memory(LLVMValueRef function)
 {
 	unsigned private_mem_vgprs = 0;
 
 	/* Process all LLVM instructions. */
 	LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
 	while (bb) {
 		LLVMValueRef next = LLVMGetFirstInstruction(bb);
 
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index ca00540da80..18102be5207 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -102,20 +102,21 @@ void ac_dump_module(LLVMModuleRef module);
 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
 bool ac_llvm_is_function(LLVMValueRef v);
 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
 
 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
 				 enum ac_float_mode float_mode);
 
 void
 ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
 				     const char *name, unsigned value);
+void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
 
 static inline unsigned
 ac_get_load_intr_attribs(bool can_speculate)
 {
 	/* READNONE means writes can't affect it, while READONLY means that
 	 * writes can affect it. */
 	return can_speculate ? AC_FUNC_ATTR_READNONE :
 			       AC_FUNC_ATTR_READONLY;
 }
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 341f6388f32..6f102647ba8 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -511,25 +511,22 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
 			ac_add_attr_dereferenceable(P, UINT64_MAX);
 		}
 	}
 
 	if (options->address32_hi) {
 		ac_llvm_add_target_dep_function_attr(main_function,
 						     "amdgpu-32bit-address-high-bits",
 						     options->address32_hi);
 	}
 
-	if (max_workgroup_size) {
-		ac_llvm_add_target_dep_function_attr(main_function,
-						     "amdgpu-max-work-group-size",
-						     max_workgroup_size);
-	}
+	ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
+
 	if (options->unsafe_math) {
 		/* These were copied from some LLVM test. */
 		LLVMAddTargetDependentFunctionAttr(main_function,
 						   "less-precise-fpmad",
 						   "true");
 		LLVMAddTargetDependentFunctionAttr(main_function,
 						   "no-infs-fp-math",
 						   "true");
 		LLVMAddTargetDependentFunctionAttr(main_function,
 						   "no-nans-fp-math",
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d2927d0254b..1ba6b8b6033 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4276,25 +4276,22 @@ void si_create_function(struct si_shader_context *ctx,
 		if (fninfo->assign[i])
 			*fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
 	}
 
 	if (ctx->screen->info.address32_hi) {
 		ac_llvm_add_target_dep_function_attr(ctx->main_fn,
 						     "amdgpu-32bit-address-high-bits",
 						     ctx->screen->info.address32_hi);
 	}
 
-	if (max_workgroup_size) {
-		ac_llvm_add_target_dep_function_attr(ctx->main_fn,
-						     "amdgpu-max-work-group-size",
-						     max_workgroup_size);
-	}
+	ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
+
 	LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
 					   "no-signed-zeros-fp-math",
 					   "true");
 
 	if (ctx->screen->debug_flags & DBG(UNSAFE_MATH)) {
 		/* These were copied from some LLVM test. */
 		LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
 						   "less-precise-fpmad",
 						   "true");
 		LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
-- 
2.17.1



More information about the mesa-dev mailing list