[Mesa-dev] [PATCH 1/8] radeonsi: consolidate max-work-group-size computation

Wed Nov 30 01:36:32 UTC 2016

From: Marek Olšák <marek.olsak at amd.com>

The next commit will need this.

Cc: 13.0 <mesa-stable at lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_shader.c | 43 ++++++++++++++------------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 1e3be62..b19c61e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5354,20 +5354,37 @@ static void declare_tess_lds(struct si_shader_context *ctx)
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
 	struct lp_build_context *uint = &bld_base->uint_bld;
 
 	unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768;
 	ctx->lds = LLVMBuildIntToPtr(gallivm->builder, uint->zero,
 		LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
 		"tess_lds");
 }
 
+static unsigned si_get_max_workgroup_size(struct si_shader *shader)
+{
+	const unsigned *properties = shader->selector->info.properties;
+	unsigned max_work_group_size =
+	               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+	               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+	               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
+
+	if (!max_work_group_size) {
+		/* This is a variable group size compute shader,
+		 * compile it for the maximum possible group size.
+		 */
+		max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+	}
+	return max_work_group_size;
+}
+
 static void create_function(struct si_shader_context *ctx)
 {
 	struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	struct si_shader *shader = ctx->shader;
 	LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
 	LLVMTypeRef returns[16+32*4];
 	unsigned i, last_sgpr, num_params, num_return_sgprs;
 	unsigned num_returns = 0;
 	unsigned num_prolog_vgprs = 0;
@@ -5580,36 +5597,23 @@ static void create_function(struct si_shader_context *ctx)
 				      "InitialPSInputAddr",
 				      S_0286D0_PERSP_SAMPLE_ENA(1) |
 				      S_0286D0_PERSP_CENTER_ENA(1) |
 				      S_0286D0_PERSP_CENTROID_ENA(1) |
 				      S_0286D0_LINEAR_SAMPLE_ENA(1) |
 				      S_0286D0_LINEAR_CENTER_ENA(1) |
 				      S_0286D0_LINEAR_CENTROID_ENA(1) |
 				      S_0286D0_FRONT_FACE_ENA(1) |
 				      S_0286D0_POS_FIXED_PT_ENA(1));
 	} else if (ctx->type == PIPE_SHADER_COMPUTE) {
-		const unsigned *properties = shader->selector->info.properties;
-		unsigned max_work_group_size =
-		               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
-		               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
-		               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
-
-		if (!max_work_group_size) {
-			/* This is a variable group size compute shader,
-			 * compile it for the maximum possible group size.
-			 */
-			max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
-		}
-
 		si_llvm_add_attribute(ctx->main_fn,
 				      "amdgpu-max-work-group-size",
-				      max_work_group_size);
+				      si_get_max_workgroup_size(shader));
 	}
 
 	shader->info.num_input_sgprs = 0;
 	shader->info.num_input_vgprs = 0;
 
 	for (i = 0; i <= last_sgpr; ++i)
 		shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
 
 	for (; i < num_params; ++i)
 		shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
@@ -7263,34 +7267,25 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	si_llvm_dispose(&ctx);
 	if (r) {
 		fprintf(stderr, "LLVM failed to compile shader\n");
 		return r;
 	}
 
 	/* Validate SGPR and VGPR usage for compute to detect compiler bugs.
 	 * LLVM 3.9svn has this bug.
 	 */
 	if (sel->type == PIPE_SHADER_COMPUTE) {
-		unsigned *props = sel->info.properties;
 		unsigned wave_size = 64;
 		unsigned max_vgprs = 256;
 		unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
 		unsigned max_sgprs_per_wave = 128;
-		unsigned max_block_threads;
-
-		if (props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH])
-			max_block_threads = props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
-					    props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
-					    props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
-		else
-			max_block_threads = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
-
+		unsigned max_block_threads = si_get_max_workgroup_size(shader);
 		unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
 		unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
 
 		max_vgprs = max_vgprs / min_waves_per_simd;
 		max_sgprs = MIN2(max_sgprs / min_waves_per_simd, max_sgprs_per_wave);
 
 		if (shader->config.num_sgprs > max_sgprs ||
 		    shader->config.num_vgprs > max_vgprs) {
 			fprintf(stderr, "LLVM failed to compile a shader correctly: "
 				"SGPR:VGPR usage is %u:%u, but the hw limit is %u:%u\n",
-- 
2.7.4