[Mesa-dev] [PATCH 1/3] radeonsi: move max_simd_waves computation into a separate function

Marek Olšák maraeo at gmail.com
Sat Jan 27 18:01:48 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 34 +++++++++++++++++++++-----------
 src/gallium/drivers/radeonsi/si_shader.h |  1 +
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b18b4f6..f1ac94f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5353,47 +5353,42 @@ static void si_shader_dump_disassembly(const struct ac_shader_binary *binary,
 	} else {
 		fprintf(file, "Shader %s binary:\n", name);
 		for (i = 0; i < binary->code_size; i += 4) {
 			fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
 				binary->code[i + 3], binary->code[i + 2],
 				binary->code[i + 1], binary->code[i]);
 		}
 	}
 }
 
-static void si_shader_dump_stats(struct si_screen *sscreen,
-				 const struct si_shader *shader,
-			         struct pipe_debug_callback *debug,
-			         unsigned processor,
-				 FILE *file,
-				 bool check_debug_option)
+static void si_calculate_max_simd_waves(struct si_shader *shader)
 {
-	const struct si_shader_config *conf = &shader->config;
-	unsigned num_inputs = shader->selector ? shader->selector->info.num_inputs : 0;
-	unsigned code_size = si_get_shader_binary_size(shader);
+	struct si_screen *sscreen = shader->selector->screen;
+	struct si_shader_config *conf = &shader->config;
+	unsigned num_inputs = shader->selector->info.num_inputs;
 	unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;
 	unsigned lds_per_wave = 0;
 	unsigned max_simd_waves;
 
 	switch (sscreen->info.family) {
 	/* These always have 8 waves: */
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 		max_simd_waves = 8;
 		break;
 	default:
 		max_simd_waves = 10;
 	}
 
 	/* Compute LDS usage for PS. */
-	switch (processor) {
+	switch (shader->selector->type) {
 	case PIPE_SHADER_FRAGMENT:
 		/* The minimum usage per wave is (num_inputs * 48). The maximum
 		 * usage is (num_inputs * 48 * 16).
 		 * We can get anything in between and it varies between waves.
 		 *
 		 * The 48 bytes per input for a single primitive is equal to
 		 * 4 bytes/component * 4 components/input * 3 points.
 		 *
 		 * Other stages don't know the size at compile time or don't
 		 * allocate LDS per wave, but instead they do it per thread group.
@@ -5420,20 +5415,33 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
 	}
 
 	if (conf->num_vgprs)
 		max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
 
 	/* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
 	 * 16KB makes some SIMDs unoccupied). */
 	if (lds_per_wave)
 		max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
 
+	conf->max_simd_waves = max_simd_waves;
+}
+
+static void si_shader_dump_stats(struct si_screen *sscreen,
+				 const struct si_shader *shader,
+			         struct pipe_debug_callback *debug,
+			         unsigned processor,
+				 FILE *file,
+				 bool check_debug_option)
+{
+	const struct si_shader_config *conf = &shader->config;
+	unsigned code_size = si_get_shader_binary_size(shader);
+
 	if (!check_debug_option ||
 	    si_can_dump_shader(sscreen, processor)) {
 		if (processor == PIPE_SHADER_FRAGMENT) {
 			fprintf(file, "*** SHADER CONFIG ***\n"
 				"SPI_PS_INPUT_ADDR = 0x%04x\n"
 				"SPI_PS_INPUT_ENA  = 0x%04x\n",
 				conf->spi_ps_input_addr, conf->spi_ps_input_ena);
 		}
 
 		fprintf(file, "*** SHADER STATS ***\n"
@@ -5444,30 +5452,30 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
 			"Private memory VGPRs: %d\n"
 			"Code Size: %d bytes\n"
 			"LDS: %d blocks\n"
 			"Scratch: %d bytes per wave\n"
 			"Max Waves: %d\n"
 			"********************\n\n\n",
 			conf->num_sgprs, conf->num_vgprs,
 			conf->spilled_sgprs, conf->spilled_vgprs,
 			conf->private_mem_vgprs, code_size,
 			conf->lds_size, conf->scratch_bytes_per_wave,
-			max_simd_waves);
+			conf->max_simd_waves);
 	}
 
 	pipe_debug_message(debug, SHADER_INFO,
 			   "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
 			   "LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
 			   "Spilled VGPRs: %d PrivMem VGPRs: %d",
 			   conf->num_sgprs, conf->num_vgprs, code_size,
 			   conf->lds_size, conf->scratch_bytes_per_wave,
-			   max_simd_waves, conf->spilled_sgprs,
+			   conf->max_simd_waves, conf->spilled_sgprs,
 			   conf->spilled_vgprs, conf->private_mem_vgprs);
 }
 
 const char *si_get_shader_name(const struct si_shader *shader, unsigned processor)
 {
 	switch (processor) {
 	case PIPE_SHADER_VERTEX:
 		if (shader->key.as_es)
 			return "Vertex Shader as ES";
 		else if (shader->key.as_ls)
@@ -6960,20 +6968,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 		if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) {
 			shader->info.ancillary_vgpr_index = shader->info.num_input_vgprs;
 			shader->info.num_input_vgprs += 1;
 		}
 		if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
 			shader->info.num_input_vgprs += 1;
 		if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
 			shader->info.num_input_vgprs += 1;
 	}
 
+	si_calculate_max_simd_waves(shader);
 	return 0;
 }
 
 /**
  * Create, compile and return a shader part (prolog or epilog).
  *
  * \param sscreen	screen
  * \param list		list of shader parts of the same category
  * \param type		shader type
  * \param key		shader part key
@@ -8033,20 +8042,21 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 							shader->prolog2->config.num_sgprs);
 			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
 							shader->prolog2->config.num_vgprs);
 		}
 		if (shader->epilog) {
 			shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
 							shader->epilog->config.num_sgprs);
 			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
 							shader->epilog->config.num_vgprs);
 		}
+		si_calculate_max_simd_waves(shader);
 	}
 
 	si_fix_resource_usage(sscreen, shader);
 	si_shader_dump(sscreen, shader, debug, sel->info.processor,
 		       stderr, true);
 
 	/* Upload. */
 	r = si_shader_binary_upload(sscreen, shader);
 	if (r) {
 		fprintf(stderr, "LLVM failed to upload shader\n");
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c449aa9..6ed1646 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -550,20 +550,21 @@ struct si_shader_key {
 /* Restore the pack alignment to default. */
 #pragma pack(pop)
 
 struct si_shader_config {
 	unsigned			num_sgprs;
 	unsigned			num_vgprs;
 	unsigned			spilled_sgprs;
 	unsigned			spilled_vgprs;
 	unsigned			private_mem_vgprs;
 	unsigned			lds_size;
+	unsigned			max_simd_waves;
 	unsigned			spi_ps_input_ena;
 	unsigned			spi_ps_input_addr;
 	unsigned			float_mode;
 	unsigned			scratch_bytes_per_wave;
 	unsigned			rsrc1;
 	unsigned			rsrc2;
 };
 
 /* GCN-specific shader info. */
 struct si_shader_info {
-- 
2.7.4



More information about the mesa-dev mailing list