[Mesa-dev] [PATCH 1/3] radeonsi: move max_simd_waves computation into a separate function
Marek Olšák
maraeo at gmail.com
Sat Jan 27 18:01:48 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_shader.c | 34 +++++++++++++++++++++-----------
src/gallium/drivers/radeonsi/si_shader.h | 1 +
2 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b18b4f6..f1ac94f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5353,47 +5353,42 @@ static void si_shader_dump_disassembly(const struct ac_shader_binary *binary,
} else {
fprintf(file, "Shader %s binary:\n", name);
for (i = 0; i < binary->code_size; i += 4) {
fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
binary->code[i + 3], binary->code[i + 2],
binary->code[i + 1], binary->code[i]);
}
}
}
-static void si_shader_dump_stats(struct si_screen *sscreen,
- const struct si_shader *shader,
- struct pipe_debug_callback *debug,
- unsigned processor,
- FILE *file,
- bool check_debug_option)
+static void si_calculate_max_simd_waves(struct si_shader *shader)
{
- const struct si_shader_config *conf = &shader->config;
- unsigned num_inputs = shader->selector ? shader->selector->info.num_inputs : 0;
- unsigned code_size = si_get_shader_binary_size(shader);
+ struct si_screen *sscreen = shader->selector->screen;
+ struct si_shader_config *conf = &shader->config;
+ unsigned num_inputs = shader->selector->info.num_inputs;
unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;
unsigned lds_per_wave = 0;
unsigned max_simd_waves;
switch (sscreen->info.family) {
/* These always have 8 waves: */
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
max_simd_waves = 8;
break;
default:
max_simd_waves = 10;
}
/* Compute LDS usage for PS. */
- switch (processor) {
+ switch (shader->selector->type) {
case PIPE_SHADER_FRAGMENT:
/* The minimum usage per wave is (num_inputs * 48). The maximum
* usage is (num_inputs * 48 * 16).
* We can get anything in between and it varies between waves.
*
* The 48 bytes per input for a single primitive is equal to
* 4 bytes/component * 4 components/input * 3 points.
*
* Other stages don't know the size at compile time or don't
* allocate LDS per wave, but instead they do it per thread group.
@@ -5420,20 +5415,33 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
}
if (conf->num_vgprs)
max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
/* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
* 16KB makes some SIMDs unoccupied). */
if (lds_per_wave)
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+ conf->max_simd_waves = max_simd_waves;
+}
+
+static void si_shader_dump_stats(struct si_screen *sscreen,
+ const struct si_shader *shader,
+ struct pipe_debug_callback *debug,
+ unsigned processor,
+ FILE *file,
+ bool check_debug_option)
+{
+ const struct si_shader_config *conf = &shader->config;
+ unsigned code_size = si_get_shader_binary_size(shader);
+
if (!check_debug_option ||
si_can_dump_shader(sscreen, processor)) {
if (processor == PIPE_SHADER_FRAGMENT) {
fprintf(file, "*** SHADER CONFIG ***\n"
"SPI_PS_INPUT_ADDR = 0x%04x\n"
"SPI_PS_INPUT_ENA = 0x%04x\n",
conf->spi_ps_input_addr, conf->spi_ps_input_ena);
}
fprintf(file, "*** SHADER STATS ***\n"
@@ -5444,30 +5452,30 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
"Private memory VGPRs: %d\n"
"Code Size: %d bytes\n"
"LDS: %d blocks\n"
"Scratch: %d bytes per wave\n"
"Max Waves: %d\n"
"********************\n\n\n",
conf->num_sgprs, conf->num_vgprs,
conf->spilled_sgprs, conf->spilled_vgprs,
conf->private_mem_vgprs, code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
- max_simd_waves);
+ conf->max_simd_waves);
}
pipe_debug_message(debug, SHADER_INFO,
"Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
"LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
"Spilled VGPRs: %d PrivMem VGPRs: %d",
conf->num_sgprs, conf->num_vgprs, code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
- max_simd_waves, conf->spilled_sgprs,
+ conf->max_simd_waves, conf->spilled_sgprs,
conf->spilled_vgprs, conf->private_mem_vgprs);
}
const char *si_get_shader_name(const struct si_shader *shader, unsigned processor)
{
switch (processor) {
case PIPE_SHADER_VERTEX:
if (shader->key.as_es)
return "Vertex Shader as ES";
else if (shader->key.as_ls)
@@ -6960,20 +6968,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) {
shader->info.ancillary_vgpr_index = shader->info.num_input_vgprs;
shader->info.num_input_vgprs += 1;
}
if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
shader->info.num_input_vgprs += 1;
if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
shader->info.num_input_vgprs += 1;
}
+ si_calculate_max_simd_waves(shader);
return 0;
}
/**
* Create, compile and return a shader part (prolog or epilog).
*
* \param sscreen screen
* \param list list of shader parts of the same category
* \param type shader type
* \param key shader part key
@@ -8033,20 +8042,21 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
shader->prolog2->config.num_sgprs);
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
shader->prolog2->config.num_vgprs);
}
if (shader->epilog) {
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
shader->epilog->config.num_sgprs);
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
shader->epilog->config.num_vgprs);
}
+ si_calculate_max_simd_waves(shader);
}
si_fix_resource_usage(sscreen, shader);
si_shader_dump(sscreen, shader, debug, sel->info.processor,
stderr, true);
/* Upload. */
r = si_shader_binary_upload(sscreen, shader);
if (r) {
fprintf(stderr, "LLVM failed to upload shader\n");
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c449aa9..6ed1646 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -550,20 +550,21 @@ struct si_shader_key {
/* Restore the pack alignment to default. */
#pragma pack(pop)
struct si_shader_config {
unsigned num_sgprs;
unsigned num_vgprs;
unsigned spilled_sgprs;
unsigned spilled_vgprs;
unsigned private_mem_vgprs;
unsigned lds_size;
+ unsigned max_simd_waves;
unsigned spi_ps_input_ena;
unsigned spi_ps_input_addr;
unsigned float_mode;
unsigned scratch_bytes_per_wave;
unsigned rsrc1;
unsigned rsrc2;
};
/* GCN-specific shader info. */
struct si_shader_info {
--
2.7.4
More information about the mesa-dev mailing list