[Mesa-dev] [PATCH 08/10] radeonsi: count and report temp arrays in scratch separately
Marek Olšák
maraeo at gmail.com
Mon Nov 28 11:17:01 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_shader.c | 40 ++++++++++++++++++++++++++++----
src/gallium/drivers/radeonsi/si_shader.h | 1 +
2 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 20f4a1d..f4c6e9c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5341,20 +5341,23 @@ static unsigned llvm_get_type_size(LLVMTypeRef type)
switch (kind) {
case LLVMIntegerTypeKind:
return LLVMGetIntTypeWidth(type) / 8;
case LLVMFloatTypeKind:
return 4;
case LLVMPointerTypeKind:
return 8;
case LLVMVectorTypeKind:
return LLVMGetVectorSize(type) *
llvm_get_type_size(LLVMGetElementType(type));
+ case LLVMArrayTypeKind:
+ return LLVMGetArrayLength(type) *
+ llvm_get_type_size(LLVMGetElementType(type));
default:
assert(0);
return 0;
}
}
static void declare_tess_lds(struct si_shader_context *ctx)
{
struct gallivm_state *gallivm = &ctx->gallivm;
struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
@@ -5989,39 +5992,41 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
"SPI_PS_INPUT_ADDR = 0x%04x\n"
"SPI_PS_INPUT_ENA = 0x%04x\n",
conf->spi_ps_input_addr, conf->spi_ps_input_ena);
}
fprintf(file, "*** SHADER STATS ***\n"
"SGPRS: %d\n"
"VGPRS: %d\n"
"Spilled SGPRs: %d\n"
"Spilled VGPRs: %d\n"
+ "Private memory VGPRs: %d\n"
"Code Size: %d bytes\n"
"LDS: %d blocks\n"
"Scratch: %d bytes per wave\n"
"Max Waves: %d\n"
"********************\n\n\n",
conf->num_sgprs, conf->num_vgprs,
- conf->spilled_sgprs, conf->spilled_vgprs, code_size,
+ conf->spilled_sgprs, conf->spilled_vgprs,
+ conf->private_mem_vgprs, code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
max_simd_waves);
}
pipe_debug_message(debug, SHADER_INFO,
"Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
"LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
- "Spilled VGPRs: %d",
+ "Spilled VGPRs: %d PrivMem VGPRs: %d",
conf->num_sgprs, conf->num_vgprs, code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
max_simd_waves, conf->spilled_sgprs,
- conf->spilled_vgprs);
+ conf->spilled_vgprs, conf->private_mem_vgprs);
}
static const char *si_get_shader_name(struct si_shader *shader,
unsigned processor)
{
switch (processor) {
case PIPE_SHADER_VERTEX:
if (shader->key.as_es)
return "Vertex Shader as ES";
else if (shader->key.as_ls)
@@ -6564,20 +6569,46 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
V_008DFC_SQ_EXP_PARAM + new_count, 0));
shader->info.vs_output_param_offset[out] = new_count;
new_count++;
break;
}
}
shader->info.nr_param_exports = new_count;
}
}
+static void si_count_scratch_private_memory(struct si_shader_context *ctx)
+{
+ ctx->shader->config.private_mem_vgprs = 0;
+
+ /* Process all LLVM instructions. */
+ LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(ctx->main_fn);
+ while (bb) {
+ LLVMValueRef next = LLVMGetFirstInstruction(bb);
+
+ while (next) {
+ LLVMValueRef inst = next;
+ next = LLVMGetNextInstruction(next);
+
+ if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
+ continue;
+
+ LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
+ /* No idea why LLVM aligns allocas to 4 elements. */
+ unsigned alignment = LLVMGetAlignment(inst);
+ unsigned dw_size = align(llvm_get_type_size(type) / 4, alignment);
+ ctx->shader->config.private_mem_vgprs += dw_size;
+ }
+ bb = LLVMGetNextBasicBlock(bb);
+ }
+}
+
static bool si_compile_tgsi_main(struct si_shader_context *ctx,
struct si_shader *shader)
{
struct si_shader_selector *sel = shader->selector;
struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
switch (ctx->type) {
case PIPE_SHADER_VERTEX:
ctx->load_input = declare_input_vs;
if (shader->key.as_ls)
@@ -7220,22 +7251,23 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
mod = bld_base->base.gallivm->module;
/* Dump LLVM IR before any optimization passes */
if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
r600_can_dump_shader(&sscreen->b, ctx.type))
LLVMDumpModule(mod);
si_llvm_finalize_module(&ctx,
r600_extra_shader_checks(&sscreen->b, ctx.type));
- /* Post-optimization transformations. */
+ /* Post-optimization transformations and analysis. */
si_eliminate_const_vs_outputs(&ctx);
+ si_count_scratch_private_memory(&ctx);
/* Compile to bytecode. */
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
mod, debug, ctx.type, "TGSI shader");
si_llvm_dispose(&ctx);
if (r) {
fprintf(stderr, "LLVM failed to compile shader\n");
return r;
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index aa37676..d4bc47b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -441,20 +441,21 @@ struct si_shader_key {
unsigned clip_disable:1;
} hw_vs; /* HW VS (it can be VS, TES, GS) */
} opt;
};
struct si_shader_config {
unsigned num_sgprs;
unsigned num_vgprs;
unsigned spilled_sgprs;
unsigned spilled_vgprs;
+ unsigned private_mem_vgprs;
unsigned lds_size;
unsigned spi_ps_input_ena;
unsigned spi_ps_input_addr;
unsigned float_mode;
unsigned scratch_bytes_per_wave;
unsigned rsrc1;
unsigned rsrc2;
};
enum {
--
2.7.4
More information about the mesa-dev
mailing list