[Mesa-dev] [PATCH 08/10] radeonsi: count and report temp arrays in scratch separately
Nicolai Hähnle
nhaehnle at gmail.com
Tue Nov 29 19:41:36 UTC 2016
Maybe only do this (the walk over all LLVM instructions to count allocas) when debug printing is enabled?
Nicolai
On 28.11.2016 12:17, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 40 ++++++++++++++++++++++++++++----
> src/gallium/drivers/radeonsi/si_shader.h | 1 +
> 2 files changed, 37 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 20f4a1d..f4c6e9c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -5341,20 +5341,23 @@ static unsigned llvm_get_type_size(LLVMTypeRef type)
> switch (kind) {
> case LLVMIntegerTypeKind:
> return LLVMGetIntTypeWidth(type) / 8;
> case LLVMFloatTypeKind:
> return 4;
> case LLVMPointerTypeKind:
> return 8;
> case LLVMVectorTypeKind:
> return LLVMGetVectorSize(type) *
> llvm_get_type_size(LLVMGetElementType(type));
> + case LLVMArrayTypeKind:
> + return LLVMGetArrayLength(type) *
> + llvm_get_type_size(LLVMGetElementType(type));
> default:
> assert(0);
> return 0;
> }
> }
>
> static void declare_tess_lds(struct si_shader_context *ctx)
> {
> struct gallivm_state *gallivm = &ctx->gallivm;
> struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
> @@ -5989,39 +5992,41 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
> "SPI_PS_INPUT_ADDR = 0x%04x\n"
> "SPI_PS_INPUT_ENA = 0x%04x\n",
> conf->spi_ps_input_addr, conf->spi_ps_input_ena);
> }
>
> fprintf(file, "*** SHADER STATS ***\n"
> "SGPRS: %d\n"
> "VGPRS: %d\n"
> "Spilled SGPRs: %d\n"
> "Spilled VGPRs: %d\n"
> + "Private memory VGPRs: %d\n"
> "Code Size: %d bytes\n"
> "LDS: %d blocks\n"
> "Scratch: %d bytes per wave\n"
> "Max Waves: %d\n"
> "********************\n\n\n",
> conf->num_sgprs, conf->num_vgprs,
> - conf->spilled_sgprs, conf->spilled_vgprs, code_size,
> + conf->spilled_sgprs, conf->spilled_vgprs,
> + conf->private_mem_vgprs, code_size,
> conf->lds_size, conf->scratch_bytes_per_wave,
> max_simd_waves);
> }
>
> pipe_debug_message(debug, SHADER_INFO,
> "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
> "LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
> - "Spilled VGPRs: %d",
> + "Spilled VGPRs: %d PrivMem VGPRs: %d",
> conf->num_sgprs, conf->num_vgprs, code_size,
> conf->lds_size, conf->scratch_bytes_per_wave,
> max_simd_waves, conf->spilled_sgprs,
> - conf->spilled_vgprs);
> + conf->spilled_vgprs, conf->private_mem_vgprs);
> }
>
> static const char *si_get_shader_name(struct si_shader *shader,
> unsigned processor)
> {
> switch (processor) {
> case PIPE_SHADER_VERTEX:
> if (shader->key.as_es)
> return "Vertex Shader as ES";
> else if (shader->key.as_ls)
> @@ -6564,20 +6569,46 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
> V_008DFC_SQ_EXP_PARAM + new_count, 0));
> shader->info.vs_output_param_offset[out] = new_count;
> new_count++;
> break;
> }
> }
> shader->info.nr_param_exports = new_count;
> }
> }
>
> +static void si_count_scratch_private_memory(struct si_shader_context *ctx)
> +{
> + ctx->shader->config.private_mem_vgprs = 0;
> +
> + /* Process all LLVM instructions. */
> + LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(ctx->main_fn);
> + while (bb) {
> + LLVMValueRef next = LLVMGetFirstInstruction(bb);
> +
> + while (next) {
> + LLVMValueRef inst = next;
> + next = LLVMGetNextInstruction(next);
> +
> + if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
> + continue;
> +
> + LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
> + /* No idea why LLVM aligns allocas to 4 elements. */
> + unsigned alignment = LLVMGetAlignment(inst);
> + unsigned dw_size = align(llvm_get_type_size(type) / 4, alignment);
> + ctx->shader->config.private_mem_vgprs += dw_size;
> + }
> + bb = LLVMGetNextBasicBlock(bb);
> + }
> +}
> +
> static bool si_compile_tgsi_main(struct si_shader_context *ctx,
> struct si_shader *shader)
> {
> struct si_shader_selector *sel = shader->selector;
> struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
>
> switch (ctx->type) {
> case PIPE_SHADER_VERTEX:
> ctx->load_input = declare_input_vs;
> if (shader->key.as_ls)
> @@ -7220,22 +7251,23 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
> mod = bld_base->base.gallivm->module;
>
> /* Dump LLVM IR before any optimization passes */
> if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
> r600_can_dump_shader(&sscreen->b, ctx.type))
> LLVMDumpModule(mod);
>
> si_llvm_finalize_module(&ctx,
> r600_extra_shader_checks(&sscreen->b, ctx.type));
>
> - /* Post-optimization transformations. */
> + /* Post-optimization transformations and analysis. */
> si_eliminate_const_vs_outputs(&ctx);
> + si_count_scratch_private_memory(&ctx);
>
> /* Compile to bytecode. */
> r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
> mod, debug, ctx.type, "TGSI shader");
> si_llvm_dispose(&ctx);
> if (r) {
> fprintf(stderr, "LLVM failed to compile shader\n");
> return r;
> }
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index aa37676..d4bc47b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -441,20 +441,21 @@ struct si_shader_key {
> unsigned clip_disable:1;
> } hw_vs; /* HW VS (it can be VS, TES, GS) */
> } opt;
> };
>
> struct si_shader_config {
> unsigned num_sgprs;
> unsigned num_vgprs;
> unsigned spilled_sgprs;
> unsigned spilled_vgprs;
> + unsigned private_mem_vgprs;
> unsigned lds_size;
> unsigned spi_ps_input_ena;
> unsigned spi_ps_input_addr;
> unsigned float_mode;
> unsigned scratch_bytes_per_wave;
> unsigned rsrc1;
> unsigned rsrc2;
> };
>
> enum {
>
More information about the mesa-dev mailing list