[Mesa-dev] [PATCH 04/10] radeonsi: use ac_shader_config
Nicolai Hähnle
nhaehnle at gmail.com
Fri May 3 11:18:23 UTC 2019
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/amd/common/ac_binary.c | 2 +
src/gallium/drivers/radeonsi/si_compute.c | 14 +--
src/gallium/drivers/radeonsi/si_shader.c | 112 +++-------------------
src/gallium/drivers/radeonsi/si_shader.h | 25 +----
4 files changed, 27 insertions(+), 126 deletions(-)
diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c
index 44251886b5f..d0ca55e0e0d 100644
--- a/src/amd/common/ac_binary.c
+++ b/src/amd/common/ac_binary.c
@@ -218,26 +218,28 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,
unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i + 4));
switch (reg) {
case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
case R_00B848_COMPUTE_PGM_RSRC1:
case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
conf->float_mode = G_00B028_FLOAT_MODE(value);
+ conf->rsrc1 = value;
break;
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
break;
case R_00B84C_COMPUTE_PGM_RSRC2:
conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
+ conf->rsrc2 = value;
break;
case R_0286CC_SPI_PS_INPUT_ENA:
conf->spi_ps_input_ena = value;
break;
case R_0286D0_SPI_PS_INPUT_ADDR:
conf->spi_ps_input_addr = value;
break;
case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
/* WAVESIZE is in units of 256 dwords. */
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 541d7e6f118..02d7bac406a 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -59,21 +59,21 @@ static const amd_kernel_code_t *si_compute_get_code_object(
uint64_t symbol_offset)
{
if (!program->use_code_object_v2) {
return NULL;
}
return (const amd_kernel_code_t*)
(program->shader.binary.code + symbol_offset);
}
static void code_object_to_config(const amd_kernel_code_t *code_object,
- struct si_shader_config *out_config) {
+ struct ac_shader_config *out_config) {
uint32_t rsrc1 = code_object->compute_pgm_resource_registers;
uint32_t rsrc2 = code_object->compute_pgm_resource_registers >> 32;
out_config->num_sgprs = code_object->wavefront_sgpr_count;
out_config->num_vgprs = code_object->workitem_vgpr_count;
out_config->float_mode = G_00B028_FLOAT_MODE(rsrc1);
out_config->rsrc1 = rsrc1;
out_config->lds_size = MAX2(out_config->lds_size, G_00B84C_LDS_SIZE(rsrc2));
out_config->rsrc2 = rsrc2;
out_config->scratch_bytes_per_wave =
@@ -241,22 +241,22 @@ static void *si_create_compute_state(
const amd_kernel_code_t *code_object =
si_compute_get_code_object(program, 0);
code_object_to_config(code_object, &program->shader.config);
if (program->shader.binary.reloc_count != 0) {
fprintf(stderr, "Error: %d unsupported relocations\n",
program->shader.binary.reloc_count);
FREE(program);
return NULL;
}
} else {
- si_shader_binary_read_config(&program->shader.binary,
- &program->shader.config, 0);
+ ac_shader_binary_read_config(&program->shader.binary,
+ &program->shader.config, 0, false);
}
si_shader_dump(sctx->screen, &program->shader, &sctx->debug,
PIPE_SHADER_COMPUTE, stderr, true);
if (si_shader_binary_upload(sctx->screen, &program->shader) < 0) {
fprintf(stderr, "LLVM failed to upload shader\n");
FREE(program);
return NULL;
}
}
@@ -362,21 +362,21 @@ static void si_initialize_compute(struct si_context *sctx)
bc_va >> 8);
}
}
sctx->cs_shader_state.emitted_program = NULL;
sctx->cs_shader_state.initialized = true;
}
static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
struct si_shader *shader,
- struct si_shader_config *config)
+ struct ac_shader_config *config)
{
uint64_t scratch_bo_size, scratch_needed;
scratch_bo_size = 0;
scratch_needed = config->scratch_bytes_per_wave * sctx->scratch_waves;
if (sctx->compute_scratch_buffer)
scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
if (scratch_bo_size < scratch_needed) {
si_resource_reference(&sctx->compute_scratch_buffer, NULL);
@@ -405,38 +405,38 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
return true;
}
static bool si_switch_compute_shader(struct si_context *sctx,
struct si_compute *program,
struct si_shader *shader,
const amd_kernel_code_t *code_object,
unsigned offset)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
- struct si_shader_config inline_config = {0};
- struct si_shader_config *config;
+ struct ac_shader_config inline_config = {0};
+ struct ac_shader_config *config;
uint64_t shader_va;
if (sctx->cs_shader_state.emitted_program == program &&
sctx->cs_shader_state.offset == offset)
return true;
if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
config = &shader->config;
} else {
unsigned lds_blocks;
config = &inline_config;
if (code_object) {
code_object_to_config(code_object, config);
} else {
- si_shader_binary_read_config(&shader->binary, config, offset);
+ ac_shader_binary_read_config(&shader->binary, config, offset, false);
}
lds_blocks = config->lds_size;
/* XXX: We are over allocating LDS. For SI, the shader reports
* LDS in blocks of 256 bytes, so if there are 4 bytes lds
* allocated in the shader and 4 bytes allocated by the state
* tracker, then we will set LDS_SIZE to 512 bytes rather than 256.
*/
if (sctx->chip_class <= SI) {
lds_blocks += align(program->local_size, 256) >> 8;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f6d882cf583..da43447013d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4962,104 +4962,20 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
/* The stipple pattern is 32x32, each row has 32 bits. */
offset = LLVMBuildMul(builder, address[1],
LLVMConstInt(ctx->i32, 4, 0), "");
row = buffer_load_const(ctx, desc, offset);
row = ac_to_integer(&ctx->ac, row);
bit = LLVMBuildLShr(builder, row, address[0], "");
bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
ac_build_kill_if_false(&ctx->ac, bit);
}
-void si_shader_binary_read_config(struct ac_shader_binary *binary,
- struct si_shader_config *conf,
- unsigned symbol_offset)
-{
- unsigned i;
- const unsigned char *config =
- ac_shader_binary_config_start(binary, symbol_offset);
- bool really_needs_scratch = false;
-
- /* LLVM adds SGPR spills to the scratch size.
- * Find out if we really need the scratch buffer.
- */
- for (i = 0; i < binary->reloc_count; i++) {
- const struct ac_shader_reloc *reloc = &binary->relocs[i];
-
- if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
- !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
- really_needs_scratch = true;
- break;
- }
- }
-
- /* XXX: We may be able to emit some of these values directly rather than
- * extracting fields to be emitted later.
- */
-
- for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
- unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
- unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
- switch (reg) {
- case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
- case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
- case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
- case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
- case R_00B848_COMPUTE_PGM_RSRC1:
- conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
- conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
- conf->float_mode = G_00B028_FLOAT_MODE(value);
- conf->rsrc1 = value;
- break;
- case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
- conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
- break;
- case R_00B84C_COMPUTE_PGM_RSRC2:
- conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
- conf->rsrc2 = value;
- break;
- case R_0286CC_SPI_PS_INPUT_ENA:
- conf->spi_ps_input_ena = value;
- break;
- case R_0286D0_SPI_PS_INPUT_ADDR:
- conf->spi_ps_input_addr = value;
- break;
- case R_0286E8_SPI_TMPRING_SIZE:
- case R_00B860_COMPUTE_TMPRING_SIZE:
- /* WAVESIZE is in units of 256 dwords. */
- if (really_needs_scratch)
- conf->scratch_bytes_per_wave =
- G_00B860_WAVESIZE(value) * 256 * 4;
- break;
- case 0x4: /* SPILLED_SGPRS */
- conf->spilled_sgprs = value;
- break;
- case 0x8: /* SPILLED_VGPRS */
- conf->spilled_vgprs = value;
- break;
- default:
- {
- static bool printed;
-
- if (!printed) {
- fprintf(stderr, "Warning: LLVM emitted unknown "
- "config register: 0x%x\n", reg);
- printed = true;
- }
- }
- break;
- }
- }
-
- if (!conf->spi_ps_input_addr)
- conf->spi_ps_input_addr = conf->spi_ps_input_ena;
-}
-
void si_shader_apply_scratch_relocs(struct si_shader *shader,
uint64_t scratch_va)
{
unsigned i;
uint32_t scratch_rsrc_dword0 = scratch_va;
uint32_t scratch_rsrc_dword1 =
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
/* Enable scratch coalescing. */
scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
@@ -5213,21 +5129,21 @@ static void si_shader_dump_disassembly(const struct ac_shader_binary *binary,
fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
binary->code[i + 3], binary->code[i + 2],
binary->code[i + 1], binary->code[i]);
}
}
}
static void si_calculate_max_simd_waves(struct si_shader *shader)
{
struct si_screen *sscreen = shader->selector->screen;
- struct si_shader_config *conf = &shader->config;
+ struct ac_shader_config *conf = &shader->config;
unsigned num_inputs = shader->selector->info.num_inputs;
unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;
unsigned lds_per_wave = 0;
unsigned max_simd_waves;
max_simd_waves = ac_get_max_simd_waves(sscreen->info.family);
/* Compute LDS usage for PS. */
switch (shader->selector->type) {
case PIPE_SHADER_FRAGMENT:
@@ -5262,46 +5178,46 @@ static void si_calculate_max_simd_waves(struct si_shader *shader)
}
if (conf->num_vgprs)
max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
/* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
* 16KB makes some SIMDs unoccupied). */
if (lds_per_wave)
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
- conf->max_simd_waves = max_simd_waves;
+ shader->max_simd_waves = max_simd_waves;
}
void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
struct pipe_debug_callback *debug)
{
- const struct si_shader_config *conf = &shader->config;
+ const struct ac_shader_config *conf = &shader->config;
pipe_debug_message(debug, SHADER_INFO,
"Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
"LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
"Spilled VGPRs: %d PrivMem VGPRs: %d",
conf->num_sgprs, conf->num_vgprs,
si_get_shader_binary_size(shader),
conf->lds_size, conf->scratch_bytes_per_wave,
- conf->max_simd_waves, conf->spilled_sgprs,
- conf->spilled_vgprs, conf->private_mem_vgprs);
+ shader->max_simd_waves, conf->spilled_sgprs,
+ conf->spilled_vgprs, shader->private_mem_vgprs);
}
static void si_shader_dump_stats(struct si_screen *sscreen,
const struct si_shader *shader,
unsigned processor,
FILE *file,
bool check_debug_option)
{
- const struct si_shader_config *conf = &shader->config;
+ const struct ac_shader_config *conf = &shader->config;
if (!check_debug_option ||
si_can_dump_shader(sscreen, processor)) {
if (processor == PIPE_SHADER_FRAGMENT) {
fprintf(file, "*** SHADER CONFIG ***\n"
"SPI_PS_INPUT_ADDR = 0x%04x\n"
"SPI_PS_INPUT_ENA = 0x%04x\n",
conf->spi_ps_input_addr, conf->spi_ps_input_ena);
}
@@ -5311,24 +5227,24 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
"Spilled SGPRs: %d\n"
"Spilled VGPRs: %d\n"
"Private memory VGPRs: %d\n"
"Code Size: %d bytes\n"
"LDS: %d blocks\n"
"Scratch: %d bytes per wave\n"
"Max Waves: %d\n"
"********************\n\n\n",
conf->num_sgprs, conf->num_vgprs,
conf->spilled_sgprs, conf->spilled_vgprs,
- conf->private_mem_vgprs,
+ shader->private_mem_vgprs,
si_get_shader_binary_size(shader),
conf->lds_size, conf->scratch_bytes_per_wave,
- conf->max_simd_waves);
+ shader->max_simd_waves);
}
}
const char *si_get_shader_name(const struct si_shader *shader, unsigned processor)
{
switch (processor) {
case PIPE_SHADER_VERTEX:
if (shader->key.as_es)
return "Vertex Shader as ES";
else if (shader->key.as_ls)
@@ -5399,21 +5315,21 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
debug, "epilog", file);
fprintf(file, "\n");
}
si_shader_dump_stats(sscreen, shader, processor, file,
check_debug_option);
}
static int si_compile_llvm(struct si_screen *sscreen,
struct ac_shader_binary *binary,
- struct si_shader_config *conf,
+ struct ac_shader_config *conf,
struct ac_llvm_compiler *compiler,
LLVMModuleRef mod,
struct pipe_debug_callback *debug,
unsigned processor,
const char *name,
bool less_optimized)
{
int r = 0;
unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
@@ -5433,21 +5349,21 @@ static int si_compile_llvm(struct si_screen *sscreen,
LLVMDisposeMessage(ir);
}
if (!si_replace_shader(count, binary)) {
r = si_llvm_compile(mod, binary, compiler, debug,
less_optimized);
if (r)
return r;
}
- si_shader_binary_read_config(binary, conf, 0);
+ ac_shader_binary_read_config(binary, conf, 0, false);
/* Enable 64-bit and 16-bit denormals, because there is no performance
* cost.
*
* If denormals are enabled, all floating-point output modifiers are
* ignored.
*
* Don't enable denormals for 32-bit floats, because:
* - Floating-point output modifiers would be ignored by the hw.
* - Some opcodes don't support denormals, such as v_mad_f32. We would
@@ -6799,21 +6715,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
need_prolog ? 1 : 0, 0);
}
si_llvm_optimize_module(&ctx);
/* Post-optimization transformations and analysis. */
si_optimize_vs_outputs(&ctx);
if ((debug && debug->debug_message) ||
si_can_dump_shader(sscreen, ctx.type)) {
- ctx.shader->config.private_mem_vgprs =
+ ctx.shader->private_mem_vgprs =
ac_count_scratch_private_memory(ctx.main_fn);
}
/* Make sure the input is a pointer and not integer followed by inttoptr. */
assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) ==
LLVMPointerTypeKind);
/* Compile to bytecode. */
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,
ctx.ac.module, debug, ctx.type,
@@ -7954,23 +7870,23 @@ int si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compile
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
shader->previous_stage->config.num_sgprs);
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
shader->previous_stage->config.num_vgprs);
shader->config.spilled_sgprs =
MAX2(shader->config.spilled_sgprs,
shader->previous_stage->config.spilled_sgprs);
shader->config.spilled_vgprs =
MAX2(shader->config.spilled_vgprs,
shader->previous_stage->config.spilled_vgprs);
- shader->config.private_mem_vgprs =
- MAX2(shader->config.private_mem_vgprs,
- shader->previous_stage->config.private_mem_vgprs);
+ shader->private_mem_vgprs =
+ MAX2(shader->private_mem_vgprs,
+ shader->previous_stage->private_mem_vgprs);
shader->config.scratch_bytes_per_wave =
MAX2(shader->config.scratch_bytes_per_wave,
shader->previous_stage->config.scratch_bytes_per_wave);
shader->info.uses_instanceid |=
shader->previous_stage->info.uses_instanceid;
}
if (shader->prolog2) {
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
shader->prolog2->config.num_sgprs);
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index ecf7f8bbd7a..6c8f70dc94b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -552,36 +552,20 @@ struct si_shader_key {
* but forces monolithic shaders to be used as soon as
* possible, because it's in the "opt" group.
*/
unsigned prefer_mono:1;
} opt;
};
/* Restore the pack alignment to default. */
#pragma pack(pop)
-struct si_shader_config {
- unsigned num_sgprs;
- unsigned num_vgprs;
- unsigned spilled_sgprs;
- unsigned spilled_vgprs;
- unsigned private_mem_vgprs;
- unsigned lds_size;
- unsigned max_simd_waves;
- unsigned spi_ps_input_ena;
- unsigned spi_ps_input_addr;
- unsigned float_mode;
- unsigned scratch_bytes_per_wave;
- unsigned rsrc1;
- unsigned rsrc2;
-};
-
/* GCN-specific shader info. */
struct si_shader_info {
ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
ubyte num_input_sgprs;
ubyte num_input_vgprs;
signed char face_vgpr_index;
signed char ancillary_vgpr_index;
bool uses_instanceid;
ubyte nr_pos_exports;
ubyte nr_param_exports;
@@ -605,22 +589,24 @@ struct si_shader {
struct si_shader_key key;
struct util_queue_fence ready;
bool compilation_failed;
bool is_monolithic;
bool is_optimized;
bool is_binary_shared;
bool is_gs_copy_shader;
/* The following data is all that's needed for binary shaders. */
struct ac_shader_binary binary;
- struct si_shader_config config;
+ struct ac_shader_config config;
struct si_shader_info info;
+ unsigned private_mem_vgprs;
+ unsigned max_simd_waves;
/* Shader key + LLVM IR + disassembly + statistics.
* Generated for debug contexts only.
*/
char *shader_log;
size_t shader_log_size;
/* For save precompute context registers values. */
union {
struct {
@@ -662,21 +648,21 @@ struct si_shader {
/*For save precompute registers value */
unsigned vgt_tf_param; /* VGT_TF_PARAM */
unsigned vgt_vertex_reuse_block_cntl; /* VGT_VERTEX_REUSE_BLOCK_CNTL */
};
struct si_shader_part {
struct si_shader_part *next;
union si_shader_part_key key;
struct ac_shader_binary binary;
- struct si_shader_config config;
+ struct ac_shader_config config;
};
/* si_shader.c */
struct si_shader *
si_generate_gs_copy_shader(struct si_screen *sscreen,
struct ac_llvm_compiler *compiler,
struct si_shader_selector *gs_selector,
struct pipe_debug_callback *debug);
int si_compile_tgsi_shader(struct si_screen *sscreen,
struct ac_llvm_compiler *compiler,
@@ -692,23 +678,20 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor,
FILE *f, bool check_debug_option);
void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
struct pipe_debug_callback *debug);
void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
unsigned *lds_size);
void si_shader_apply_scratch_relocs(struct si_shader *shader,
uint64_t scratch_va);
-void si_shader_binary_read_config(struct ac_shader_binary *binary,
- struct si_shader_config *conf,
- unsigned symbol_offset);
const char *si_get_shader_name(const struct si_shader *shader, unsigned processor);
/* si_shader_nir.c */
void si_nir_scan_shader(const struct nir_shader *nir,
struct tgsi_shader_info *info);
void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
struct tgsi_tessctrl_info *out);
void si_lower_nir(struct si_shader_selector *sel);
/* Inline helpers. */
--
2.20.1
More information about the mesa-dev mailing list