Mesa (main): radeonsi: use si_shader::wave_size
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Nov 26 11:57:31 UTC 2021
Module: Mesa
Branch: main
Commit: d08b09cb7e96cecb4f224f698ed4e7ef5bacd707
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d08b09cb7e96cecb4f224f698ed4e7ef5bacd707
Author: Marek Olšák <marek.olsak at amd.com>
Date: Fri Nov 19 04:01:34 2021 -0500
radeonsi: use si_shader::wave_size
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13878>
---
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 5 ++--
src/gallium/drivers/radeonsi/si_compute.c | 8 +++---
src/gallium/drivers/radeonsi/si_debug.c | 11 ++++----
src/gallium/drivers/radeonsi/si_shader.c | 32 +++++++++++------------
src/gallium/drivers/radeonsi/si_shader_llvm.c | 2 +-
src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 4 +--
src/gallium/drivers/radeonsi/si_sqtt.c | 2 +-
src/gallium/drivers/radeonsi/si_state_draw.cpp | 2 +-
src/gallium/drivers/radeonsi/si_state_shaders.cpp | 11 ++++----
9 files changed, 35 insertions(+), 42 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 8c63e0f5f52..9d2bb1ac054 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -2276,14 +2276,13 @@ retry_select_mode:
/* Round up towards full wave sizes for better ALU utilization. */
if (!max_vert_out_per_gs_instance) {
- const unsigned wavesize = si_get_shader_wave_size(shader);
unsigned orig_max_esverts;
unsigned orig_max_gsprims;
do {
orig_max_esverts = max_esverts;
orig_max_gsprims = max_gsprims;
- max_esverts = align(max_esverts, wavesize);
+ max_esverts = align(max_esverts, shader->wave_size);
max_esverts = MIN2(max_esverts, max_esverts_base);
if (esvert_lds_size)
max_esverts =
@@ -2293,7 +2292,7 @@ retry_select_mode:
/* Hardware restriction: minimum value of max_esverts */
max_esverts = MAX2(max_esverts, min_esverts);
- max_gsprims = align(max_gsprims, wavesize);
+ max_gsprims = align(max_gsprims, shader->wave_size);
max_gsprims = MIN2(max_gsprims, max_gsprims_base);
if (gsprim_lds_size) {
/* Don't count unusable vertices to the LDS size. Those are vertices above
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 864732385e4..0772e49670b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -68,7 +68,7 @@ static const amd_kernel_code_t *si_compute_get_code_object(const struct si_compu
if (!ac_rtld_open(&rtld,
(struct ac_rtld_open_info){.info = &sel->screen->info,
.shader_type = MESA_SHADER_COMPUTE,
- .wave_size = sel->screen->compute_wave_size,
+ .wave_size = program->shader.wave_size,
.num_parts = 1,
.elf_ptrs = &program->shader.binary.elf_buffer,
.elf_sizes = &program->shader.binary.elf_size}))
@@ -193,7 +193,7 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) /
- ((sscreen->compute_wave_size == 32 ||
+ ((shader->wave_size == 32 ||
sscreen->info.wave64_vgpr_alloc_granularity == 8) ? 8 : 4)) |
S_00B848_DX10_CLAMP(1) |
S_00B848_MEM_ORDERED(si_shader_mem_ordered(shader)) |
@@ -770,7 +770,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
bool render_cond_bit = sctx->render_cond_enabled;
unsigned threads_per_threadgroup = info->block[0] * info->block[1] * info->block[2];
unsigned waves_per_threadgroup =
- DIV_ROUND_UP(threads_per_threadgroup, sscreen->compute_wave_size);
+ DIV_ROUND_UP(threads_per_threadgroup, sctx->cs_shader_state.program->shader.wave_size);
unsigned threadgroups_per_cu = 1;
if (sctx->chip_class >= GFX10 && waves_per_threadgroup == 1)
@@ -792,7 +792,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
/* If the KMD allows it (there is a KMD hw register for it),
* allow launching waves out-of-order. (same as Vulkan) */
S_00B800_ORDER_MODE(sctx->chip_class >= GFX7) |
- S_00B800_CS_W32_EN(sscreen->compute_wave_size == 32);
+ S_00B800_CS_W32_EN(sctx->cs_shader_state.program->shader.wave_size == 32);
const uint *last_block = info->last_block;
bool partial_block_en = last_block[0] || last_block[1] || last_block[2];
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index a195bc18b3c..c88eb734241 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -919,28 +919,27 @@ static void si_print_annotated_shader(struct si_shader *shader, struct ac_wave_i
*/
unsigned num_inst = 0;
uint64_t inst_addr = start_addr;
- unsigned wave_size = si_get_shader_wave_size(shader);
struct ac_rtld_binary rtld_binaries[5] = {};
struct si_shader_inst *instructions =
calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
if (shader->prolog) {
si_add_split_disasm(screen, &rtld_binaries[0], &shader->prolog->binary, &inst_addr, &num_inst,
- instructions, stage, wave_size);
+ instructions, stage, shader->wave_size);
}
if (shader->previous_stage) {
si_add_split_disasm(screen, &rtld_binaries[1], &shader->previous_stage->binary, &inst_addr,
- &num_inst, instructions, stage, wave_size);
+ &num_inst, instructions, stage, shader->wave_size);
}
if (shader->prolog2) {
si_add_split_disasm(screen, &rtld_binaries[2], &shader->prolog2->binary, &inst_addr,
- &num_inst, instructions, stage, wave_size);
+ &num_inst, instructions, stage, shader->wave_size);
}
si_add_split_disasm(screen, &rtld_binaries[3], &shader->binary, &inst_addr, &num_inst,
- instructions, stage, wave_size);
+ instructions, stage, shader->wave_size);
if (shader->epilog) {
si_add_split_disasm(screen, &rtld_binaries[4], &shader->epilog->binary, &inst_addr, &num_inst,
- instructions, stage, wave_size);
+ instructions, stage, shader->wave_size);
}
fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 80b3d6144a7..7ebc3831b33 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -819,7 +819,7 @@ static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *sh
.halt_at_entry = screen->options.halt_shaders,
},
.shader_type = sel->info.stage,
- .wave_size = si_get_shader_wave_size(shader),
+ .wave_size = shader->wave_size,
.num_parts = num_parts,
.elf_ptrs = part_elfs,
.elf_sizes = part_sizes,
@@ -992,7 +992,7 @@ static void si_calculate_max_simd_waves(struct si_shader *shader)
case MESA_SHADER_COMPUTE: {
unsigned max_workgroup_size = si_get_max_workgroup_size(shader);
lds_per_wave = (conf->lds_size * lds_increment) /
- DIV_ROUND_UP(max_workgroup_size, sscreen->compute_wave_size);
+ DIV_ROUND_UP(max_workgroup_size, shader->wave_size);
}
break;
default:;
@@ -1025,7 +1025,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad
if (screen->options.debug_disassembly)
si_shader_dump_disassembly(screen, &shader->binary, shader->selector->info.stage,
- si_get_shader_wave_size(shader), debug, "main", NULL);
+ shader->wave_size, debug, "main", NULL);
pipe_debug_message(debug, SHADER_INFO,
"Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
@@ -1123,25 +1123,24 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
if (!check_debug_option ||
(si_can_dump_shader(sscreen, stage) && !(sscreen->debug_flags & DBG(NO_ASM)))) {
- unsigned wave_size = si_get_shader_wave_size(shader);
fprintf(file, "\n%s:\n", si_get_shader_name(shader));
if (shader->prolog)
- si_shader_dump_disassembly(sscreen, &shader->prolog->binary, stage, wave_size, debug,
+ si_shader_dump_disassembly(sscreen, &shader->prolog->binary, stage, shader->wave_size, debug,
"prolog", file);
if (shader->previous_stage)
si_shader_dump_disassembly(sscreen, &shader->previous_stage->binary, stage,
- wave_size, debug, "previous stage", file);
+ shader->wave_size, debug, "previous stage", file);
if (shader->prolog2)
- si_shader_dump_disassembly(sscreen, &shader->prolog2->binary, stage, wave_size,
+ si_shader_dump_disassembly(sscreen, &shader->prolog2->binary, stage, shader->wave_size,
debug, "prolog2", file);
- si_shader_dump_disassembly(sscreen, &shader->binary, stage, wave_size, debug, "main",
+ si_shader_dump_disassembly(sscreen, &shader->binary, stage, shader->wave_size, debug, "main",
file);
if (shader->epilog)
- si_shader_dump_disassembly(sscreen, &shader->epilog->binary, stage, wave_size, debug,
+ si_shader_dump_disassembly(sscreen, &shader->epilog->binary, stage, shader->wave_size, debug,
"epilog", file);
fprintf(file, "\n");
}
@@ -1330,7 +1329,7 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_
{
memset(key, 0, sizeof(*key));
key->vs_prolog.states = *prolog_key;
- key->vs_prolog.wave32 = si_get_shader_wave_size(shader_out) == 32;
+ key->vs_prolog.wave32 = shader_out->wave_size == 32;
key->vs_prolog.num_input_sgprs = num_input_sgprs;
key->vs_prolog.num_inputs = info->num_inputs;
key->vs_prolog.as_ls = shader_out->key.ge.as_ls;
@@ -1522,14 +1521,13 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
/* Validate SGPR and VGPR usage for compute to detect compiler bugs. */
if (sel->info.stage == MESA_SHADER_COMPUTE) {
- unsigned wave_size = sscreen->compute_wave_size;
unsigned max_vgprs =
- sscreen->info.num_physical_wave64_vgprs_per_simd * (wave_size == 32 ? 2 : 1);
+ sscreen->info.num_physical_wave64_vgprs_per_simd * (shader->wave_size == 32 ? 2 : 1);
unsigned max_sgprs = sscreen->info.num_physical_sgprs_per_simd;
unsigned max_sgprs_per_wave = 128;
unsigned simds_per_tg = 4; /* assuming WGP mode on gfx10 */
unsigned threads_per_tg = si_get_max_workgroup_size(shader);
- unsigned waves_per_tg = DIV_ROUND_UP(threads_per_tg, wave_size);
+ unsigned waves_per_tg = DIV_ROUND_UP(threads_per_tg, shader->wave_size);
unsigned waves_per_simd = DIV_ROUND_UP(waves_per_tg, simds_per_tg);
max_vgprs = max_vgprs / waves_per_simd;
@@ -1709,7 +1707,7 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct ac_llvm
/* Get the epilog. */
union si_shader_part_key epilog_key;
memset(&epilog_key, 0, sizeof(epilog_key));
- epilog_key.tcs_epilog.wave32 = si_get_shader_wave_size(shader) == 32;
+ epilog_key.tcs_epilog.wave32 = shader->wave_size == 32;
epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs, MESA_SHADER_TESS_CTRL, false,
@@ -1754,7 +1752,7 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
memset(key, 0, sizeof(*key));
key->ps_prolog.states = shader->key.ps.part.prolog;
- key->ps_prolog.wave32 = si_get_shader_wave_size(shader) == 32;
+ key->ps_prolog.wave32 = shader->wave_size == 32;
key->ps_prolog.colors_read = info->colors_read;
key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
@@ -1888,7 +1886,7 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke
{
struct si_shader_info *info = &shader->selector->info;
memset(key, 0, sizeof(*key));
- key->ps_epilog.wave32 = si_get_shader_wave_size(shader) == 32;
+ key->ps_epilog.wave32 = shader->wave_size == 32;
key->ps_epilog.colors_written = info->colors_written;
key->ps_epilog.color_types = info->output_color_types;
key->ps_epilog.writes_z = info->writes_z;
@@ -2013,7 +2011,7 @@ void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader)
shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
if (shader->selector->info.stage == MESA_SHADER_COMPUTE &&
- si_get_max_workgroup_size(shader) > sscreen->compute_wave_size) {
+ si_get_max_workgroup_size(shader) > shader->wave_size) {
si_multiwave_lds_size_workaround(sscreen, &shader->config.lds_size);
}
}
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 4e7a8a49431..0577287f73c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -1090,7 +1090,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
struct si_shader_selector *sel = shader->selector;
struct si_shader_context ctx;
- si_llvm_context_init(&ctx, sscreen, compiler, si_get_shader_wave_size(shader));
+ si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
LLVMValueRef ngg_cull_main_fn = NULL;
if (sel->info.stage <= MESA_SHADER_TESS_EVAL && shader->key.ge.opt.ngg_culling) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index 27041f9125d..d059f860629 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -427,9 +427,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
shader->is_gs_copy_shader = true;
shader->wave_size = si_get_shader_wave_size(shader);
- si_llvm_context_init(&ctx, sscreen, compiler,
- si_get_wave_size(sscreen, MESA_SHADER_VERTEX,
- false, false));
+ si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
ctx.shader = shader;
ctx.stage = MESA_SHADER_VERTEX;
diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c
index 52cb5331188..17c037037ea 100644
--- a/src/gallium/drivers/radeonsi/si_sqtt.c
+++ b/src/gallium/drivers/radeonsi/si_sqtt.c
@@ -1014,7 +1014,7 @@ si_sqtt_add_code_object(struct si_context* sctx,
record->shader_data[gl_shader_stage].hw_stage = hw_stage;
record->shader_data[gl_shader_stage].is_combined = false;
record->shader_data[gl_shader_stage].scratch_memory_size = shader->config.scratch_bytes_per_wave;
- record->shader_data[gl_shader_stage].wavefront_size = si_get_shader_wave_size(shader);
+ record->shader_data[gl_shader_stage].wavefront_size = shader->wave_size;
record->shader_stages_mask |= 1 << gl_shader_stage;
record->num_shaders_combined++;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 2a8a9f1b1f0..90b01e033a1 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -631,7 +631,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
* if it's only partially filled.
*/
unsigned temp_verts_per_tg = *num_patches * max_verts_per_patch;
- unsigned wave_size = sctx->screen->ge_wave_size;
+ unsigned wave_size = ls_current->wave_size;
if (temp_verts_per_tg > wave_size &&
(wave_size - temp_verts_per_tg % wave_size >= MAX2(max_verts_per_patch, 8)))
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 3a6ae910327..548262dc0c2 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -586,7 +586,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
si_pm4_set_reg(
pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
- S_00B428_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ge_wave_size == 32 ? 8 : 4)) |
+ S_00B428_VGPRS((shader->config.num_vgprs - 1) / (shader->wave_size == 32 ? 8 : 4)) |
(sscreen->info.chip_class <= GFX9 ? S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8)
: 0) |
S_00B428_DX10_CLAMP(1) | S_00B428_MEM_ORDERED(si_shader_mem_ordered(shader)) |
@@ -1207,7 +1207,6 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
else
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
- unsigned wave_size = si_get_shader_wave_size(shader);
unsigned late_alloc_wave64, cu_mask;
ac_compute_late_alloc(&sscreen->info, true, shader->key.ge.opt.ngg_culling,
@@ -1217,7 +1216,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(
pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
- S_00B228_VGPRS((shader->config.num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) |
+ S_00B228_VGPRS((shader->config.num_vgprs - 1) / (shader->wave_size == 32 ? 8 : 4)) |
S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_DX10_CLAMP(1) |
S_00B228_MEM_ORDERED(si_shader_mem_ordered(shader)) |
/* Disable the WGP mode on gfx10.3 because it can hang. (it happened on VanGogh)
@@ -1511,7 +1510,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8));
uint32_t rsrc1 =
- S_00B128_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ge_wave_size == 32 ? 8 : 4)) |
+ S_00B128_VGPRS((shader->config.num_vgprs - 1) / (shader->wave_size == 32 ? 8 : 4)) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B128_DX10_CLAMP(1) |
S_00B128_MEM_ORDERED(si_shader_mem_ordered(shader)) |
S_00B128_FLOAT_MODE(shader->config.float_mode);
@@ -1715,7 +1714,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
/* Set interpolation controls. */
spi_ps_in_control = S_0286D8_NUM_INTERP(num_interp) |
- S_0286D8_PS_W32_EN(sscreen->ps_wave_size == 32);
+ S_0286D8_PS_W32_EN(shader->wave_size == 32);
shader->ctx_reg.ps.num_interp = num_interp;
shader->ctx_reg.ps.spi_baryc_cntl = spi_baryc_cntl;
@@ -1731,7 +1730,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8));
uint32_t rsrc1 =
- S_00B028_VGPRS((shader->config.num_vgprs - 1) / (sscreen->ps_wave_size == 32 ? 8 : 4)) |
+ S_00B028_VGPRS((shader->config.num_vgprs - 1) / (shader->wave_size == 32 ? 8 : 4)) |
S_00B028_DX10_CLAMP(1) | S_00B028_MEM_ORDERED(si_shader_mem_ordered(shader)) |
S_00B028_FLOAT_MODE(shader->config.float_mode);
More information about the mesa-commit
mailing list