Mesa (main): aco: remove 'max_waves' and use 'num_waves' to adjust for LDS and workgroup size
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Apr 29 15:56:51 UTC 2022
Module: Mesa
Branch: main
Commit: 6220046ad198ed8914c7a04ed3d5dd3cf6efc3c2
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6220046ad198ed8914c7a04ed3d5dd3cf6efc3c2
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Tue Apr 19 16:58:26 2022 +0200
aco: remove 'max_waves' and use 'num_waves' to adjust for LDS and workgroup size
Totals from 21 (0.02% of 134913) affected shaders: (GFX10.3)
VGPRs: 1024 -> 1176 (+14.84%)
CodeSize: 127824 -> 127664 (-0.13%); split: -0.17%, +0.04%
MaxWaves: 416 -> 378 (-9.13%)
Instrs: 22521 -> 22502 (-0.08%); split: -0.17%, +0.09%
Latency: 146386 -> 143154 (-2.21%); split: -2.21%, +0.00%
InvThroughput: 28379 -> 28944 (+1.99%); split: -0.23%, +2.22%
VClause: 575 -> 579 (+0.70%); split: -0.87%, +1.57%
SClause: 692 -> 645 (-6.79%)
Copies: 780 -> 747 (-4.23%); split: -4.74%, +0.51%
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16039>
---
src/amd/compiler/aco_ir.h | 3 +--
src/amd/compiler/aco_live_var_analysis.cpp | 16 +++++++---------
2 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 327acbf0e6a..f74a2e93c07 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -2052,8 +2052,6 @@ public:
std::vector<Block> blocks;
std::vector<RegClass> temp_rc = {s1};
RegisterDemand max_reg_demand = RegisterDemand();
- uint16_t num_waves = 0;
- uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
ac_shader_config* config;
const struct radv_shader_info* info;
enum chip_class chip_class;
@@ -2069,6 +2067,7 @@ public:
Temp private_segment_buffer;
Temp scratch_offset;
+ uint16_t num_waves = 0;
uint16_t min_waves = 0;
unsigned workgroup_size; /* if known; otherwise UINT_MAX */
bool wgp_mode;
diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp
index 58a4023d787..18c9053db94 100644
--- a/src/amd/compiler/aco_live_var_analysis.cpp
+++ b/src/amd/compiler/aco_live_var_analysis.cpp
@@ -368,7 +368,6 @@ calc_min_waves(Program* program)
void
update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
{
- unsigned max_waves_per_simd = program->dev.max_wave64_per_simd * (64 / program->wave_size);
unsigned simd_per_cu_wgp = program->dev.simd_per_cu * (program->wgp_mode ? 2 : 1);
unsigned lds_limit = program->wgp_mode ? program->dev.lds_limit * 2 : program->dev.lds_limit;
unsigned max_workgroups_per_cu_wgp = program->wgp_mode ? 32 : 16;
@@ -387,11 +386,12 @@ update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
get_vgpr_alloc(program, new_demand.vgpr) + program->config->num_shared_vgprs / 2;
program->num_waves =
std::min<uint16_t>(program->num_waves, program->dev.physical_vgprs / vgpr_demand);
- program->max_waves = max_waves_per_simd;
+ uint16_t max_waves = program->dev.max_wave64_per_simd * (64 / program->wave_size);
+ program->num_waves = std::min(program->num_waves, max_waves);
- /* adjust max_waves for workgroup and LDS limits */
+ /* adjust num_waves for workgroup and LDS limits */
unsigned waves_per_workgroup = calc_waves_per_workgroup(program);
- unsigned workgroups_per_cu_wgp = max_waves_per_simd * simd_per_cu_wgp / waves_per_workgroup;
+ unsigned workgroups_per_cu_wgp = program->num_waves * simd_per_cu_wgp / waves_per_workgroup;
unsigned lds_per_workgroup =
align(program->config->lds_size * program->dev.lds_encoding_granule,
@@ -416,12 +416,10 @@ update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
/* in cases like waves_per_workgroup=3 or lds=65536 and
* waves_per_workgroup=1, we want the maximum possible number of waves per
* SIMD and not the minimum. so DIV_ROUND_UP is used */
- program->max_waves = std::min<uint16_t>(
- program->max_waves,
- DIV_ROUND_UP(workgroups_per_cu_wgp * waves_per_workgroup, simd_per_cu_wgp));
+ program->num_waves =
+ DIV_ROUND_UP(workgroups_per_cu_wgp * waves_per_workgroup, simd_per_cu_wgp);
- /* incorporate max_waves and calculate max_reg_demand */
- program->num_waves = std::min<uint16_t>(program->num_waves, program->max_waves);
+ /* calculate max_reg_demand */
program->max_reg_demand.vgpr = get_addr_vgpr_from_waves(program, program->num_waves);
program->max_reg_demand.sgpr = get_addr_sgpr_from_waves(program, program->num_waves);
}
More information about the mesa-commit
mailing list