Mesa (master): aco: change gpr_alloc_granule to full alignment
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Feb 12 19:10:51 UTC 2021
Module: Mesa
Branch: master
Commit: eaf681724e992209f1ca97e2c37837170b8087aa
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=eaf681724e992209f1ca97e2c37837170b8087aa
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Tue Feb 2 17:33:09 2021 +0100
aco: change gpr_alloc_granule to full alignment
This also switches the alloc_granule of Tonga and Iceland
to 96, so that the calculation is consistent.
Also changes the granularity for RDNA to 16 to keep
better stats with the upcoming patch.
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8921>
---
src/amd/compiler/aco_ir.cpp | 19 ++++++++++---------
src/amd/compiler/aco_ir.h | 4 ++--
src/amd/compiler/aco_live_var_analysis.cpp | 18 +++++++++---------
3 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 90d0f5e9ab7..a156d109b31 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -100,27 +100,28 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info,
program->vgpr_limit = 256;
program->physical_vgprs = 256;
- program->vgpr_alloc_granule = 3;
+ program->vgpr_alloc_granule = 4;
if (chip_class >= GFX10) {
program->physical_sgprs = 2560; /* doesn't matter as long as it's at least 128 * 20 */
program->physical_vgprs = 512;
- program->sgpr_alloc_granule = 127;
+ program->sgpr_alloc_granule = 128;
program->sgpr_limit = 106;
if (chip_class >= GFX10_3)
- program->vgpr_alloc_granule = program->wave_size == 32 ? 15 : 7;
+ program->vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8;
else
- program->vgpr_alloc_granule = program->wave_size == 32 ? 7 : 3;
+ program->vgpr_alloc_granule = program->wave_size == 32 ? 8 : 4;
} else if (program->chip_class >= GFX8) {
program->physical_sgprs = 800;
- program->sgpr_alloc_granule = 15;
- if (family == CHIP_TONGA || family == CHIP_ICELAND)
+ program->sgpr_alloc_granule = 16;
+ program->sgpr_limit = 102;
+ if (family == CHIP_TONGA || family == CHIP_ICELAND) {
+ program->sgpr_alloc_granule = 96;
program->sgpr_limit = 94; /* workaround hardware bug */
- else
- program->sgpr_limit = 102;
+ }
} else {
program->physical_sgprs = 512;
- program->sgpr_alloc_granule = 7;
+ program->sgpr_alloc_granule = 8;
program->sgpr_limit = 104;
}
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 8f0e2f13f98..d2fdf380117 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1825,8 +1825,8 @@ public:
uint16_t sgpr_limit;
uint16_t physical_sgprs;
uint16_t physical_vgprs;
- uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
- uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
+ uint16_t sgpr_alloc_granule;
+ uint16_t vgpr_alloc_granule; /* must be power of two */
unsigned workgroup_size; /* if known; otherwise UINT_MAX */
bool xnack_enabled = false;
diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp
index 61a1c626c5c..5f6c5b00a29 100644
--- a/src/amd/compiler/aco_live_var_analysis.cpp
+++ b/src/amd/compiler/aco_live_var_analysis.cpp
@@ -273,30 +273,30 @@ uint16_t get_extra_sgprs(Program *program)
uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs)
{
- assert(addressable_sgprs <= program->sgpr_limit);
uint16_t sgprs = addressable_sgprs + get_extra_sgprs(program);
- uint16_t granule = program->sgpr_alloc_granule + 1;
- return align(std::max(sgprs, granule), granule);
+ uint16_t granule = program->sgpr_alloc_granule;
+ return ALIGN_NPOT(std::max(sgprs, granule), granule);
}
uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs)
{
assert(addressable_vgprs <= program->vgpr_limit);
- uint16_t granule = program->vgpr_alloc_granule + 1;
+ uint16_t granule = program->vgpr_alloc_granule;
return align(std::max(addressable_vgprs, granule), granule);
}
uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves)
{
- uint16_t sgprs = program->physical_sgprs / max_waves & ~program->sgpr_alloc_granule;
- sgprs -= get_extra_sgprs(program);
- return std::min(sgprs, program->sgpr_limit);
+ uint16_t sgprs = (program->physical_sgprs / max_waves) - program->sgpr_alloc_granule + 1;
+ sgprs = get_sgpr_alloc(program, sgprs);
+ sgprs -= get_extra_sgprs(program);
+ return std::min(sgprs, program->sgpr_limit);
}
uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves)
{
- uint16_t vgprs = program->physical_vgprs / max_waves & ~program->vgpr_alloc_granule;
- return std::min(vgprs, program->vgpr_limit);
+ uint16_t vgprs = program->physical_vgprs / max_waves & ~(program->vgpr_alloc_granule - 1);
+ return std::min(vgprs, program->vgpr_limit);
}
void calc_min_waves(Program* program)
More information about the mesa-commit
mailing list