Mesa (master): ac: split lds_granularity into encode and allocation granularities

Mon Feb 15 15:04:40 UTC 2021

Module: Mesa
Branch: master
Commit: aa53335135572518c5308c70b11c8cc5c87ecdb9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=aa53335135572518c5308c70b11c8cc5c87ecdb9

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Thu Jan 28 11:59:21 2021 +0000

ac: split lds_granularity into encode and allocation granularities

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8761>

---

 src/amd/common/ac_gpu_info.c | 9 +++++++--
 src/amd/common/ac_gpu_info.h | 3 ++-
 src/amd/common/ac_rgp.c      | 2 +-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 50f2a849e68..4ed9f5ab4a9 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -722,7 +722,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
     * LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
     */
    info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
-   info->lds_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
+   /* lds_encode_granularity is the block size used for encoding registers.
+    * lds_alloc_granularity is what the hardware will align the LDS size to.
+    */
+   info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
+   info->lds_alloc_granularity = info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
 
    assert(util_is_power_of_two_or_zero(dma.available_rings + 1));
    assert(util_is_power_of_two_or_zero(compute.available_rings + 1));
@@ -1058,7 +1062,8 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
    fprintf(f, "    tcc_harvested = %u\n", info->tcc_harvested);
    fprintf(f, "    pc_lines = %u\n", info->pc_lines);
    fprintf(f, "    lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
-   fprintf(f, "    lds_granularity = %i\n", info->lds_granularity);
+   fprintf(f, "    lds_alloc_granularity = %i\n", info->lds_alloc_granularity);
+   fprintf(f, "    lds_encode_granularity = %i\n", info->lds_encode_granularity);
    fprintf(f, "    max_memory_clock = %i\n", info->max_memory_clock);
    fprintf(f, "    ce_ram_size = %i\n", info->ce_ram_size);
    fprintf(f, "    l1_cache_size = %i\n", info->l1_cache_size);
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 3f9cde55732..57262c3a993 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -110,7 +110,8 @@ struct radeon_info {
    bool tcc_harvested;
    unsigned pc_lines;
    uint32_t lds_size_per_workgroup;
-   uint32_t lds_granularity;
+   uint32_t lds_alloc_granularity;
+   uint32_t lds_encode_granularity;
    uint32_t max_memory_clock;
    uint32_t ce_ram_size;
    uint32_t l1_cache_size;
diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c
index ee4bc25eb6a..4c61ddb1b43 100644
--- a/src/amd/common/ac_rgp.c
+++ b/src/amd/common/ac_rgp.c
@@ -405,7 +405,7 @@ static void ac_fill_sqtt_asic_info(struct radeon_info *rad_info,
    chunk->max_memory_clock = rad_info->max_memory_clock * 1000000;
    chunk->memory_ops_per_clock = 0;
    chunk->memory_chip_type = ac_vram_type_to_sqtt_memory_type(rad_info->vram_type);
-   chunk->lds_granularity = rad_info->lds_granularity;
+   chunk->lds_granularity = rad_info->lds_encode_granularity;
 
    for (unsigned se = 0; se < 4; se++) {
       for (unsigned sa = 0; sa < 2; sa++) {