Mesa (master): aco: Align NGG scratch size to 16 so a single ds_read can always read it.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 14 14:15:10 UTC 2021


Module: Mesa
Branch: master
Commit: 75cd43741a8607fc30f4577ae73004c495c39a5a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=75cd43741a8607fc30f4577ae73004c495c39a5a

Author: Timur Kristóf <timur.kristof at gmail.com>
Date:   Sat Apr 10 14:51:56 2021 +0200

aco: Align NGG scratch size to 16 so a single ds_read can always read it.

Signed-off-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10155>

---

 src/amd/compiler/aco_instruction_selection.cpp       | 2 +-
 src/amd/compiler/aco_instruction_selection_setup.cpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 75b9fff0159..5d5f7e429b4 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11409,7 +11409,7 @@ std::pair<Temp, Temp> ngg_gs_workgroup_reduce_and_scan(isel_context *ctx, Temp s
 
    /* The first lane of each wave loads every wave's results from LDS, to avoid bank conflicts */
    Temp reduction_per_wave_vector = load_lds(ctx, 4u * num_lds_dwords, bld.tmp(RegClass(RegType::vgpr, num_lds_dwords)),
-                                             bld.copy(bld.def(v1), Operand(0u)), ctx->ngg_gs_scratch_addr, 4u);
+                                             bld.copy(bld.def(v1), Operand(0u)), ctx->ngg_gs_scratch_addr, 16u);
 
    begin_divergent_if_else(ctx, &ic);
    end_divergent_if(ctx, &ic);
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index bbdbd800e7a..29b3845f02e 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -419,8 +419,9 @@ void setup_gs_variables(isel_context *ctx, nir_shader *nir)
       ctx->ngg_gs_emit_vtx_bytes = ctx->ngg_gs_primflags_offset + 4u;
       ctx->ngg_gs_emit_addr = esgs_ring_bytes;
       ctx->ngg_gs_scratch_addr = ctx->ngg_gs_emit_addr + ngg_emit_bytes;
+      ctx->ngg_gs_scratch_addr = ALIGN(ctx->ngg_gs_scratch_addr, 16u);
 
-      unsigned total_lds_bytes = esgs_ring_bytes + ngg_emit_bytes + ngg_gs_scratch_bytes;
+      unsigned total_lds_bytes = ctx->ngg_gs_scratch_addr + ngg_gs_scratch_bytes;
       assert(total_lds_bytes >= ctx->ngg_gs_emit_addr);
       assert(total_lds_bytes >= ctx->ngg_gs_scratch_addr);
       ctx->program->config->lds_size = DIV_ROUND_UP(total_lds_bytes, ctx->program->dev.lds_encoding_granule);



More information about the mesa-commit mailing list