Mesa (master): radeonsi: don't subtract max_verts_per_prim from hw_max_esverts on gfx10.3

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Nov 18 06:41:16 UTC 2020


Module: Mesa
Branch: master
Commit: ea90d8a74498148a1abbde3d554fc3ce3d37cb67
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ea90d8a74498148a1abbde3d554fc3ce3d37cb67

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Wed Oct 21 14:39:26 2020 -0400

radeonsi: don't subtract max_verts_per_prim from hw_max_esverts on gfx10.3

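On gfx10.3 the GE checks the ES vertex limit properly, so the gfx10 workaround
of subtracting max_verts_per_prim is not needed there. This change enables the
last 2 lanes in a workgroup on gfx10.3.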

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7542>

---

 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 1b1b9b3f368..9ccde8fe80b 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -2037,8 +2037,12 @@ retry_select_mode:
             max_esverts =
                MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
          max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+
          /* Hardware restriction: minimum value of max_esverts */
-         max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+         if (gs_sel->screen->info.chip_class == GFX10)
+            max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+         else
+            max_esverts = MAX2(max_esverts, min_esverts);
 
          max_gsprims = align(max_gsprims, wavesize);
          max_gsprims = MIN2(max_gsprims, max_gsprims_base);
@@ -2056,10 +2060,16 @@ retry_select_mode:
       } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
 
       /* Verify the restriction. */
-      assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
+      if (gs_sel->screen->info.chip_class == GFX10)
+         assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
+      else
+         assert(max_esverts >= min_esverts);
    } else {
       /* Hardware restriction: minimum value of max_esverts */
-      max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+      if (gs_sel->screen->info.chip_class == GFX10)
+         max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+      else
+         max_esverts = MAX2(max_esverts, min_esverts);
    }
 
    unsigned max_out_vertices =
@@ -2077,12 +2087,16 @@ retry_select_mode:
       prim_amp_factor = gs_sel->info.base.gs.vertices_out;
    }
 
-   /* The GE only checks against the maximum number of ES verts after
+   /* On gfx10, the GE only checks against the maximum number of ES verts after
     * allocating a full GS primitive. So we need to ensure that whenever
     * this check passes, there is enough space for a full primitive without
     * vertex reuse.
     */
-   shader->ngg.hw_max_esverts = max_esverts - max_verts_per_prim + 1;
+   if (gs_sel->screen->info.chip_class == GFX10)
+      shader->ngg.hw_max_esverts = max_esverts - max_verts_per_prim + 1;
+   else
+      shader->ngg.hw_max_esverts = max_esverts;
+
    shader->ngg.max_gsprims = max_gsprims;
    shader->ngg.max_out_verts = max_out_vertices;
    shader->ngg.prim_amp_factor = prim_amp_factor;



More information about the mesa-commit mailing list