Mesa (master): radeonsi: set VB user SGPRs in si_upload_vertex_buffer_descriptors

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sat Jan 30 20:59:51 UTC 2021


Module: Mesa
Branch: master
Commit: d5b529ad1486dd6a31fe394ad0e4b18bf3a79d6b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d5b529ad1486dd6a31fe394ad0e4b18bf3a79d6b

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Mon Jan 11 15:02:48 2021 -0500

radeonsi: set VB user SGPRs in si_upload_vertex_buffer_descriptors

so that we don't have to enter the state emit loop and invoke the more
complicated function si_emit_graphics_shader_pointers.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8794>

---

 src/gallium/drivers/radeonsi/si_descriptors.c  | 29 +---------
 src/gallium/drivers/radeonsi/si_state_draw.cpp | 77 +++++++++++++++++++++-----
 2 files changed, 65 insertions(+), 41 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index c497717942a..fb1813020ab 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2021,37 +2021,10 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx)
                                        sh_base[PIPE_SHADER_TESS_CTRL]);
    si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
                                        sh_base[PIPE_SHADER_GEOMETRY]);
+   radeon_end();
 
    sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_INTERNAL, SI_DESCS_FIRST_COMPUTE);
 
-   if (sctx->vertex_buffer_pointer_dirty && sctx->num_vertex_elements) {
-      /* Find the location of the VB descriptor pointer. */
-      unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR;
-      if (sctx->chip_class >= GFX9) {
-         if (sctx->tes_shader.cso)
-            sh_dw_offset = GFX9_TCS_NUM_USER_SGPR;
-         else if (sctx->gs_shader.cso)
-            sh_dw_offset = GFX9_VSGS_NUM_USER_SGPR;
-      }
-
-      unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4;
-      radeon_set_sh_reg_seq(cs, sh_offset, 1);
-      radeon_emit_32bit_pointer(
-         sctx->screen, cs, sctx->vb_descriptors_buffer->gpu_address + sctx->vb_descriptors_offset);
-      sctx->vertex_buffer_pointer_dirty = false;
-   }
-
-   if (sctx->vertex_buffer_user_sgprs_dirty && sctx->num_vertex_elements &&
-       sctx->screen->num_vbos_in_user_sgprs) {
-      unsigned num_desc = MIN2(sctx->num_vertex_elements, sctx->screen->num_vbos_in_user_sgprs);
-      unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + SI_SGPR_VS_VB_DESCRIPTOR_FIRST * 4;
-
-      radeon_set_sh_reg_seq(cs, sh_offset, num_desc * 4);
-      radeon_emit_array(cs, sctx->vb_descriptor_user_sgprs, num_desc * 4);
-      sctx->vertex_buffer_user_sgprs_dirty = false;
-   }
-   radeon_end();
-
    if (sctx->graphics_bindless_pointer_dirty) {
       si_emit_global_shader_pointers(sctx, &sctx->bindless_descriptors);
       sctx->graphics_bindless_pointer_dirty = false;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 30bbbd4bc0c..fb4f721dbc4 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -1330,13 +1330,16 @@ void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx)
    *sctx->last_pkt3_write_data = PKT3(PKT3_NOP, 3, 0);
 }
 
-template <chip_class GFX_VERSION> ALWAYS_INLINE
+template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG> ALWAYS_INLINE
 static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 {
+   unsigned count = sctx->num_vertex_elements;
+   bool pointer_dirty, user_sgprs_dirty;
+
+   assert(count <= SI_MAX_ATTRIBS);
+
    if (sctx->vertex_buffers_dirty) {
-      unsigned count = sctx->num_vertex_elements;
       assert(count);
-      assert(count <= SI_MAX_ATTRIBS);
 
       struct si_vertex_elements *velems = sctx->vertex_elements;
       unsigned alloc_size = velems->vb_desc_list_alloc_size;
@@ -1359,11 +1362,9 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
          sctx->vb_descriptors_gpu_list = ptr;
          radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer,
                                    RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
-         sctx->vertex_buffer_pointer_dirty = true;
          sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
       } else {
          si_resource_reference(&sctx->vb_descriptors_buffer, NULL);
-         sctx->vertex_buffer_pointer_dirty = false;
          sctx->prefetch_L2_mask &= ~SI_PREFETCH_VBO_DESCRIPTORS;
       }
 
@@ -1422,13 +1423,52 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
          }
       }
 
-      /* Don't flush the const cache. It would have a very negative effect
-       * on performance (confirmed by testing). New descriptors are always
-       * uploaded to a fresh new buffer, so I don't think flushing the const
-       * cache is needed. */
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
-      sctx->vertex_buffer_user_sgprs_dirty = num_vbos_in_user_sgprs > 0;
       sctx->vertex_buffers_dirty = false;
+
+      pointer_dirty = alloc_size != 0;
+      user_sgprs_dirty = num_vbos_in_user_sgprs > 0;
+   } else {
+      pointer_dirty = sctx->vertex_buffer_pointer_dirty;
+      user_sgprs_dirty = sctx->vertex_buffer_user_sgprs_dirty;
+   }
+
+   if (pointer_dirty || user_sgprs_dirty) {
+      struct radeon_cmdbuf *cs = &sctx->gfx_cs;
+      unsigned num_vbos_in_user_sgprs = sctx->screen->num_vbos_in_user_sgprs;
+      unsigned sh_base = si_get_user_data_base(GFX_VERSION, HAS_TESS, HAS_GS, NGG,
+                                               PIPE_SHADER_VERTEX);
+      assert(count);
+
+      radeon_begin(cs);
+
+      /* Set the pointer to vertex buffer descriptors. */
+      if (pointer_dirty && count > num_vbos_in_user_sgprs) {
+         /* Find the location of the VB descriptor pointer. */
+         unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR;
+         if (GFX_VERSION >= GFX9) {
+            if (HAS_TESS)
+               sh_dw_offset = GFX9_TCS_NUM_USER_SGPR;
+            else if (HAS_GS)
+               sh_dw_offset = GFX9_VSGS_NUM_USER_SGPR;
+         }
+
+         radeon_set_sh_reg(cs, sh_base + sh_dw_offset * 4,
+                           sctx->vb_descriptors_buffer->gpu_address +
+                           sctx->vb_descriptors_offset);
+         sctx->vertex_buffer_pointer_dirty = false;
+      }
+
+      /* Set VB descriptors in user SGPRs. */
+      if (user_sgprs_dirty) {
+         assert(num_vbos_in_user_sgprs);
+
+         unsigned num_sgprs = MIN2(count, num_vbos_in_user_sgprs) * 4;
+
+         radeon_set_sh_reg_seq(cs, sh_base + SI_SGPR_VS_VB_DESCRIPTOR_FIRST * 4, num_sgprs);
+         radeon_emit_array(cs, sctx->vb_descriptor_user_sgprs, num_sgprs);
+         sctx->vertex_buffer_user_sgprs_dirty = false;
+      }
+      radeon_end();
    }
 
    return true;
@@ -2062,8 +2102,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
    /* Graphics shader descriptors must be uploaded after si_update_shaders because
     * it binds tess and GS ring buffers.
     */
-   if (unlikely(!si_upload_graphics_shader_descriptors(sctx) ||
-                !si_upload_vertex_buffer_descriptors<GFX_VERSION>(sctx))) {
+   if (unlikely(!si_upload_graphics_shader_descriptors(sctx))) {
       DRAW_CLEANUP;
       return;
    }
@@ -2103,6 +2142,12 @@ static void si_draw_vbo(struct pipe_context *ctx,
       sctx->emit_cache_flush(sctx, &sctx->gfx_cs);
       /* <-- CUs are idle here. */
 
+      /* This uploads VBO descriptors and sets user SGPRs. */
+      if (unlikely((!si_upload_vertex_buffer_descriptors<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx)))) {
+         DRAW_CLEANUP;
+         return;
+      }
+
       if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
          sctx->atoms.s.render_cond.emit(sctx);
          sctx->dirty_atoms &= ~si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
@@ -2136,6 +2181,12 @@ static void si_draw_vbo(struct pipe_context *ctx,
       /* Only prefetch the API VS and VBO descriptors. */
       si_emit_prefetch_L2<GFX_VERSION, HAS_TESS, HAS_GS, NGG, PREFETCH_BEFORE_DRAW>(sctx);
 
+      /* This uploads VBO descriptors and sets user SGPRs. */
+      if (unlikely((!si_upload_vertex_buffer_descriptors<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx)))) {
+         DRAW_CLEANUP;
+         return;
+      }
+
       si_emit_all_states<GFX_VERSION, HAS_TESS, HAS_GS, NGG>
             (sctx, info, indirect, prim, instance_count, min_direct_count,
              primitive_restart, masked_atoms);



More information about the mesa-commit mailing list