Mesa (main): mesa: use pipe_vertex_state in vbo and st/mesa for lower display list overhead

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Oct 1 15:45:22 UTC 2021


Module: Mesa
Branch: main
Commit: e78d7fe7d55370a5b6656027e22acd15b0bab817
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e78d7fe7d55370a5b6656027e22acd15b0bab817

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Wed Aug 11 23:32:38 2021 -0400

mesa: use pipe_vertex_state in vbo and st/mesa for lower display list overhead

Acked-By: Mike Blumenkrantz <michael.blumenkrantz at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13050>

---

 src/mesa/main/dd.h                      |  18 ++++++
 src/mesa/main/dlist.c                   |  13 +++-
 src/mesa/state_tracker/st_atom.c        |  24 +++----
 src/mesa/state_tracker/st_atom.h        |   7 ++
 src/mesa/state_tracker/st_atom_array.c  |  37 +++++++++++
 src/mesa/state_tracker/st_cb_feedback.c |   2 +-
 src/mesa/state_tracker/st_context.c     |   2 +-
 src/mesa/state_tracker/st_draw.c        |  61 +++++++++++++++++-
 src/mesa/state_tracker/st_draw.h        |   3 +-
 src/mesa/vbo/vbo_save.h                 |   8 +++
 src/mesa/vbo/vbo_save_api.c             |  19 ++++++
 src/mesa/vbo/vbo_save_draw.c            | 111 ++++++++++++++++++++++++++++++++
 12 files changed, 287 insertions(+), 18 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 78fa4c4e235..dd246585dca 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -60,12 +60,17 @@ struct gl_texture_image;
 struct gl_texture_object;
 struct gl_memory_info;
 struct gl_transform_feedback_object;
+struct gl_vertex_array_object;
 struct ati_fragment_shader;
 struct util_queue_monitoring;
 struct _mesa_prim;
 struct _mesa_index_buffer;
 struct pipe_draw_info;
 struct pipe_draw_start_count_bias;
+struct pipe_vertex_state;
+struct pipe_draw_vertex_state_info;
+struct pipe_vertex_buffer;
+struct pipe_vertex_element;
 
 /* GL_ARB_vertex_buffer_object */
 /* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return
@@ -641,8 +646,21 @@ struct dd_function_table {
    void (*DrawTransformFeedback)(struct gl_context *ctx, GLenum mode,
                                  unsigned num_instances, unsigned stream,
                                  struct gl_transform_feedback_object *tfb_vertcount);
+
+   void (*DrawGalliumVertexState)(struct gl_context *ctx,
+                                  struct pipe_vertex_state *state,
+                                  struct pipe_draw_vertex_state_info info,
+                                  const struct pipe_draw_start_count_bias *draws,
+                                  const uint8_t *mode,
+                                  unsigned num_draws,
+                                  bool per_vertex_edgeflags);
    /*@}*/
 
+   struct pipe_vertex_state *
+      (*CreateGalliumVertexState)(struct gl_context *ctx,
+                                  const struct gl_vertex_array_object *vao,
+                                  struct gl_buffer_object *indexbuf,
+                                  uint32_t enabled_attribs);
 
    /**
     * \name State-changing functions.
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index f53045aae45..2ace2291005 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -73,7 +73,7 @@
 #include "vbo/vbo_util.h"
 #include "vbo/vbo_save.h"
 #include "util/format_r11g11b10f.h"
-
+#include "util/u_inlines.h"
 #include "util/u_memory.h"
 
 #define USE_BITMAP_ATLAS 1
@@ -797,8 +797,15 @@ void mesa_print_display_list(GLuint list);
 static void
 vbo_destroy_vertex_list(struct gl_context *ctx, struct vbo_save_vertex_list *node)
 {
-   for (gl_vertex_processing_mode vpm = VP_MODE_FF; vpm < VP_MODE_MAX; ++vpm)
-      _mesa_reference_vao(ctx, &node->VAO[vpm], NULL);
+   for (gl_vertex_processing_mode mode = VP_MODE_FF; mode < VP_MODE_MAX; ++mode) {
+      _mesa_reference_vao(ctx, &node->VAO[mode], NULL);
+      if (node->merged.gallium.private_refcount[mode]) {
+         assert(node->merged.gallium.private_refcount[mode] > 0);
+         p_atomic_add(&node->merged.gallium.state[mode]->reference.count,
+                      -node->merged.gallium.private_refcount[mode]);
+      }
+      pipe_vertex_state_reference(&node->merged.gallium.state[mode], NULL);
+   }
 
    if (node->merged.mode) {
       free(node->merged.mode);
diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 8ba57130565..de9369e6e3a 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -140,31 +140,33 @@ static void check_program_state( struct st_context *st )
    st->dirty |= dirty;
 }
 
-static void check_attrib_edgeflag(struct st_context *st)
+void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags)
 {
-   GLboolean vertdata_edgeflags, edgeflag_culls_prims, edgeflags_enabled;
-   struct gl_program *vp = st->ctx->VertexProgram._Current;
-
-   edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL ||
-                       st->ctx->Polygon.BackMode != GL_FILL;
-
-   vertdata_edgeflags = edgeflags_enabled &&
-      _mesa_draw_edge_flag_array_enabled(st->ctx);
+   bool edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL ||
+                            st->ctx->Polygon.BackMode != GL_FILL;
+   bool vertdata_edgeflags = edgeflags_enabled && per_vertex_edgeflags;
 
    if (vertdata_edgeflags != st->vertdata_edgeflags) {
       st->vertdata_edgeflags = vertdata_edgeflags;
+
+      struct gl_program *vp = st->ctx->VertexProgram._Current;
       if (vp)
          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, st_program(vp));
    }
 
-   edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags &&
-                          !st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0];
+   bool edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags &&
+                               !st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0];
    if (edgeflag_culls_prims != st->edgeflag_culls_prims) {
       st->edgeflag_culls_prims = edgeflag_culls_prims;
       st->dirty |= ST_NEW_RASTERIZER;
    }
 }
 
+static void check_attrib_edgeflag(struct st_context *st)
+{
+   st_update_edgeflags(st, _mesa_draw_edge_flag_array_enabled(st->ctx));
+}
+
 
 /***********************************************************************
  * Update all derived state:
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index a9806bacd82..bf2e80bbd8a 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -58,6 +58,7 @@ enum st_pipeline {
 void st_init_atoms( struct st_context *st );
 void st_destroy_atoms( struct st_context *st );
 void st_validate_state( struct st_context *st, enum st_pipeline pipeline );
+void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags);
 
 void
 st_setup_arrays(struct st_context *st,
@@ -74,6 +75,12 @@ st_setup_current_user(struct st_context *st,
                       struct cso_velems_state *velements,
                       struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers);
 
+struct pipe_vertex_state *
+st_create_gallium_vertex_state(struct gl_context *ctx,
+                               const struct gl_vertex_array_object *vao,
+                               struct gl_buffer_object *indexbuf,
+                               uint32_t enabled_attribs);
+
 /* Define ST_NEW_xxx_INDEX */
 enum {
 #define ST_STATE(FLAG, st_update) FLAG##_INDEX,
diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index a3eb8e82931..8b5fd101890 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -322,3 +322,40 @@ st_update_array(struct st_context *st)
                                        vbuffer);
    st->last_num_vbuffers = num_vbuffers;
 }
+
+struct pipe_vertex_state *
+st_create_gallium_vertex_state(struct gl_context *ctx,
+                               const struct gl_vertex_array_object *vao,
+                               struct gl_buffer_object *indexbuf,
+                               uint32_t enabled_attribs)
+{
+   struct st_context *st = st_context(ctx);
+   const GLbitfield inputs_read = enabled_attribs;
+   const GLbitfield dual_slot_inputs = 0; /* always zero */
+   struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
+   unsigned num_vbuffers = 0;
+   struct cso_velems_state velements;
+   bool uses_user_vertex_buffers;
+
+   setup_arrays(st, vao, dual_slot_inputs, inputs_read, 0, inputs_read, 0,
+                &velements, vbuffer, &num_vbuffers, &uses_user_vertex_buffers);
+
+   if (num_vbuffers != 1 || uses_user_vertex_buffers) {
+      assert(!"this should never happen with display lists");
+      return NULL;
+   }
+
+   velements.count = util_bitcount(inputs_read);
+
+   struct pipe_screen *screen = st->screen;
+   struct pipe_vertex_state *state =
+      screen->create_vertex_state(screen, &vbuffer[0], velements.velems,
+                                  velements.count,
+                                  indexbuf ?
+                                    st_buffer_object(indexbuf)->buffer : NULL,
+                                  enabled_attribs);
+
+   for (unsigned i = 0; i < num_vbuffers; i++)
+      pipe_vertex_buffer_unreference(&vbuffer[i]);
+   return state;
+}
diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c
index b9dbed84729..e3157f45aea 100644
--- a/src/mesa/state_tracker/st_cb_feedback.c
+++ b/src/mesa/state_tracker/st_cb_feedback.c
@@ -285,7 +285,7 @@ st_RenderMode(struct gl_context *ctx, GLenum newMode )
 
    if (newMode == GL_RENDER) {
       /* restore normal VBO draw function */
-      st_init_draw_functions(&ctx->Driver);
+      st_init_draw_functions(st->screen, &ctx->Driver);
    }
    else if (newMode == GL_SELECT) {
       if (!st->selection_stage)
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 1c53045175b..aac1bd6ea0c 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -960,7 +960,7 @@ st_init_driver_functions(struct pipe_screen *screen,
 {
    _mesa_init_sampler_object_functions(functions);
 
-   st_init_draw_functions(functions);
+   st_init_draw_functions(screen, functions);
    st_init_blit_functions(functions);
    st_init_bufferobject_functions(screen, functions);
    st_init_clear_functions(functions);
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 83f429efab6..bfe443781c4 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -309,14 +309,73 @@ st_draw_transform_feedback(struct gl_context *ctx, GLenum mode,
    cso_draw_vbo(st->cso_context, &info, 0, &indirect, draw);
 }
 
+static void
+st_draw_gallium_vertex_state(struct gl_context *ctx,
+                             struct pipe_vertex_state *state,
+                             struct pipe_draw_vertex_state_info info,
+                             const struct pipe_draw_start_count_bias *draws,
+                             const uint8_t *mode,
+                             unsigned num_draws,
+                             bool per_vertex_edgeflags)
+{
+   struct st_context *st = st_context(ctx);
+   bool old_vertdata_edgeflags = st->vertdata_edgeflags;
+
+   /* We don't flag any other states to make st_validate_state update edge
+    * flags, so we need to update them here.
+    */
+   st_update_edgeflags(st, per_vertex_edgeflags);
+
+   prepare_draw(st, ctx, ST_PIPELINE_RENDER_STATE_MASK_NO_VARRAYS,
+                ST_PIPELINE_RENDER_NO_VARRAYS);
+
+   struct pipe_context *pipe = st->pipe;
+   uint32_t velem_mask = ctx->VertexProgram._Current->info.inputs_read;
+
+   if (!mode) {
+      pipe->draw_vertex_state(pipe, state, velem_mask, info, draws, num_draws);
+   } else {
+      /* Find consecutive draws where mode doesn't vary. */
+      for (unsigned i = 0, first = 0; i <= num_draws; i++) {
+         if (i == num_draws || mode[i] != mode[first]) {
+            unsigned current_num_draws = i - first;
+
+            /* Increase refcount to be able to use take_vertex_state_ownership
+             * with all draws.
+             */
+            if (i != num_draws && info.take_vertex_state_ownership)
+               p_atomic_inc(&state->reference.count);
+
+            info.mode = mode[first];
+            pipe->draw_vertex_state(pipe, state, velem_mask, info, &draws[first],
+                                    current_num_draws);
+            first = i;
+         }
+      }
+   }
+
+   /* If per-vertex edge flags are different than the non-display-list state,
+    * just flag ST_NEW_VERTEX_ARRAYS, which will also completely revalidate
+    * edge flags in st_validate_state.
+    */
+   if (st->vertdata_edgeflags != old_vertdata_edgeflags)
+      st->dirty |= ST_NEW_VERTEX_ARRAYS;
+}
+
 void
-st_init_draw_functions(struct dd_function_table *functions)
+st_init_draw_functions(struct pipe_screen *screen,
+                       struct dd_function_table *functions)
 {
    functions->Draw = NULL;
    functions->DrawGallium = st_draw_gallium;
    functions->DrawGalliumMultiMode = st_draw_gallium_multimode;
    functions->DrawIndirect = st_indirect_draw_vbo;
    functions->DrawTransformFeedback = st_draw_transform_feedback;
+
+   if (screen->get_param(screen, PIPE_CAP_DRAW_VERTEX_STATE)) {
+      functions->DrawGalliumVertexState = st_draw_gallium_vertex_state;
+      functions->CreateGalliumVertexState = st_create_gallium_vertex_state;
+   }
 }
 
 
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h
index 857e769a59e..d472d1d83f2 100644
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -41,7 +41,8 @@ struct _mesa_prim;
 struct gl_context;
 struct st_context;
 
-void st_init_draw_functions(struct dd_function_table *functions);
+void st_init_draw_functions(struct pipe_screen *screen,
+                            struct dd_function_table *functions);
 
 void st_destroy_draw( struct st_context *st );
 
diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h
index 30f9cbff969..96ed597781d 100644
--- a/src/mesa/vbo/vbo_save.h
+++ b/src/mesa/vbo/vbo_save.h
@@ -64,6 +64,14 @@ struct vbo_save_vertex_list {
          struct pipe_draw_start_count_bias start_count;
       };
       unsigned num_draws;
+
+      struct {
+         struct gl_context *ctx;
+         struct pipe_vertex_state *state[VP_MODE_MAX];
+         int private_refcount[VP_MODE_MAX];
+         GLbitfield enabled_attribs[VP_MODE_MAX];
+         struct pipe_draw_vertex_state_info info;
+      } gallium;
    } merged;
 
    /* Cold: used during construction or to handle egde-cases */
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index de4a6fe32f5..7f2b1d4b917 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -900,6 +900,25 @@ end:
       _mesa_reference_vao(ctx, &node->VAO[vpm], save->VAO[vpm]);
    }
 
+   /* Prepare for DrawGalliumVertexState */
+   if (node->merged.num_draws && ctx->Driver.DrawGalliumVertexState) {
+      for (unsigned i = 0; i < VP_MODE_MAX; i++) {
+         uint32_t enabled_attribs = _vbo_get_vao_filter(i) &
+                                    node->VAO[i]->_EnabledWithMapMode;
+
+         node->merged.gallium.state[i] =
+            ctx->Driver.CreateGalliumVertexState(ctx, node->VAO[i],
+                                                 node->cold->ib.obj,
+                                                 enabled_attribs);
+         node->merged.gallium.private_refcount[i] = 0;
+         node->merged.gallium.enabled_attribs[i] = enabled_attribs;
+      }
+
+      node->merged.gallium.ctx = ctx;
+      node->merged.gallium.info.mode = node->merged.info.mode;
+      node->merged.gallium.info.take_vertex_state_ownership = false;
+      assert(node->merged.info.index_size == 4);
+   }
 
    /* Deal with GL_COMPILE_AND_EXECUTE:
     */
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index 11572a2b687..50d4896ef9b 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -180,6 +180,114 @@ vbo_save_playback_vertex_list_loopback(struct gl_context *ctx, void *data)
    loopback_vertex_list(ctx, node);
 }
 
+enum vbo_save_status {
+   DONE,
+   USE_SLOW_PATH,
+};
+
+static enum vbo_save_status
+vbo_save_playback_vertex_list_gallium(struct gl_context *ctx,
+                                      const struct vbo_save_vertex_list *node,
+                                      bool copy_to_current)
+{
+   /* Don't use this if selection or feedback mode is enabled. st/mesa can't
+    * handle it.
+    */
+   if (!ctx->Driver.DrawGalliumVertexState || ctx->RenderMode != GL_RENDER)
+      return USE_SLOW_PATH;
+
+   const gl_vertex_processing_mode mode = ctx->VertexProgram._VPMode;
+
+   /* This sets which vertex arrays are enabled, which determines
+    * which attribs have stride = 0 and whether edge flags are enabled.
+    */
+   const GLbitfield enabled = node->merged.gallium.enabled_attribs[mode];
+   ctx->Array._DrawVAOEnabledAttribs = enabled;
+   _mesa_set_varying_vp_inputs(ctx, enabled);
+
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* Use the slow path when there are vertex inputs without vertex
+    * elements. This happens with zero-stride attribs and non-fixed-func
+    * shaders.
+    *
+    * Dual-slot inputs are also unsupported because the higher slot is
+    * always missing in vertex elements.
+    *
+    * TODO: Add support for zero-stride attribs.
+    */
+   struct gl_program *vp = ctx->VertexProgram._Current;
+
+   if (vp->info.inputs_read & ~enabled || vp->DualSlotInputs)
+      return USE_SLOW_PATH;
+
+   struct pipe_vertex_state *state = node->merged.gallium.state[mode];
+   struct pipe_draw_vertex_state_info info = node->merged.gallium.info;
+
+   /* Return precomputed GL errors such as invalid shaders. */
+   if (!ctx->ValidPrimMask) {
+      _mesa_error(ctx, ctx->DrawGLError, "glCallList");
+      return DONE;
+   }
+
+   if (node->merged.gallium.ctx == ctx) {
+      /* This mechanism allows passing references to the driver without
+       * using atomics to increase the reference count.
+       *
+       * This private refcount can be decremented without atomics but only
+       * one context (ctx above) can use this counter (so that it's only
+       * used by 1 thread).
+       *
+       * This number is atomically added to reference.count at
+       * initialization. If it's never used, the same number is atomically
+       * subtracted from reference.count before destruction. If this number
+       * is decremented, we can pass one reference to the driver without
+       * touching reference.count with atomics. At destruction we only
+       * subtract the number of references we have not returned. This can
+       * possibly turn a million atomic increments into 1 add and 1 subtract
+       * atomic op over the whole lifetime of an app.
+       */
+      int * const private_refcount = (int*)&node->merged.gallium.private_refcount[mode];
+      assert(*private_refcount >= 0);
+
+      if (unlikely(*private_refcount == 0)) {
+         /* pipe_vertex_state can be reused through util_vertex_state_cache,
+          * and there can be many display lists over-incrementing this number,
+          * causing it to overflow.
+          *
+          * Guess that the same state can never be used by N=500000 display
+          * lists, so one display list can only increment it by
+          * INT_MAX / N.
+          */
+         const int add_refs = INT_MAX / 500000;
+         p_atomic_add(&state->reference.count, add_refs);
+         *private_refcount = add_refs;
+      }
+
+      (*private_refcount)--;
+      info.take_vertex_state_ownership = true;
+   }
+
+   /* Fast path using a pre-built gallium vertex buffer state. */
+   if (node->merged.mode || node->merged.num_draws > 1) {
+      ctx->Driver.DrawGalliumVertexState(ctx, state, info,
+                                         node->merged.start_counts,
+                                         node->merged.mode,
+                                         node->merged.num_draws,
+                                         enabled & VERT_BIT_EDGEFLAG);
+   } else if (node->merged.num_draws) {
+      ctx->Driver.DrawGalliumVertexState(ctx, state, info,
+                                         &node->merged.start_count,
+                                         NULL, 1,
+                                         enabled & VERT_BIT_EDGEFLAG);
+   }
+
+   if (copy_to_current)
+      playback_copy_to_current(ctx, node);
+   return DONE;
+}
+
 /**
  * Execute the buffer and save copied verts.
  * This is called from the display list code when executing
@@ -202,6 +310,9 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data, bool copy_to_c
       return;
    }
 
+   if (vbo_save_playback_vertex_list_gallium(ctx, node, copy_to_current) == DONE)
+      return;
+
    bind_vertex_list(ctx, node);
 
    /* Need that at least one time. */



More information about the mesa-commit mailing list