Mesa (main): mesa: use pipe_vertex_state in vbo and st/mesa for lower display list overhead
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Oct 1 15:45:22 UTC 2021
Module: Mesa
Branch: main
Commit: e78d7fe7d55370a5b6656027e22acd15b0bab817
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e78d7fe7d55370a5b6656027e22acd15b0bab817
Author: Marek Olšák <marek.olsak at amd.com>
Date: Wed Aug 11 23:32:38 2021 -0400
mesa: use pipe_vertex_state in vbo and st/mesa for lower display list overhead
Acked-By: Mike Blumenkrantz <michael.blumenkrantz at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13050>
---
src/mesa/main/dd.h | 18 ++++++
src/mesa/main/dlist.c | 13 +++-
src/mesa/state_tracker/st_atom.c | 24 +++----
src/mesa/state_tracker/st_atom.h | 7 ++
src/mesa/state_tracker/st_atom_array.c | 37 +++++++++++
src/mesa/state_tracker/st_cb_feedback.c | 2 +-
src/mesa/state_tracker/st_context.c | 2 +-
src/mesa/state_tracker/st_draw.c | 61 +++++++++++++++++-
src/mesa/state_tracker/st_draw.h | 3 +-
src/mesa/vbo/vbo_save.h | 8 +++
src/mesa/vbo/vbo_save_api.c | 19 ++++++
src/mesa/vbo/vbo_save_draw.c | 111 ++++++++++++++++++++++++++++++++
12 files changed, 287 insertions(+), 18 deletions(-)
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 78fa4c4e235..dd246585dca 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -60,12 +60,17 @@ struct gl_texture_image;
struct gl_texture_object;
struct gl_memory_info;
struct gl_transform_feedback_object;
+struct gl_vertex_array_object;
struct ati_fragment_shader;
struct util_queue_monitoring;
struct _mesa_prim;
struct _mesa_index_buffer;
struct pipe_draw_info;
struct pipe_draw_start_count_bias;
+struct pipe_vertex_state;
+struct pipe_draw_vertex_state_info;
+struct pipe_vertex_buffer;
+struct pipe_vertex_element;
/* GL_ARB_vertex_buffer_object */
/* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return
@@ -641,8 +646,21 @@ struct dd_function_table {
void (*DrawTransformFeedback)(struct gl_context *ctx, GLenum mode,
unsigned num_instances, unsigned stream,
struct gl_transform_feedback_object *tfb_vertcount);
+
+ void (*DrawGalliumVertexState)(struct gl_context *ctx,
+ struct pipe_vertex_state *state,
+ struct pipe_draw_vertex_state_info info,
+ const struct pipe_draw_start_count_bias *draws,
+ const uint8_t *mode,
+ unsigned num_draws,
+ bool per_vertex_edgeflags);
/*@}*/
+ struct pipe_vertex_state *
+ (*CreateGalliumVertexState)(struct gl_context *ctx,
+ const struct gl_vertex_array_object *vao,
+ struct gl_buffer_object *indexbuf,
+ uint32_t enabled_attribs);
/**
* \name State-changing functions.
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index f53045aae45..2ace2291005 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -73,7 +73,7 @@
#include "vbo/vbo_util.h"
#include "vbo/vbo_save.h"
#include "util/format_r11g11b10f.h"
-
+#include "util/u_inlines.h"
#include "util/u_memory.h"
#define USE_BITMAP_ATLAS 1
@@ -797,8 +797,15 @@ void mesa_print_display_list(GLuint list);
static void
vbo_destroy_vertex_list(struct gl_context *ctx, struct vbo_save_vertex_list *node)
{
- for (gl_vertex_processing_mode vpm = VP_MODE_FF; vpm < VP_MODE_MAX; ++vpm)
- _mesa_reference_vao(ctx, &node->VAO[vpm], NULL);
+ for (gl_vertex_processing_mode mode = VP_MODE_FF; mode < VP_MODE_MAX; ++mode) {
+ _mesa_reference_vao(ctx, &node->VAO[mode], NULL);
+ if (node->merged.gallium.private_refcount[mode]) {
+ assert(node->merged.gallium.private_refcount[mode] > 0);
+ p_atomic_add(&node->merged.gallium.state[mode]->reference.count,
+ -node->merged.gallium.private_refcount[mode]);
+ }
+ pipe_vertex_state_reference(&node->merged.gallium.state[mode], NULL);
+ }
if (node->merged.mode) {
free(node->merged.mode);
diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 8ba57130565..de9369e6e3a 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -140,31 +140,33 @@ static void check_program_state( struct st_context *st )
st->dirty |= dirty;
}
-static void check_attrib_edgeflag(struct st_context *st)
+void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags)
{
- GLboolean vertdata_edgeflags, edgeflag_culls_prims, edgeflags_enabled;
- struct gl_program *vp = st->ctx->VertexProgram._Current;
-
- edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL ||
- st->ctx->Polygon.BackMode != GL_FILL;
-
- vertdata_edgeflags = edgeflags_enabled &&
- _mesa_draw_edge_flag_array_enabled(st->ctx);
+ bool edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL ||
+ st->ctx->Polygon.BackMode != GL_FILL;
+ bool vertdata_edgeflags = edgeflags_enabled && per_vertex_edgeflags;
if (vertdata_edgeflags != st->vertdata_edgeflags) {
st->vertdata_edgeflags = vertdata_edgeflags;
+
+ struct gl_program *vp = st->ctx->VertexProgram._Current;
if (vp)
st->dirty |= ST_NEW_VERTEX_PROGRAM(st, st_program(vp));
}
- edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags &&
- !st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0];
+ bool edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags &&
+ !st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0];
if (edgeflag_culls_prims != st->edgeflag_culls_prims) {
st->edgeflag_culls_prims = edgeflag_culls_prims;
st->dirty |= ST_NEW_RASTERIZER;
}
}
+static void check_attrib_edgeflag(struct st_context *st)
+{ /* Forwards _mesa_draw_edge_flag_array_enabled() as per_vertex_edgeflags. */
+ st_update_edgeflags(st, _mesa_draw_edge_flag_array_enabled(st->ctx));
+}
+
/***********************************************************************
* Update all derived state:
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index a9806bacd82..bf2e80bbd8a 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -58,6 +58,7 @@ enum st_pipeline {
void st_init_atoms( struct st_context *st );
void st_destroy_atoms( struct st_context *st );
void st_validate_state( struct st_context *st, enum st_pipeline pipeline );
+void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags);
void
st_setup_arrays(struct st_context *st,
@@ -74,6 +75,12 @@ st_setup_current_user(struct st_context *st,
struct cso_velems_state *velements,
struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers);
+struct pipe_vertex_state *
+st_create_gallium_vertex_state(struct gl_context *ctx,
+ const struct gl_vertex_array_object *vao,
+ struct gl_buffer_object *indexbuf,
+ uint32_t enabled_attribs);
+
/* Define ST_NEW_xxx_INDEX */
enum {
#define ST_STATE(FLAG, st_update) FLAG##_INDEX,
diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index a3eb8e82931..8b5fd101890 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -322,3 +322,40 @@ st_update_array(struct st_context *st)
vbuffer);
st->last_num_vbuffers = num_vbuffers;
}
+
+struct pipe_vertex_state *
+st_create_gallium_vertex_state(struct gl_context *ctx,
+ const struct gl_vertex_array_object *vao,
+ struct gl_buffer_object *indexbuf,
+ uint32_t enabled_attribs)
+{
+ struct st_context *st = st_context(ctx);
+ const GLbitfield inputs_read = enabled_attribs; /* every enabled attrib is treated as read */
+ const GLbitfield dual_slot_inputs = 0; /* always zero */
+ struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
+ unsigned num_vbuffers = 0;
+ struct cso_velems_state velements;
+ bool uses_user_vertex_buffers;
+
+ setup_arrays(st, vao, dual_slot_inputs, inputs_read, 0, inputs_read, 0,
+ &velements, vbuffer, &num_vbuffers, &uses_user_vertex_buffers);
+
+ if (num_vbuffers != 1 || uses_user_vertex_buffers) { /* only one non-user vertex buffer is accepted */
+ assert(!"this should never happen with display lists");
+ return NULL; /* callers receive NULL in builds where the assert is compiled out */
+ }
+
+ velements.count = util_bitcount(inputs_read); /* one vertex element per enabled attrib bit */
+
+ struct pipe_screen *screen = st->screen;
+ struct pipe_vertex_state *state =
+ screen->create_vertex_state(screen, &vbuffer[0], velements.velems,
+ velements.count,
+ indexbuf ?
+ st_buffer_object(indexbuf)->buffer : NULL, /* index buffer is optional */
+ enabled_attribs);
+
+ for (unsigned i = 0; i < num_vbuffers; i++) /* unreference the temporary vbuffer[] entries */
+ pipe_vertex_buffer_unreference(&vbuffer[i]);
+ return state;
+}
diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c
index b9dbed84729..e3157f45aea 100644
--- a/src/mesa/state_tracker/st_cb_feedback.c
+++ b/src/mesa/state_tracker/st_cb_feedback.c
@@ -285,7 +285,7 @@ st_RenderMode(struct gl_context *ctx, GLenum newMode )
if (newMode == GL_RENDER) {
/* restore normal VBO draw function */
- st_init_draw_functions(&ctx->Driver);
+ st_init_draw_functions(st->screen, &ctx->Driver);
}
else if (newMode == GL_SELECT) {
if (!st->selection_stage)
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 1c53045175b..aac1bd6ea0c 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -960,7 +960,7 @@ st_init_driver_functions(struct pipe_screen *screen,
{
_mesa_init_sampler_object_functions(functions);
- st_init_draw_functions(functions);
+ st_init_draw_functions(screen, functions);
st_init_blit_functions(functions);
st_init_bufferobject_functions(screen, functions);
st_init_clear_functions(functions);
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 83f429efab6..bfe443781c4 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -309,14 +309,73 @@ st_draw_transform_feedback(struct gl_context *ctx, GLenum mode,
cso_draw_vbo(st->cso_context, &info, 0, &indirect, draw);
}
+static void
+st_draw_gallium_vertex_state(struct gl_context *ctx,
+ struct pipe_vertex_state *state,
+ struct pipe_draw_vertex_state_info info,
+ const struct pipe_draw_start_count_bias *draws,
+ const uint8_t *mode,
+ unsigned num_draws,
+ bool per_vertex_edgeflags)
+{
+ struct st_context *st = st_context(ctx);
+ bool old_vertdata_edgeflags = st->vertdata_edgeflags; /* remembered to detect a change at the end */
+
+ /* We don't flag any other states to make st_validate_state update edge
+ * flags, so we need to update them here.
+ */
+ st_update_edgeflags(st, per_vertex_edgeflags);
+
+ prepare_draw(st, ctx, ST_PIPELINE_RENDER_STATE_MASK_NO_VARRAYS,
+ ST_PIPELINE_RENDER_NO_VARRAYS);
+
+ struct pipe_context *pipe = st->pipe;
+ uint32_t velem_mask = ctx->VertexProgram._Current->info.inputs_read; /* inputs read by the current vertex program */
+
+ if (!mode) { /* no per-draw mode array: info.mode as passed in applies to all draws */
+ pipe->draw_vertex_state(pipe, state, velem_mask, info, draws, num_draws);
+ } else {
+ /* Find consecutive draws where mode doesn't vary. */
+ for (unsigned i = 0, first = 0; i <= num_draws; i++) {
+ if (i == num_draws || mode[i] != mode[first]) {
+ unsigned current_num_draws = i - first;
+
+ /* Increase refcount to be able to use take_vertex_state_ownership
+ * with all draws. The last group (i == num_draws) uses the
+ * reference that was passed in, so it needs no increment.
+ */
+ if (i != num_draws && info.take_vertex_state_ownership)
+ p_atomic_inc(&state->reference.count);
+
+ info.mode = mode[first];
+ pipe->draw_vertex_state(pipe, state, velem_mask, info, &draws[first],
+ current_num_draws);
+ first = i;
+ }
+ }
+ }
+
+ /* If per-vertex edge flags are different than the non-display-list state,
+ * just flag ST_NEW_VERTEX_ARRAYS, which will also completely revalidate
+ * edge flags in st_validate_state.
+ */
+ if (st->vertdata_edgeflags != old_vertdata_edgeflags)
+ st->dirty |= ST_NEW_VERTEX_ARRAYS;
+}
+
void
-st_init_draw_functions(struct dd_function_table *functions)
+st_init_draw_functions(struct pipe_screen *screen,
+ struct dd_function_table *functions)
{
functions->Draw = NULL;
functions->DrawGallium = st_draw_gallium;
functions->DrawGalliumMultiMode = st_draw_gallium_multimode;
functions->DrawIndirect = st_indirect_draw_vbo;
functions->DrawTransformFeedback = st_draw_transform_feedback;
+
+ if (screen->get_param(screen, PIPE_CAP_DRAW_VERTEX_STATE)) {
+ functions->DrawGalliumVertexState = st_draw_gallium_vertex_state;
+ functions->CreateGalliumVertexState = st_create_gallium_vertex_state;
+ }
}
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h
index 857e769a59e..d472d1d83f2 100644
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -41,7 +41,8 @@ struct _mesa_prim;
struct gl_context;
struct st_context;
-void st_init_draw_functions(struct dd_function_table *functions);
+void st_init_draw_functions(struct pipe_screen *screen,
+ struct dd_function_table *functions);
void st_destroy_draw( struct st_context *st );
diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h
index 30f9cbff969..96ed597781d 100644
--- a/src/mesa/vbo/vbo_save.h
+++ b/src/mesa/vbo/vbo_save.h
@@ -64,6 +64,14 @@ struct vbo_save_vertex_list {
struct pipe_draw_start_count_bias start_count;
};
unsigned num_draws;
+
+ struct {
+ struct gl_context *ctx;
+ struct pipe_vertex_state *state[VP_MODE_MAX];
+ int private_refcount[VP_MODE_MAX];
+ GLbitfield enabled_attribs[VP_MODE_MAX];
+ struct pipe_draw_vertex_state_info info;
+ } gallium;
} merged;
/* Cold: used during construction or to handle egde-cases */
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index de4a6fe32f5..7f2b1d4b917 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -900,6 +900,25 @@ end:
_mesa_reference_vao(ctx, &node->VAO[vpm], save->VAO[vpm]);
}
+ /* Prepare for DrawGalliumVertexState */
+ if (node->merged.num_draws && ctx->Driver.DrawGalliumVertexState) {
+ for (unsigned i = 0; i < VP_MODE_MAX; i++) {
+ uint32_t enabled_attribs = _vbo_get_vao_filter(i) &
+ node->VAO[i]->_EnabledWithMapMode;
+
+ node->merged.gallium.state[i] =
+ ctx->Driver.CreateGalliumVertexState(ctx, node->VAO[i],
+ node->cold->ib.obj,
+ enabled_attribs);
+ node->merged.gallium.private_refcount[i] = 0;
+ node->merged.gallium.enabled_attribs[i] = enabled_attribs;
+ }
+
+ node->merged.gallium.ctx = ctx;
+ node->merged.gallium.info.mode = node->merged.info.mode;
+ node->merged.gallium.info.take_vertex_state_ownership = false;
+ assert(node->merged.info.index_size == 4);
+ }
/* Deal with GL_COMPILE_AND_EXECUTE:
*/
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index 11572a2b687..50d4896ef9b 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -180,6 +180,114 @@ vbo_save_playback_vertex_list_loopback(struct gl_context *ctx, void *data)
loopback_vertex_list(ctx, node);
}
+enum vbo_save_status { /* result of vbo_save_playback_vertex_list_gallium() */
+ DONE, /* the list was handled; the caller returns immediately */
+ USE_SLOW_PATH, /* the caller continues with the regular playback path */
+};
+
+/**
+ * Fast-path playback of a compiled vertex list using the pipe_vertex_state
+ * objects pre-built for it, via ctx->Driver.DrawGalliumVertexState.
+ *
+ * \param ctx              context executing the list
+ * \param node             compiled vertex list to draw
+ * \param copy_to_current  call playback_copy_to_current() after drawing
+ *
+ * \return DONE if the list was fully handled here (drawn, or a precomputed
+ *         GL error was raised), USE_SLOW_PATH if the caller must fall back
+ *         to the regular playback path.
+ */
+static enum vbo_save_status
+vbo_save_playback_vertex_list_gallium(struct gl_context *ctx,
+                                      const struct vbo_save_vertex_list *node,
+                                      bool copy_to_current)
+{
+   /* Don't use this if selection or feedback mode is enabled. st/mesa can't
+    * handle it.
+    */
+   if (!ctx->Driver.DrawGalliumVertexState || ctx->RenderMode != GL_RENDER)
+      return USE_SLOW_PATH;
+
+   const gl_vertex_processing_mode mode = ctx->VertexProgram._VPMode;
+
+   /* This sets which vertex arrays are enabled, which determines
+    * which attribs have stride = 0 and whether edge flags are enabled.
+    */
+   const GLbitfield enabled = node->merged.gallium.enabled_attribs[mode];
+   ctx->Array._DrawVAOEnabledAttribs = enabled;
+   _mesa_set_varying_vp_inputs(ctx, enabled);
+
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* Use the slow path when there are vertex inputs without vertex
+    * elements. This happens with zero-stride attribs and non-fixed-func
+    * shaders.
+    *
+    * Dual-slot inputs are also unsupported because the higher slot is
+    * always missing in vertex elements.
+    *
+    * TODO: Add support for zero-stride attribs.
+    */
+   struct gl_program *vp = ctx->VertexProgram._Current;
+
+   if (vp->info.inputs_read & ~enabled || vp->DualSlotInputs)
+      return USE_SLOW_PATH;
+
+   struct pipe_vertex_state *state = node->merged.gallium.state[mode];
+   struct pipe_draw_vertex_state_info info = node->merged.gallium.info;
+
+   /* Return precomputed GL errors such as invalid shaders. */
+   if (!ctx->ValidPrimMask) {
+      _mesa_error(ctx, ctx->DrawGLError, "glCallList");
+      return DONE;
+   }
+
+   if (node->merged.gallium.ctx == ctx) {
+      /* This mechanism allows passing references to the driver without
+       * using atomics to increase the reference count.
+       *
+       * This private refcount can be decremented without atomics but only
+       * one context (ctx above) can use this counter (so that it's only
+       * used by 1 thread).
+       *
+       * This number is atomically added to reference.count at
+       * initialization. If it's never used, the same number is atomically
+       * subtracted from reference.count before destruction. If this number
+       * is decremented, we can pass one reference to the driver without
+       * touching reference.count with atomics. At destruction we only
+       * subtract the number of references we have not returned. This can
+       * possibly turn a million atomic increments into 1 add and 1 subtract
+       * atomic op over the whole lifetime of an app.
+       */
+      int * const private_refcount = (int*)&node->merged.gallium.private_refcount[mode];
+      assert(*private_refcount >= 0);
+
+      if (unlikely(*private_refcount == 0)) {
+         /* pipe_vertex_state can be reused through util_vertex_state_cache,
+          * and there can be many display lists over-incrementing this number,
+          * causing it to overflow.
+          *
+          * Guess that the same state can never be used by N=500000 display
+          * lists, so one display list can only increment it by
+          * INT_MAX / N.
+          */
+         const int add_refs = INT_MAX / 500000;
+         p_atomic_add(&state->reference.count, add_refs);
+         *private_refcount = add_refs;
+      }
+
+      (*private_refcount)--;
+      info.take_vertex_state_ownership = true;
+   }
+
+   /* "enabled" is a bitmask of VERT_BIT_* flags, so per-vertex edge flags
+    * must be tested with the edge flag bit. VERT_ATTRIB_EDGEFLAG is the
+    * attribute index and would instead match unrelated low attrib bits.
+    */
+   const bool per_vertex_edgeflags = (enabled & VERT_BIT_EDGEFLAG) != 0;
+
+   /* Fast path using a pre-built gallium vertex buffer state. */
+   if (node->merged.mode || node->merged.num_draws > 1) {
+      ctx->Driver.DrawGalliumVertexState(ctx, state, info,
+                                         node->merged.start_counts,
+                                         node->merged.mode,
+                                         node->merged.num_draws,
+                                         per_vertex_edgeflags);
+   } else if (node->merged.num_draws) {
+      ctx->Driver.DrawGalliumVertexState(ctx, state, info,
+                                         &node->merged.start_count,
+                                         NULL, 1,
+                                         per_vertex_edgeflags);
+   }
+
+   if (copy_to_current)
+      playback_copy_to_current(ctx, node);
+   return DONE;
+}
+
/**
* Execute the buffer and save copied verts.
* This is called from the display list code when executing
@@ -202,6 +310,9 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data, bool copy_to_c
return;
}
+ if (vbo_save_playback_vertex_list_gallium(ctx, node, copy_to_current) == DONE)
+ return;
+
bind_vertex_list(ctx, node);
/* Need that at least one time. */
More information about the mesa-commit
mailing list