Mesa (main): st/mesa: use POPCNT in st_update_array if the CPU supports it
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Nov 3 23:50:04 UTC 2021
Module: Mesa
Branch: main
Commit: d24539b15256ac255ffb75f199ffda962b39bad5
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d24539b15256ac255ffb75f199ffda962b39bad5
Author: Marek Olšák <marek.olsak at amd.com>
Date: Sat Oct 23 23:25:00 2021 -0400
st/mesa: use POPCNT in st_update_array if the CPU supports it
The st_update_array overhead decreases from 8.28% to 7.67% for a viewperf
subtest.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13512>
---
src/mesa/state_tracker/st_atom.c | 16 +++++++--
src/mesa/state_tracker/st_atom.h | 4 +++
src/mesa/state_tracker/st_atom_array.cpp | 57 +++++++++++++++++++++-----------
src/mesa/state_tracker/st_context.c | 4 +--
4 files changed, 56 insertions(+), 25 deletions(-)
diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index de9369e6e3a..df2c5895fe7 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -38,21 +38,31 @@
#include "st_manager.h"
#include "st_util.h"
+#include "util/u_cpu_detect.h"
+
typedef void (*update_func_t)(struct st_context *st);
/* The list state update functions. */
-static const update_func_t update_functions[] =
+static update_func_t update_functions[ST_NUM_ATOMS];
+
+static void
+init_atoms_once(void)
{
-#define ST_STATE(FLAG, st_update) st_update,
+#define ST_STATE(FLAG, st_update) update_functions[FLAG##_INDEX] = st_update;
#include "st_atom_list.h"
#undef ST_STATE
-};
+ if (util_get_cpu_caps()->has_popcnt)
+ update_functions[ST_NEW_VERTEX_ARRAYS_INDEX] = st_update_array_with_popcnt;
+}
void st_init_atoms( struct st_context *st )
{
STATIC_ASSERT(ARRAY_SIZE(update_functions) <= 64);
+
+ static once_flag flag = ONCE_FLAG_INIT;
+ call_once(&flag, init_atoms_once);
}
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index 0c53a229a1a..df2d68318f1 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -79,6 +79,9 @@ st_setup_current_user(struct st_context *st,
struct cso_velems_state *velements,
struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers);
+void
+st_update_array_with_popcnt(struct st_context *st);
+
struct pipe_vertex_state *
st_create_gallium_vertex_state(struct gl_context *ctx,
const struct gl_vertex_array_object *vao,
@@ -90,6 +93,7 @@ enum {
#define ST_STATE(FLAG, st_update) FLAG##_INDEX,
#include "st_atom_list.h"
#undef ST_STATE
+ ST_NUM_ATOMS,
};
/* Define ST_NEW_xxx values as static const uint64_t values.
diff --git a/src/mesa/state_tracker/st_atom_array.cpp b/src/mesa/state_tracker/st_atom_array.cpp
index 3a8a991a7aa..59a38931a05 100644
--- a/src/mesa/state_tracker/st_atom_array.cpp
+++ b/src/mesa/state_tracker/st_atom_array.cpp
@@ -70,7 +70,7 @@ init_velement(struct pipe_vertex_element *velements,
/* ALWAYS_INLINE helps the compiler realize that most of the parameters are
* on the stack.
*/
-static void ALWAYS_INLINE
+template<util_popcnt POPCNT> static void ALWAYS_INLINE
setup_arrays(struct st_context *st,
const struct gl_vertex_array_object *vao,
const GLbitfield dual_slot_inputs,
@@ -119,7 +119,7 @@ setup_arrays(struct st_context *st,
init_velement(velements->velems, &attrib->Format, 0,
binding->InstanceDivisor, bufidx,
dual_slot_inputs & BITFIELD_BIT(attr),
- util_bitcount(inputs_read & BITFIELD_MASK(attr)));
+ util_bitcount_fast<POPCNT>(inputs_read & BITFIELD_MASK(attr)));
}
return;
}
@@ -161,11 +161,12 @@ setup_arrays(struct st_context *st,
init_velement(velements->velems, &attrib->Format, off,
binding->InstanceDivisor, bufidx,
dual_slot_inputs & BITFIELD_BIT(attr),
- util_bitcount(inputs_read & BITFIELD_MASK(attr)));
+ util_bitcount_fast<POPCNT>(inputs_read & BITFIELD_MASK(attr)));
} while (attrmask);
}
}
+/* Only used by the select/feedback mode. */
void
st_setup_arrays(struct st_context *st,
const struct st_vertex_program *vp,
@@ -176,11 +177,11 @@ st_setup_arrays(struct st_context *st,
{
struct gl_context *ctx = st->ctx;
- setup_arrays(st, ctx->Array._DrawVAO, vp->Base.Base.DualSlotInputs,
- vp_variant->vert_attrib_mask,
- _mesa_draw_nonzero_divisor_bits(ctx),
- _mesa_draw_array_bits(ctx), _mesa_draw_user_array_bits(ctx),
- velements, vbuffer, num_vbuffers, has_user_vertex_buffers);
+ setup_arrays<POPCNT_NO>(st, ctx->Array._DrawVAO, vp->Base.Base.DualSlotInputs,
+ vp_variant->vert_attrib_mask,
+ _mesa_draw_nonzero_divisor_bits(ctx),
+ _mesa_draw_array_bits(ctx), _mesa_draw_user_array_bits(ctx),
+ velements, vbuffer, num_vbuffers, has_user_vertex_buffers);
}
/* ALWAYS_INLINE helps the compiler realize that most of the parameters are
@@ -189,7 +190,7 @@ st_setup_arrays(struct st_context *st,
* Return the index of the vertex buffer where current attribs have been
* uploaded.
*/
-static void ALWAYS_INLINE
+template<util_popcnt POPCNT> static void ALWAYS_INLINE
st_setup_current(struct st_context *st,
const struct st_vertex_program *vp,
const struct st_common_variant *vp_variant,
@@ -222,7 +223,7 @@ st_setup_current(struct st_context *st,
init_velement(velements->velems, &attrib->Format, cursor - data,
0, bufidx, dual_slot_inputs & BITFIELD_BIT(attr),
- util_bitcount(inputs_read & BITFIELD_MASK(attr)));
+ util_bitcount_fast<POPCNT>(inputs_read & BITFIELD_MASK(attr)));
cursor += alignment;
} while (curmask);
@@ -250,6 +251,7 @@ st_setup_current(struct st_context *st,
}
}
+/* Only used by the select/feedback mode. */
void
st_setup_current_user(struct st_context *st,
const struct st_vertex_program *vp,
@@ -281,8 +283,8 @@ st_setup_current_user(struct st_context *st,
}
}
-void
-st_update_array(struct st_context *st)
+template<util_popcnt POPCNT> inline void
+st_update_array_templ(struct st_context *st)
{
struct gl_context *ctx = st->ctx;
/* vertex program validation must be done before this */
@@ -297,15 +299,17 @@ st_update_array(struct st_context *st)
/* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */
/* Setup arrays */
- setup_arrays(st, ctx->Array._DrawVAO, vp->Base.Base.DualSlotInputs,
- vp_variant->vert_attrib_mask,
- _mesa_draw_nonzero_divisor_bits(ctx),
- _mesa_draw_array_bits(ctx), _mesa_draw_user_array_bits(ctx),
- &velements, vbuffer, &num_vbuffers, &uses_user_vertex_buffers);
+ setup_arrays<POPCNT>(st, ctx->Array._DrawVAO, vp->Base.Base.DualSlotInputs,
+ vp_variant->vert_attrib_mask,
+ _mesa_draw_nonzero_divisor_bits(ctx),
+ _mesa_draw_array_bits(ctx),
+ _mesa_draw_user_array_bits(ctx), &velements, vbuffer,
+ &num_vbuffers, &uses_user_vertex_buffers);
/* _NEW_CURRENT_ATTRIB */
/* Setup zero-stride attribs. */
- st_setup_current(st, vp, vp_variant, &velements, vbuffer, &num_vbuffers);
+ st_setup_current<POPCNT>(st, vp, vp_variant, &velements, vbuffer,
+ &num_vbuffers);
velements.count = vp->num_inputs + vp_variant->key.passthrough_edgeflags;
@@ -323,6 +327,18 @@ st_update_array(struct st_context *st)
st->last_num_vbuffers = num_vbuffers;
}
+void
+st_update_array(struct st_context *st)
+{
+ st_update_array_templ<POPCNT_NO>(st);
+}
+
+void
+st_update_array_with_popcnt(struct st_context *st)
+{
+ st_update_array_templ<POPCNT_YES>(st);
+}
+
struct pipe_vertex_state *
st_create_gallium_vertex_state(struct gl_context *ctx,
const struct gl_vertex_array_object *vao,
@@ -337,8 +353,9 @@ st_create_gallium_vertex_state(struct gl_context *ctx,
struct cso_velems_state velements;
bool uses_user_vertex_buffers;
- setup_arrays(st, vao, dual_slot_inputs, inputs_read, 0, inputs_read, 0,
- &velements, vbuffer, &num_vbuffers, &uses_user_vertex_buffers);
+ setup_arrays<POPCNT_NO>(st, vao, dual_slot_inputs, inputs_read, 0,
+ inputs_read, 0, &velements, vbuffer, &num_vbuffers,
+ &uses_user_vertex_buffers);
if (num_vbuffers != 1 || uses_user_vertex_buffers) {
assert(!"this should never happen with display lists");
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index aac1bd6ea0c..ab2cc9642f9 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -584,6 +584,8 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
uint i;
struct st_context *st = ST_CALLOC_STRUCT( st_context);
+ util_cpu_detect();
+
st->options = *options;
ctx->st = st;
@@ -842,8 +844,6 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
!st->lower_ucp;
st->shader_has_one_variant[MESA_SHADER_COMPUTE] = st->has_shareable_shaders;
- util_cpu_detect();
-
if (util_get_cpu_caps()->num_L3_caches == 1 ||
!st->pipe->set_context_param)
st->pin_thread_counter = ST_L3_PINNING_DISABLED;
More information about the mesa-commit
mailing list