[Mesa-dev] [PATCH 3/9] st/mesa: completely rewrite state atoms
Rob Clark
robdclark at gmail.com
Mon Jul 25 15:42:01 UTC 2016
On Mon, Jul 25, 2016 at 11:16 AM, Brian Paul <brianp at vmware.com> wrote:
> On 07/18/2016 07:11 AM, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> The goal is to do this in st_validate_state:
>> while (dirty)
>> atoms[u_bit_scan(&dirty)]->update(st);
>>
>> That implies that atoms can't specify which flags they consume.
>> There is exactly one ST_NEW_* flag for each atom. (58 flags in total)
>>
>> There are macros that combine multiple flags into one for easier use.
>>
>> All _NEW_* flags are translated into ST_NEW_* flags in
>> st_invalidate_state.
>> st/mesa doesn't keep the _NEW_* flags after that.
>>
>> torcs is 2% faster between the previous patch and the end of this series.
>> ---
>> src/mesa/state_tracker/st_atom.c | 153 +++++-------------
>> src/mesa/state_tracker/st_atom.h | 210
>> +++++++++++++++++--------
>> src/mesa/state_tracker/st_atom_array.c | 4 -
>> src/mesa/state_tracker/st_atom_atomicbuf.c | 24 ---
>> src/mesa/state_tracker/st_atom_blend.c | 4 -
>> src/mesa/state_tracker/st_atom_clip.c | 4 -
>> src/mesa/state_tracker/st_atom_constbuf.c | 48 ------
>> src/mesa/state_tracker/st_atom_depth.c | 4 -
>> src/mesa/state_tracker/st_atom_framebuffer.c | 4 -
>> src/mesa/state_tracker/st_atom_image.c | 24 ---
>> src/mesa/state_tracker/st_atom_list.h | 75 +++++++++
>> src/mesa/state_tracker/st_atom_msaa.c | 8 -
>> src/mesa/state_tracker/st_atom_pixeltransfer.c | 4 -
>> src/mesa/state_tracker/st_atom_rasterizer.c | 16 --
>> src/mesa/state_tracker/st_atom_sampler.c | 4 -
>> src/mesa/state_tracker/st_atom_scissor.c | 8 -
>> src/mesa/state_tracker/st_atom_shader.c | 24 ---
>> src/mesa/state_tracker/st_atom_stipple.c | 5 -
>> src/mesa/state_tracker/st_atom_storagebuf.c | 24 ---
>> src/mesa/state_tracker/st_atom_tess.c | 4 -
>> src/mesa/state_tracker/st_atom_texture.c | 24 ---
>> src/mesa/state_tracker/st_atom_viewport.c | 4 -
>> src/mesa/state_tracker/st_cb_bitmap.c | 10 +-
>> src/mesa/state_tracker/st_cb_bufferobjects.c | 10 +-
>> src/mesa/state_tracker/st_cb_compute.c | 2 +-
>> src/mesa/state_tracker/st_cb_feedback.c | 2 +-
>> src/mesa/state_tracker/st_cb_program.c | 38 ++---
>> src/mesa/state_tracker/st_cb_texture.c | 2 +-
>> src/mesa/state_tracker/st_context.c | 100 ++++++++++--
>> src/mesa/state_tracker/st_context.h | 42 +----
>> src/mesa/state_tracker/st_draw.c | 4 +-
>> src/mesa/state_tracker/st_manager.c | 4 +-
>> 32 files changed, 377 insertions(+), 516 deletions(-)
>> create mode 100644 src/mesa/state_tracker/st_atom_list.h
>>
>> diff --git a/src/mesa/state_tracker/st_atom.c
>> b/src/mesa/state_tracker/st_atom.c
>> index 9d5cc0f..5843d2a 100644
>> --- a/src/mesa/state_tracker/st_atom.c
>> +++ b/src/mesa/state_tracker/st_atom.c
>> @@ -37,87 +37,18 @@
>> #include "st_manager.h"
>>
>>
>> -/**
>> - * This is used to initialize st->render_atoms[].
>> - */
>> -static const struct st_tracked_state *render_atoms[] =
>> -{
>> - &st_update_depth_stencil_alpha,
>> - &st_update_clip,
>> -
>> - &st_update_fp,
>> - &st_update_gp,
>> - &st_update_tep,
>> - &st_update_tcp,
>> - &st_update_vp,
>> -
>> - &st_update_rasterizer,
>> - &st_update_polygon_stipple,
>> - &st_update_viewport,
>> - &st_update_scissor,
>> - &st_update_window_rectangles,
>> - &st_update_blend,
>> - &st_update_vertex_texture,
>> - &st_update_fragment_texture,
>> - &st_update_geometry_texture,
>> - &st_update_tessctrl_texture,
>> - &st_update_tesseval_texture,
>> - &st_update_sampler, /* depends on update_*_texture for swizzle */
>> - &st_bind_vs_images,
>> - &st_bind_tcs_images,
>> - &st_bind_tes_images,
>> - &st_bind_gs_images,
>> - &st_bind_fs_images,
>> - &st_update_framebuffer, /* depends on update_*_texture and
>> bind_*_images */
>> - &st_update_msaa,
>> - &st_update_sample_shading,
>> - &st_update_vs_constants,
>> - &st_update_tcs_constants,
>> - &st_update_tes_constants,
>> - &st_update_gs_constants,
>> - &st_update_fs_constants,
>> - &st_bind_vs_ubos,
>> - &st_bind_tcs_ubos,
>> - &st_bind_tes_ubos,
>> - &st_bind_fs_ubos,
>> - &st_bind_gs_ubos,
>> - &st_bind_vs_atomics,
>> - &st_bind_tcs_atomics,
>> - &st_bind_tes_atomics,
>> - &st_bind_fs_atomics,
>> - &st_bind_gs_atomics,
>> - &st_bind_vs_ssbos,
>> - &st_bind_tcs_ssbos,
>> - &st_bind_tes_ssbos,
>> - &st_bind_fs_ssbos,
>> - &st_bind_gs_ssbos,
>> - &st_update_pixel_transfer,
>> - &st_update_tess,
>> -
>> - /* this must be done after the vertex program update */
>> - &st_update_array
>> -};
>> -
>> -
>> -/**
>> - * This is used to initialize st->compute_atoms[].
>> - */
>> -static const struct st_tracked_state *compute_atoms[] =
>> +/* The list state update functions. */
>> +static const struct st_tracked_state *atoms[] =
>> {
>> - &st_update_cp,
>> - &st_update_compute_texture,
>> - &st_update_sampler, /* depends on update_compute_texture for swizzle
>> */
>> - &st_update_cs_constants,
>> - &st_bind_cs_ubos,
>> - &st_bind_cs_atomics,
>> - &st_bind_cs_ssbos,
>> - &st_bind_cs_images,
>> +#define ST_STATE(FLAG, st_update) &st_update,
>> +#include "st_atom_list.h"
>> +#undef ST_STATE
>> };
>>
>>
>> void st_init_atoms( struct st_context *st )
>> {
>> - /* no-op */
>> + STATIC_ASSERT(ARRAY_SIZE(atoms) <= 64);
>> }
>>
>>
>> @@ -127,13 +58,6 @@ void st_destroy_atoms( struct st_context *st )
>> }
>>
>>
>> -
>> -static bool
>> -check_state(const struct st_state_flags *a, const struct st_state_flags
>> *b)
>> -{
>> - return (a->mesa & b->mesa) || (a->st & b->st);
>> -}
>> -
>> /* Too complex to figure out, just check every time:
>> */
>> static void check_program_state( struct st_context *st )
>> @@ -141,13 +65,13 @@ static void check_program_state( struct st_context
>> *st )
>> struct gl_context *ctx = st->ctx;
>>
>> if (ctx->VertexProgram._Current != &st->vp->Base)
>> - st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
>> + st->dirty |= ST_NEW_VERTEX_PROGRAM;
>>
>> if (ctx->FragmentProgram._Current != &st->fp->Base)
>> - st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
>> + st->dirty |= ST_NEW_FRAGMENT_PROGRAM;
>>
>> if (ctx->GeometryProgram._Current != &st->gp->Base)
>> - st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
>> + st->dirty |= ST_NEW_GEOMETRY_PROGRAM;
>> }
>>
>> static void check_attrib_edgeflag(struct st_context *st)
>> @@ -165,14 +89,14 @@ static void check_attrib_edgeflag(struct st_context
>> *st)
>> arrays[VERT_ATTRIB_EDGEFLAG]->StrideB != 0;
>> if (vertdata_edgeflags != st->vertdata_edgeflags) {
>> st->vertdata_edgeflags = vertdata_edgeflags;
>> - st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
>> + st->dirty |= ST_NEW_VERTEX_PROGRAM;
>> }
>>
>> edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags &&
>>
>> !st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0];
>> if (edgeflag_culls_prims != st->edgeflag_culls_prims) {
>> st->edgeflag_culls_prims = edgeflag_culls_prims;
>> - st->dirty.st |= ST_NEW_RASTERIZER;
>> + st->dirty |= ST_NEW_RASTERIZER;
>> }
>> }
>>
>> @@ -183,49 +107,42 @@ static void check_attrib_edgeflag(struct st_context
>> *st)
>>
>> void st_validate_state( struct st_context *st, enum st_pipeline pipeline
>> )
>> {
>> - const struct st_tracked_state **atoms;
>> - struct st_state_flags *state;
>> - GLuint num_atoms;
>> - GLuint i;
>> + uint64_t dirty, pipeline_mask;
>> + uint32_t dirty_lo, dirty_hi;
>> +
>> + /* Get Mesa driver state. */
>> + st->dirty |= st->ctx->NewDriverState & ST_ALL_STATES_MASK;
>> + st->ctx->NewDriverState = 0;
>>
>> /* Get pipeline state. */
>> switch (pipeline) {
>> - case ST_PIPELINE_RENDER:
>> - atoms = render_atoms;
>> - num_atoms = ARRAY_SIZE(render_atoms);
>> - state = &st->dirty;
>> + case ST_PIPELINE_RENDER:
>> + check_attrib_edgeflag(st);
>> + check_program_state(st);
>> + st_manager_validate_framebuffers(st);
>> +
>> + pipeline_mask = ST_PIPELINE_RENDER_STATE_MASK;
>> break;
>> case ST_PIPELINE_COMPUTE:
>> - atoms = compute_atoms;
>> - num_atoms = ARRAY_SIZE(compute_atoms);
>> - state = &st->dirty_cp;
>> + pipeline_mask = ST_PIPELINE_COMPUTE_STATE_MASK;
>> break;
>> default:
>> unreachable("Invalid pipeline specified");
>> }
>>
>> - /* Get Mesa driver state. */
>> - st->dirty.st |= st->ctx->NewDriverState;
>> - st->dirty_cp.st |= st->ctx->NewDriverState;
>> - st->ctx->NewDriverState = 0;
>> -
>> - if (pipeline == ST_PIPELINE_RENDER) {
>> - check_attrib_edgeflag(st);
>> -
>> - check_program_state(st);
>> -
>> - st_manager_validate_framebuffers(st);
>> - }
>> -
>> - if (state->st == 0 && state->mesa == 0)
>> + dirty = st->dirty & pipeline_mask;
>> + if (!dirty)
>> return;
>>
>> - /*printf("%s %x/%x\n", __func__, state->mesa, state->st);*/
>> + dirty_lo = dirty;
>> + dirty_hi = dirty >> 32;
>>
>> - for (i = 0; i < num_atoms; i++) {
>> - if (check_state(state, &atoms[i]->dirty))
>> - atoms[i]->update( st );
>> - }
>> + /* Update states. */
>> + while (dirty_lo)
>> + atoms[u_bit_scan(&dirty_lo)]->update(st);
>> + while (dirty_hi)
>> + atoms[32 + u_bit_scan(&dirty_hi)]->update(st);
>>
>
> Could we just use the u_bit_scan64() function and void the hi/lo split?
fwiw, we actually did discuss that on irc, but I guess no one
summarized on email thread..
Marek's concern was that would generate worse code on 32b since it
would pull the right-shift into the loop.
I'm not entirely sure if gcc would be clever enough in this case or
not. I guess someone needs to compare generated asm in both cases.
And either use u_bit_scan64() if the compiler is clever enough, or add
a comment explaining the reason.
BR,
-R
> Otherwise, the series looks good to me. Nice clean-ups!
>
> -Brian
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list