[Mesa-dev] [PATCH 8/8] radeonsi: don't update dependent states if it has no effect

Marek Olšák maraeo at gmail.com
Wed Jun 7 18:30:40 UTC 2017


On Wed, Jun 7, 2017 at 2:34 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> On 06.06.2017 21:16, Samuel Pitoiset wrote:
>>
>> I really like the idea. :-)
>
>
> Seconded!
>
>
>> Though, I have two general comments:
>>
>> 1) I think it would be better to introduce some sort of compare helper
>> functions for the different state changes. Also, for correctness it might be
>> safer to do the opposite checks (if someone introduce a new field and forget
>> to update the relevant part).
>
>
> What do you mean by opposite checks, and how would it help?
>
> I've also been occasionally thinking about how we track the various
> dirtiness bits. It would certainly be nice if we could have some way of
> getting more confidence that no dependencies are missing, to make
> re-juggling these kinds of optimizations easier. The test coverage in
> typical test suites for these things is probably quite bad.
>
> It would be cool if we could either generate some of the dependency tracking
> automatically, or at least have a static verifier.
>
> Anyway, it's still definitely worth making the changes of this patch.
>
>
>> 2) I would suggest to introduce uses_instance_divisors in a separate
>> patch.
>
>
> Yeah, it wouldn't hurt to split this up a bit.
>
> Two more comments below.
>
>
>
>>
>> Btw, I'm not going to review patch 4 because I'm not familiar enough with
>> this part.
>>
>> On 06/05/2017 06:51 PM, Marek Olšák wrote:
>>>
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> This and the previous commit decrease IB sizes and the number of
>>> si_update_shaders invocations as follows:
>>>
>>>                   IB size   si_update_shader calls
>>> Borderlands 2      -10%            -27%
>>> Deus Ex: MD         -5%            -11%
>>> Talos Principle     -8%            -30%
>>> ---
>>>   src/gallium/drivers/radeonsi/si_state.c         | 68
>>> ++++++++++++++++++++++---
>>>   src/gallium/drivers/radeonsi/si_state.h         |  1 +
>>>   src/gallium/drivers/radeonsi/si_state_shaders.c | 24 +++++++--
>>>   3 files changed, 80 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_state.c
>>> b/src/gallium/drivers/radeonsi/si_state.c
>>> index 5e5f564..323ec5e 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>>> @@ -596,23 +596,41 @@ static void *si_create_blend_state_mode(struct
>>> pipe_context *ctx,
>>>   static void *si_create_blend_state(struct pipe_context *ctx,
>>>                      const struct pipe_blend_state *state)
>>>   {
>>>       return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
>>>   }
>>>   static void si_bind_blend_state(struct pipe_context *ctx, void *state)
>>>   {
>>>       struct si_context *sctx = (struct si_context *)ctx;
>>> -    si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
>>> -    si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>>> -    sctx->do_update_shaders = true;
>>> +    struct si_state_blend *old_blend = sctx->queued.named.blend;
>>> +    struct si_state_blend *blend = (struct si_state_blend *)state;
>>> +
>>> +    if (!state)
>>> +        return;
>
>
> Does this ever happen? If it does, shouldn't we still bind the NULL state?

radeonsi already doesn't bind null CSOs for a few other states. I
think we can only get these calls when u_blitter is invoked first in
the context and it restores NULL states at the end.


>
>
>
>>> +
>>> +    if (!old_blend ||
>>> +         old_blend->cb_target_mask != blend->cb_target_mask ||
>>> +         old_blend->dual_src_blend != blend->dual_src_blend)
>>> +        si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>>> +
>>> +    si_pm4_bind_state(sctx, blend, state);
>>> +
>>> +    if (!old_blend ||
>>> +        old_blend->cb_target_mask != blend->cb_target_mask ||
>>> +        old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
>>> +        old_blend->alpha_to_one != blend->alpha_to_one ||
>>> +        old_blend->dual_src_blend != blend->dual_src_blend ||
>>> +        old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
>>> +        old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
>>> +        sctx->do_update_shaders = true;
>>>   }
>>>   static void si_delete_blend_state(struct pipe_context *ctx, void
>>> *state)
>>>   {
>>>       struct si_context *sctx = (struct si_context *)ctx;
>>>       si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
>>>   }
>>>   static void si_set_blend_color(struct pipe_context *ctx,
>>>                      const struct pipe_blend_color *state)
>>> @@ -914,24 +932,41 @@ static void si_bind_rs_state(struct pipe_context
>>> *ctx, void *state)
>>>       }
>>>       sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
>>>       sctx->current_vs_state |=
>>> S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
>>>       r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable,
>>> rs->clip_halfz);
>>>       si_pm4_bind_state(sctx, rasterizer, rs);
>>>       si_update_poly_offset_state(sctx);
>>> -    si_mark_atom_dirty(sctx, &sctx->clip_regs);
>>> +    if (!old_rs ||
>>> +        old_rs->clip_plane_enable != rs->clip_plane_enable ||
>>> +        old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
>>> +        si_mark_atom_dirty(sctx, &sctx->clip_regs);
>>> +
>>>       sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
>>>           rs->line_stipple_enable;
>>> -    sctx->do_update_shaders = true;
>>> +
>>> +    if (!old_rs ||
>>> +        old_rs->clip_plane_enable != rs->clip_plane_enable ||
>>> +        old_rs->rasterizer_discard != rs->rasterizer_discard ||
>>> +        old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
>>> +        old_rs->flatshade != rs->flatshade ||
>>> +        old_rs->two_side != rs->two_side ||
>>> +        old_rs->multisample_enable != rs->multisample_enable ||
>>> +        old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
>>> +        old_rs->poly_smooth != rs->poly_smooth ||
>>> +        old_rs->line_smooth != rs->line_smooth ||
>>> +        old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
>>> +        old_rs->force_persample_interp != rs->force_persample_interp)
>>> +        sctx->do_update_shaders = true;
>>>   }
>>>   static void si_delete_rs_state(struct pipe_context *ctx, void *state)
>>>   {
>>>       struct si_context *sctx = (struct si_context *)ctx;
>>>       if (sctx->queued.named.rasterizer == state)
>>>           si_pm4_bind_state(sctx, poly_offset, NULL);
>>>       si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer
>>> *)state);
>>>   }
>>> @@ -1055,33 +1090,36 @@ static void *si_create_dsa_state(struct
>>> pipe_context *ctx,
>>>           si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN,
>>> fui(state->depth.bounds_min));
>>>           si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX,
>>> fui(state->depth.bounds_max));
>>>       }
>>>       return dsa;
>>>   }
>>>   static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
>>>   {
>>>           struct si_context *sctx = (struct si_context *)ctx;
>>> +    struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
>>>           struct si_state_dsa *dsa = state;
>>>           if (!state)
>>>                   return;
>>>       si_pm4_bind_state(sctx, dsa, dsa);
>>>       if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
>>>              sizeof(struct si_dsa_stencil_ref_part)) != 0) {
>>>           sctx->stencil_ref.dsa_part = dsa->stencil_ref;
>>>           si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
>>>       }
>>> -    sctx->do_update_shaders = true;
>>> +
>>> +    if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
>>> +        sctx->do_update_shaders = true;
>>>   }
>>>   static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
>>>   {
>>>       struct si_context *sctx = (struct si_context *)ctx;
>>>       si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
>>>   }
>>>   static void *si_create_db_flush_dsa(struct si_context *sctx)
>>>   {
>>> @@ -2437,20 +2475,21 @@ static void si_framebuffer_cache_flush(struct
>>> si_context *sctx)
>>>   static void si_set_framebuffer_state(struct pipe_context *ctx,
>>>                        const struct pipe_framebuffer_state *state)
>>>   {
>>>       struct si_context *sctx = (struct si_context *)ctx;
>>>       struct pipe_constant_buffer constbuf = {0};
>>>       struct r600_surface *surf = NULL;
>>>       struct r600_texture *rtex;
>>>       bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
>>>       unsigned old_nr_samples = sctx->framebuffer.nr_samples;
>>> +    unsigned old_colorbuf_enabled_4bit =
>>> sctx->framebuffer.colorbuf_enabled_4bit;
>>>       bool unbound = false;
>>>       int i;
>>>       /* Fast path for linear <-> sRGB changes and no-op changes. */
>>>       if (state->nr_cbufs &&
>>>           state->nr_cbufs == sctx->framebuffer.state.nr_cbufs &&
>>>           state->zsbuf == sctx->framebuffer.state.zsbuf) {
>>>           struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs;
>>>           /* See if the surfaces are equivalent. */
>>> @@ -2593,23 +2632,25 @@ static void si_set_framebuffer_state(struct
>>> pipe_context *ctx,
>>>           surf = (struct r600_surface*)state->zsbuf;
>>>           rtex = (struct r600_texture*)surf->base.texture;
>>>           if (!surf->depth_initialized) {
>>>               si_init_depth_surface(sctx, surf);
>>>           }
>>>           r600_context_add_resource_size(ctx, surf->base.texture);
>>>       }
>>>       si_update_poly_offset_state(sctx);
>>> -    si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>>>       si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
>>> +    if (old_colorbuf_enabled_4bit !=
>>> sctx->framebuffer.colorbuf_enabled_4bit)
>>> +        si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>
>
> si_emit_cb_render_state uses more info from sctx->framebuffer when RB+ is
> used.

You're right. I'll fix that.

Marek


More information about the mesa-dev mailing list