[Mesa-dev] [PATCH 8/8] radeonsi: don't update dependent states if it has no effect
Samuel Pitoiset
samuel.pitoiset at gmail.com
Wed Jun 7 18:57:42 UTC 2017
On 06/07/2017 02:34 PM, Nicolai Hähnle wrote:
> On 06.06.2017 21:16, Samuel Pitoiset wrote:
>> I really like the idea. :-)
>
> Seconded!
>
>
>> Though, I have two general comments:
>>
>> 1) I think it would be better to introduce some sort of compare helper
>> functions for the different state changes. Also, for correctness it
>> might be safer to do the opposite checks (if someone introduce a new
>> field and forget to update the relevant part).
>
> What do you mean by opposite checks, and how would it help?
It won't help, ignore this comment.
Though, it would be nice to have something which checks if a new field
is missed somewhere.
>
> I've also been occasionally thinking about how we track the various
> dirtiness bits. It would certainly be nice if we could have some way of
> getting more confidence that no dependencies are missing, to make
> re-juggling these kinds of optimizations easier. The test coverage in
> typical test suites for these things is probably quite bad.
>
> It would be cool if we could either generate some of the dependency
> tracking automatically, or at least have a static verifier.
>
> Anyway, it's still definitely worth making the changes of this patch.
>
>
>> 2) I would suggest to introduce uses_instance_divisors in a separate
>> patch.
>
> Yeah, it wouldn't hurt to split this up a bit.
>
> Two more comments below.
>
>
>>
>> Btw, I'm not going to review patch 4 because I'm not familiar enough
>> with this part.
>>
>> On 06/05/2017 06:51 PM, Marek Olšák wrote:
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> This and the previous commit decrease IB sizes and the number of
>>> si_update_shaders invocations as follows:
>>>
>>> IB size si_update_shader calls
>>> Borderlands 2 -10% -27%
>>> Deus Ex: MD -5% -11%
>>> Talos Principle -8% -30%
>>> ---
>>> src/gallium/drivers/radeonsi/si_state.c | 68
>>> ++++++++++++++++++++++---
>>> src/gallium/drivers/radeonsi/si_state.h | 1 +
>>> src/gallium/drivers/radeonsi/si_state_shaders.c | 24 +++++++--
>>> 3 files changed, 80 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_state.c
>>> b/src/gallium/drivers/radeonsi/si_state.c
>>> index 5e5f564..323ec5e 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>>> @@ -596,23 +596,41 @@ static void *si_create_blend_state_mode(struct
>>> pipe_context *ctx,
>>> static void *si_create_blend_state(struct pipe_context *ctx,
>>> const struct pipe_blend_state *state)
>>> {
>>> return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
>>> }
>>> static void si_bind_blend_state(struct pipe_context *ctx, void *state)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>> - si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
>>> - si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>>> - sctx->do_update_shaders = true;
>>> + struct si_state_blend *old_blend = sctx->queued.named.blend;
>>> + struct si_state_blend *blend = (struct si_state_blend *)state;
>>> +
>>> + if (!state)
>>> + return;
>
> Does this ever happen? If it does, shouldn't we still bind the NULL state?
>
>
>>> +
>>> + if (!old_blend ||
>>> + old_blend->cb_target_mask != blend->cb_target_mask ||
>>> + old_blend->dual_src_blend != blend->dual_src_blend)
>>> + si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>>> +
>>> + si_pm4_bind_state(sctx, blend, state);
>>> +
>>> + if (!old_blend ||
>>> + old_blend->cb_target_mask != blend->cb_target_mask ||
>>> + old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
>>> + old_blend->alpha_to_one != blend->alpha_to_one ||
>>> + old_blend->dual_src_blend != blend->dual_src_blend ||
>>> + old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
>>> + old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
>>> + sctx->do_update_shaders = true;
>>> }
>>> static void si_delete_blend_state(struct pipe_context *ctx, void
>>> *state)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>> si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
>>> }
>>> static void si_set_blend_color(struct pipe_context *ctx,
>>> const struct pipe_blend_color *state)
>>> @@ -914,24 +932,41 @@ static void si_bind_rs_state(struct
>>> pipe_context *ctx, void *state)
>>> }
>>> sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
>>> sctx->current_vs_state |=
>>> S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
>>> r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable,
>>> rs->clip_halfz);
>>> si_pm4_bind_state(sctx, rasterizer, rs);
>>> si_update_poly_offset_state(sctx);
>>> - si_mark_atom_dirty(sctx, &sctx->clip_regs);
>>> + if (!old_rs ||
>>> + old_rs->clip_plane_enable != rs->clip_plane_enable ||
>>> + old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
>>> + si_mark_atom_dirty(sctx, &sctx->clip_regs);
>>> +
>>> sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
>>> rs->line_stipple_enable;
>>> - sctx->do_update_shaders = true;
>>> +
>>> + if (!old_rs ||
>>> + old_rs->clip_plane_enable != rs->clip_plane_enable ||
>>> + old_rs->rasterizer_discard != rs->rasterizer_discard ||
>>> + old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
>>> + old_rs->flatshade != rs->flatshade ||
>>> + old_rs->two_side != rs->two_side ||
>>> + old_rs->multisample_enable != rs->multisample_enable ||
>>> + old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
>>> + old_rs->poly_smooth != rs->poly_smooth ||
>>> + old_rs->line_smooth != rs->line_smooth ||
>>> + old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
>>> + old_rs->force_persample_interp != rs->force_persample_interp)
>>> + sctx->do_update_shaders = true;
>>> }
>>> static void si_delete_rs_state(struct pipe_context *ctx, void *state)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>> if (sctx->queued.named.rasterizer == state)
>>> si_pm4_bind_state(sctx, poly_offset, NULL);
>>> si_pm4_delete_state(sctx, rasterizer, (struct
>>> si_state_rasterizer *)state);
>>> }
>>> @@ -1055,33 +1090,36 @@ static void *si_create_dsa_state(struct
>>> pipe_context *ctx,
>>> si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN,
>>> fui(state->depth.bounds_min));
>>> si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX,
>>> fui(state->depth.bounds_max));
>>> }
>>> return dsa;
>>> }
>>> static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>> + struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
>>> struct si_state_dsa *dsa = state;
>>> if (!state)
>>> return;
>>> si_pm4_bind_state(sctx, dsa, dsa);
>>> if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
>>> sizeof(struct si_dsa_stencil_ref_part)) != 0) {
>>> sctx->stencil_ref.dsa_part = dsa->stencil_ref;
>>> si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
>>> }
>>> - sctx->do_update_shaders = true;
>>> +
>>> + if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
>>> + sctx->do_update_shaders = true;
>>> }
>>> static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>> si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
>>> }
>>> static void *si_create_db_flush_dsa(struct si_context *sctx)
>>> {
>>> @@ -2437,20 +2475,21 @@ static void si_framebuffer_cache_flush(struct
>>> si_context *sctx)
>>> static void si_set_framebuffer_state(struct pipe_context *ctx,
>>> const struct pipe_framebuffer_state *state)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>> struct pipe_constant_buffer constbuf = {0};
>>> struct r600_surface *surf = NULL;
>>> struct r600_texture *rtex;
>>> bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
>>> unsigned old_nr_samples = sctx->framebuffer.nr_samples;
>>> + unsigned old_colorbuf_enabled_4bit =
>>> sctx->framebuffer.colorbuf_enabled_4bit;
>>> bool unbound = false;
>>> int i;
>>> /* Fast path for linear <-> sRGB changes and no-op changes. */
>>> if (state->nr_cbufs &&
>>> state->nr_cbufs == sctx->framebuffer.state.nr_cbufs &&
>>> state->zsbuf == sctx->framebuffer.state.zsbuf) {
>>> struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs;
>>> /* See if the surfaces are equivalent. */
>>> @@ -2593,23 +2632,25 @@ static void si_set_framebuffer_state(struct
>>> pipe_context *ctx,
>>> surf = (struct r600_surface*)state->zsbuf;
>>> rtex = (struct r600_texture*)surf->base.texture;
>>> if (!surf->depth_initialized) {
>>> si_init_depth_surface(sctx, surf);
>>> }
>>> r600_context_add_resource_size(ctx, surf->base.texture);
>>> }
>>> si_update_poly_offset_state(sctx);
>>> - si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>>> si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
>>> + if (old_colorbuf_enabled_4bit !=
>>> sctx->framebuffer.colorbuf_enabled_4bit)
>>> + si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>
> si_emit_cb_render_state uses more info from sctx->framebuffer when RB+
> is used.
>
> Cheers,
> Nicolai
>
>
>>> +
>>> if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
>>> si_mark_atom_dirty(sctx, &sctx->msaa_config);
>>> if (sctx->framebuffer.nr_samples != old_nr_samples) {
>>> si_mark_atom_dirty(sctx, &sctx->msaa_config);
>>> si_mark_atom_dirty(sctx, &sctx->db_render_state);
>>> /* Set sample locations as fragment shader constants. */
>>> switch (sctx->framebuffer.nr_samples) {
>>> case 1:
>>> @@ -3727,20 +3768,23 @@ static void *si_create_vertex_elements(struct
>>> pipe_context *ctx,
>>> unsigned data_format, num_format;
>>> int first_non_void;
>>> unsigned vbo_index = elements[i].vertex_buffer_index;
>>> unsigned char swizzle[4];
>>> if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
>>> FREE(v);
>>> return NULL;
>>> }
>>> + if (elements[i].instance_divisor)
>>> + v->uses_instance_divisors = true;
>>> +
>>> if (!used[vbo_index]) {
>>> v->first_vb_use_mask |= 1 << i;
>>> used[vbo_index] = true;
>>> }
>>> desc = util_format_description(elements[i].src_format);
>>> first_non_void =
>>> util_format_get_first_non_void_channel(elements[i].src_format);
>>> data_format = si_translate_buffer_dataformat(ctx->screen,
>>> desc, first_non_void);
>>> num_format = si_translate_buffer_numformat(ctx->screen,
>>> desc, first_non_void);
>>> channel = first_non_void >= 0 ?
>>> &desc->channel[first_non_void] : NULL;
>>> @@ -3840,25 +3884,33 @@ static void *si_create_vertex_elements(struct
>>> pipe_context *ctx,
>>> S_008F0C_DATA_FORMAT(data_format);
>>> }
>>> memcpy(v->elements, elements, sizeof(struct
>>> pipe_vertex_element) * count);
>>> return v;
>>> }
>>> static void si_bind_vertex_elements(struct pipe_context *ctx, void
>>> *state)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>> + struct si_vertex_element *old = sctx->vertex_elements;
>>> struct si_vertex_element *v = (struct si_vertex_element*)state;
>>> sctx->vertex_elements = v;
>>> sctx->vertex_buffers_dirty = true;
>>> - sctx->do_update_shaders = true;
>>> +
>>> + if (v &&
>>> + (!old ||
>>> + old->count != v->count ||
>>> + old->uses_instance_divisors != v->uses_instance_divisors ||
>>> + v->uses_instance_divisors || /* we don't check which
>>> divisors changed */
>>> + memcmp(old->fix_fetch, v->fix_fetch,
>>> sizeof(v->fix_fetch[0]) * v->count)))
>>> + sctx->do_update_shaders = true;
>>> }
>>> static void si_delete_vertex_element(struct pipe_context *ctx, void
>>> *state)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>> if (sctx->vertex_elements == state)
>>> sctx->vertex_elements = NULL;
>>> FREE(state);
>>> }
>>> diff --git a/src/gallium/drivers/radeonsi/si_state.h
>>> b/src/gallium/drivers/radeonsi/si_state.h
>>> index 275f830..4da51be 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state.h
>>> +++ b/src/gallium/drivers/radeonsi/si_state.h
>>> @@ -102,20 +102,21 @@ struct si_vertex_element
>>> {
>>> unsigned count;
>>> unsigned first_vb_use_mask;
>>> /* Vertex buffer descriptor list size aligned for optimal
>>> prefetch. */
>>> unsigned desc_list_byte_size;
>>> uint8_t fix_fetch[SI_MAX_ATTRIBS];
>>> uint32_t rsrc_word3[SI_MAX_ATTRIBS];
>>> uint32_t format_size[SI_MAX_ATTRIBS];
>>> struct pipe_vertex_element elements[SI_MAX_ATTRIBS];
>>> + bool uses_instance_divisors;
>>> };
>>> union si_state {
>>> struct {
>>> struct si_state_blend *blend;
>>> struct si_state_rasterizer *rasterizer;
>>> struct si_state_dsa *dsa;
>>> struct si_pm4_state *poly_offset;
>>> struct si_pm4_state *ls;
>>> struct si_pm4_state *hs;
>>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> index c21f855..677a6de 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> @@ -2307,32 +2307,39 @@ static void si_bind_tes_shader(struct
>>> pipe_context *ctx, void *state)
>>> r600_update_vs_writes_viewport_index(&sctx->b,
>>> si_get_vs_info(sctx));
>>> si_set_active_descriptors_for_shader(sctx, sel);
>>> si_update_streamout_state(sctx);
>>> si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
>>> si_get_vs(sctx)->cso, si_get_vs_state(sctx));
>>> }
>>> static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
>>> {
>>> struct si_context *sctx = (struct si_context *)ctx;
>>> + struct si_shader_selector *old_sel = sctx->ps_shader.cso;
>>> struct si_shader_selector *sel = state;
>>> /* skip if supplied shader is one already in use */
>>> - if (sctx->ps_shader.cso == sel)
>>> + if (old_sel == sel)
>>> return;
>>> sctx->ps_shader.cso = sel;
>>> sctx->ps_shader.current = sel ? sel->first_variant : NULL;
>>> sctx->do_update_shaders = true;
>>> - if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
>>> - si_update_tess_uses_prim_id(sctx);
>>> - si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>>> +
>>> + if (sel) {
>>> + if (sctx->ia_multi_vgt_param_key.u.uses_tess)
>>> + si_update_tess_uses_prim_id(sctx);
>>> +
>>> + if (!old_sel ||
>>> + old_sel->info.colors_written != sel->info.colors_written)
>>> + si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>>> + }
>>> si_set_active_descriptors_for_shader(sctx, sel);
>>> }
>>> static void si_delete_shader(struct si_context *sctx, struct
>>> si_shader *shader)
>>> {
>>> if (shader->is_optimized) {
>>> util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
>>> &shader->optimized_ready);
>>> util_queue_fence_destroy(&shader->optimized_ready);
>>> }
>>> @@ -3081,20 +3088,23 @@ static void
>>> si_update_vgt_shader_config(struct si_context *sctx)
>>> si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
>>> }
>>> bool si_update_shaders(struct si_context *sctx)
>>> {
>>> struct pipe_context *ctx = (struct pipe_context*)sctx;
>>> struct si_compiler_ctx_state compiler_state;
>>> struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
>>> struct si_shader *old_vs = si_get_vs_state(sctx);
>>> bool old_clip_disable = old_vs ?
>>> old_vs->key.opt.hw_vs.clip_disable : false;
>>> + struct si_shader *old_ps = sctx->ps_shader.current;
>>> + unsigned old_spi_shader_col_format =
>>> + old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
>>> int r;
>>> compiler_state.tm = sctx->tm;
>>> compiler_state.debug = sctx->b.debug;
>>> compiler_state.is_debug_context = sctx->is_debug;
>>> /* Update stages before GS. */
>>> if (sctx->tes_shader.cso) {
>>> if (!sctx->tf_ring) {
>>> si_init_tess_factor_ring(sctx);
>>> @@ -3205,21 +3215,25 @@ bool si_update_shaders(struct si_context *sctx)
>>> S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) !=
>>> PIPE_FUNC_ALWAYS);
>>> if (si_pm4_state_changed(sctx, ps) ||
>>> si_pm4_state_changed(sctx, vs) ||
>>> sctx->sprite_coord_enable != rs->sprite_coord_enable ||
>>> sctx->flatshade != rs->flatshade) {
>>> sctx->sprite_coord_enable = rs->sprite_coord_enable;
>>> sctx->flatshade = rs->flatshade;
>>> si_mark_atom_dirty(sctx, &sctx->spi_map);
>>> }
>>> - if (sctx->screen->b.rbplus_allowed &&
>>> si_pm4_state_changed(sctx, ps))
>>> + if (sctx->screen->b.rbplus_allowed &&
>>> + si_pm4_state_changed(sctx, ps) &&
>>> + (!old_ps ||
>>> + old_spi_shader_col_format !=
>>> + sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format))
>>> si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>>> if (sctx->ps_db_shader_control != db_shader_control) {
>>> sctx->ps_db_shader_control = db_shader_control;
>>> si_mark_atom_dirty(sctx, &sctx->db_render_state);
>>> }
>>> if (sctx->smoothing_enabled !=
>>> sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) {
>>> sctx->smoothing_enabled =
>>> sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
>>> si_mark_atom_dirty(sctx, &sctx->msaa_config);
>>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
More information about the mesa-dev
mailing list