[Mesa-dev] [PATCH 8/8] radeonsi: don't update dependent states if it has no effect

Samuel Pitoiset samuel.pitoiset at gmail.com
Tue Jun 6 19:16:36 UTC 2017


I really like the idea. :-)

Though, I have two general comments:

1) I think it would be better to introduce some sort of compare helper 
functions for the different state changes. Also, for correctness it 
might be safer to do the opposite checks (if someone introduce a new 
field and forget to update the relevant part).

2) I would suggest to introduce uses_instance_divisors in a separate patch.

Btw, I'm not going to review patch 4 because I'm not familiar enough 
with this part.

On 06/05/2017 06:51 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> This and the previous commit decrease IB sizes and the number of
> si_update_shaders invocations as follows:
> 
>                   IB size   si_update_shader calls
> Borderlands 2      -10%            -27%
> Deus Ex: MD         -5%            -11%
> Talos Principle     -8%            -30%
> ---
>   src/gallium/drivers/radeonsi/si_state.c         | 68 ++++++++++++++++++++++---
>   src/gallium/drivers/radeonsi/si_state.h         |  1 +
>   src/gallium/drivers/radeonsi/si_state_shaders.c | 24 +++++++--
>   3 files changed, 80 insertions(+), 13 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 5e5f564..323ec5e 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -596,23 +596,41 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
>   
>   static void *si_create_blend_state(struct pipe_context *ctx,
>   				   const struct pipe_blend_state *state)
>   {
>   	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
>   }
>   
>   static void si_bind_blend_state(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
> -	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
> -	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
> -	sctx->do_update_shaders = true;
> +	struct si_state_blend *old_blend = sctx->queued.named.blend;
> +	struct si_state_blend *blend = (struct si_state_blend *)state;
> +
> +	if (!state)
> +		return;
> +
> +	if (!old_blend ||
> +	     old_blend->cb_target_mask != blend->cb_target_mask ||
> +	     old_blend->dual_src_blend != blend->dual_src_blend)
> +		si_mark_atom_dirty(sctx, &sctx->cb_render_state);
> +
> +	si_pm4_bind_state(sctx, blend, state);
> +
> +	if (!old_blend ||
> +	    old_blend->cb_target_mask != blend->cb_target_mask ||
> +	    old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
> +	    old_blend->alpha_to_one != blend->alpha_to_one ||
> +	    old_blend->dual_src_blend != blend->dual_src_blend ||
> +	    old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
> +	    old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
> +		sctx->do_update_shaders = true;
>   }
>   
>   static void si_delete_blend_state(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
>   }
>   
>   static void si_set_blend_color(struct pipe_context *ctx,
>   			       const struct pipe_blend_color *state)
> @@ -914,24 +932,41 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
>   	}
>   
>   	sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
>   	sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
>   
>   	r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
>   
>   	si_pm4_bind_state(sctx, rasterizer, rs);
>   	si_update_poly_offset_state(sctx);
>   
> -	si_mark_atom_dirty(sctx, &sctx->clip_regs);
> +	if (!old_rs ||
> +	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
> +	    old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
> +		si_mark_atom_dirty(sctx, &sctx->clip_regs);
> +
>   	sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
>   		rs->line_stipple_enable;
> -	sctx->do_update_shaders = true;
> +
> +	if (!old_rs ||
> +	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
> +	    old_rs->rasterizer_discard != rs->rasterizer_discard ||
> +	    old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
> +	    old_rs->flatshade != rs->flatshade ||
> +	    old_rs->two_side != rs->two_side ||
> +	    old_rs->multisample_enable != rs->multisample_enable ||
> +	    old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
> +	    old_rs->poly_smooth != rs->poly_smooth ||
> +	    old_rs->line_smooth != rs->line_smooth ||
> +	    old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
> +	    old_rs->force_persample_interp != rs->force_persample_interp)
> +		sctx->do_update_shaders = true;
>   }
>   
>   static void si_delete_rs_state(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   
>   	if (sctx->queued.named.rasterizer == state)
>   		si_pm4_bind_state(sctx, poly_offset, NULL);
>   	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
>   }
> @@ -1055,33 +1090,36 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
>   		si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
>   		si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
>   	}
>   
>   	return dsa;
>   }
>   
>   static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
>   {
>           struct si_context *sctx = (struct si_context *)ctx;
> +	struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
>           struct si_state_dsa *dsa = state;
>   
>           if (!state)
>                   return;
>   
>   	si_pm4_bind_state(sctx, dsa, dsa);
>   
>   	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
>   		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
>   		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
>   		si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
>   	}
> -	sctx->do_update_shaders = true;
> +
> +	if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
> +		sctx->do_update_shaders = true;
>   }
>   
>   static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
>   }
>   
>   static void *si_create_db_flush_dsa(struct si_context *sctx)
>   {
> @@ -2437,20 +2475,21 @@ static void si_framebuffer_cache_flush(struct si_context *sctx)
>   
>   static void si_set_framebuffer_state(struct pipe_context *ctx,
>   				     const struct pipe_framebuffer_state *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   	struct pipe_constant_buffer constbuf = {0};
>   	struct r600_surface *surf = NULL;
>   	struct r600_texture *rtex;
>   	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
>   	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
> +	unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;
>   	bool unbound = false;
>   	int i;
>   
>   	/* Fast path for linear <-> sRGB changes and no-op changes. */
>   	if (state->nr_cbufs &&
>   	    state->nr_cbufs == sctx->framebuffer.state.nr_cbufs &&
>   	    state->zsbuf == sctx->framebuffer.state.zsbuf) {
>   		struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs;
>   
>   		/* See if the surfaces are equivalent. */
> @@ -2593,23 +2632,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   		surf = (struct r600_surface*)state->zsbuf;
>   		rtex = (struct r600_texture*)surf->base.texture;
>   
>   		if (!surf->depth_initialized) {
>   			si_init_depth_surface(sctx, surf);
>   		}
>   		r600_context_add_resource_size(ctx, surf->base.texture);
>   	}
>   
>   	si_update_poly_offset_state(sctx);
> -	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>   	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
>   
> +	if (old_colorbuf_enabled_4bit != sctx->framebuffer.colorbuf_enabled_4bit)
> +		si_mark_atom_dirty(sctx, &sctx->cb_render_state);
> +
>   	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
>   		si_mark_atom_dirty(sctx, &sctx->msaa_config);
>   
>   	if (sctx->framebuffer.nr_samples != old_nr_samples) {
>   		si_mark_atom_dirty(sctx, &sctx->msaa_config);
>   		si_mark_atom_dirty(sctx, &sctx->db_render_state);
>   
>   		/* Set sample locations as fragment shader constants. */
>   		switch (sctx->framebuffer.nr_samples) {
>   		case 1:
> @@ -3727,20 +3768,23 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
>   		unsigned data_format, num_format;
>   		int first_non_void;
>   		unsigned vbo_index = elements[i].vertex_buffer_index;
>   		unsigned char swizzle[4];
>   
>   		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
>   			FREE(v);
>   			return NULL;
>   		}
>   
> +		if (elements[i].instance_divisor)
> +			v->uses_instance_divisors = true;
> +
>   		if (!used[vbo_index]) {
>   			v->first_vb_use_mask |= 1 << i;
>   			used[vbo_index] = true;
>   		}
>   
>   		desc = util_format_description(elements[i].src_format);
>   		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
>   		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
>   		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
>   		channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
> @@ -3840,25 +3884,33 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
>   				   S_008F0C_DATA_FORMAT(data_format);
>   	}
>   	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
>   
>   	return v;
>   }
>   
>   static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
> +	struct si_vertex_element *old = sctx->vertex_elements;
>   	struct si_vertex_element *v = (struct si_vertex_element*)state;
>   
>   	sctx->vertex_elements = v;
>   	sctx->vertex_buffers_dirty = true;
> -	sctx->do_update_shaders = true;
> +
> +	if (v &&
> +	    (!old ||
> +	     old->count != v->count ||
> +	     old->uses_instance_divisors != v->uses_instance_divisors ||
> +	     v->uses_instance_divisors || /* we don't check which divisors changed */
> +	     memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count)))
> +		sctx->do_update_shaders = true;
>   }
>   
>   static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   
>   	if (sctx->vertex_elements == state)
>   		sctx->vertex_elements = NULL;
>   	FREE(state);
>   }
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 275f830..4da51be 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -102,20 +102,21 @@ struct si_vertex_element
>   {
>   	unsigned			count;
>   	unsigned			first_vb_use_mask;
>   	/* Vertex buffer descriptor list size aligned for optimal prefetch. */
>   	unsigned			desc_list_byte_size;
>   
>   	uint8_t				fix_fetch[SI_MAX_ATTRIBS];
>   	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
>   	uint32_t			format_size[SI_MAX_ATTRIBS];
>   	struct pipe_vertex_element	elements[SI_MAX_ATTRIBS];
> +	bool				uses_instance_divisors;
>   };
>   
>   union si_state {
>   	struct {
>   		struct si_state_blend		*blend;
>   		struct si_state_rasterizer	*rasterizer;
>   		struct si_state_dsa		*dsa;
>   		struct si_pm4_state		*poly_offset;
>   		struct si_pm4_state		*ls;
>   		struct si_pm4_state		*hs;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index c21f855..677a6de 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -2307,32 +2307,39 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
>   	r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
>   	si_set_active_descriptors_for_shader(sctx, sel);
>   	si_update_streamout_state(sctx);
>   	si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
>   			    si_get_vs(sctx)->cso, si_get_vs_state(sctx));
>   }
>   
>   static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
> +	struct si_shader_selector *old_sel = sctx->ps_shader.cso;
>   	struct si_shader_selector *sel = state;
>   
>   	/* skip if supplied shader is one already in use */
> -	if (sctx->ps_shader.cso == sel)
> +	if (old_sel == sel)
>   		return;
>   
>   	sctx->ps_shader.cso = sel;
>   	sctx->ps_shader.current = sel ? sel->first_variant : NULL;
>   	sctx->do_update_shaders = true;
> -	if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
> -		si_update_tess_uses_prim_id(sctx);
> -	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
> +
> +	if (sel) {
> +		if (sctx->ia_multi_vgt_param_key.u.uses_tess)
> +			si_update_tess_uses_prim_id(sctx);
> +
> +		if (!old_sel ||
> +		    old_sel->info.colors_written != sel->info.colors_written)
> +			si_mark_atom_dirty(sctx, &sctx->cb_render_state);
> +	}
>   	si_set_active_descriptors_for_shader(sctx, sel);
>   }
>   
>   static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
>   {
>   	if (shader->is_optimized) {
>   		util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
>   				    &shader->optimized_ready);
>   		util_queue_fence_destroy(&shader->optimized_ready);
>   	}
> @@ -3081,20 +3088,23 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
>   	si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
>   }
>   
>   bool si_update_shaders(struct si_context *sctx)
>   {
>   	struct pipe_context *ctx = (struct pipe_context*)sctx;
>   	struct si_compiler_ctx_state compiler_state;
>   	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
>   	struct si_shader *old_vs = si_get_vs_state(sctx);
>   	bool old_clip_disable = old_vs ? old_vs->key.opt.hw_vs.clip_disable : false;
> +	struct si_shader *old_ps = sctx->ps_shader.current;
> +	unsigned old_spi_shader_col_format =
> +		old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
>   	int r;
>   
>   	compiler_state.tm = sctx->tm;
>   	compiler_state.debug = sctx->b.debug;
>   	compiler_state.is_debug_context = sctx->is_debug;
>   
>   	/* Update stages before GS. */
>   	if (sctx->tes_shader.cso) {
>   		if (!sctx->tf_ring) {
>   			si_init_tess_factor_ring(sctx);
> @@ -3205,21 +3215,25 @@ bool si_update_shaders(struct si_context *sctx)
>   			S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
>   
>   		if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
>   		    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
>   		    sctx->flatshade != rs->flatshade) {
>   			sctx->sprite_coord_enable = rs->sprite_coord_enable;
>   			sctx->flatshade = rs->flatshade;
>   			si_mark_atom_dirty(sctx, &sctx->spi_map);
>   		}
>   
> -		if (sctx->screen->b.rbplus_allowed && si_pm4_state_changed(sctx, ps))
> +		if (sctx->screen->b.rbplus_allowed &&
> +		    si_pm4_state_changed(sctx, ps) &&
> +		    (!old_ps ||
> +		     old_spi_shader_col_format !=
> +		     sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format))
>   			si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>   
>   		if (sctx->ps_db_shader_control != db_shader_control) {
>   			sctx->ps_db_shader_control = db_shader_control;
>   			si_mark_atom_dirty(sctx, &sctx->db_render_state);
>   		}
>   
>   		if (sctx->smoothing_enabled != sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) {
>   			sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
>   			si_mark_atom_dirty(sctx, &sctx->msaa_config);
> 


More information about the mesa-dev mailing list