[Mesa-dev] [PATCH 1/7] radeonsi: don't update dependent states if it has no effect (v2)

Nicolai Hähnle nhaehnle at gmail.com
Thu Jun 8 07:46:20 UTC 2017


On 07.06.2017 21:50, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> This and the previous commit decrease IB sizes and the number of
> si_update_shaders invocations as follows:

Referencing "the previous commit" in the commit message no longer makes sense
now that this patch is the first in the series. Apart from that, this patch is

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>


> 
>                   IB size   si_update_shaders calls
> Borderlands 2      -10%            -27%
> Deus Ex: MD         -5%            -11%
> Talos Principle     -8%            -30%
> 
> v2: always dirty cb_render_state in set_framebuffer_state
> ---
>   src/gallium/drivers/radeonsi/si_state.c         | 63 ++++++++++++++++++++++---
>   src/gallium/drivers/radeonsi/si_state.h         |  1 +
>   src/gallium/drivers/radeonsi/si_state_shaders.c | 24 ++++++++--
>   3 files changed, 76 insertions(+), 12 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 3f471a3..c7bc7b0 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -596,23 +596,41 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
>   
>   static void *si_create_blend_state(struct pipe_context *ctx,
>   				   const struct pipe_blend_state *state)
>   {
>   	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
>   }
>   
>   static void si_bind_blend_state(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
> -	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
> -	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
> -	sctx->do_update_shaders = true;
> +	struct si_state_blend *old_blend = sctx->queued.named.blend;
> +	struct si_state_blend *blend = (struct si_state_blend *)state;
> +
> +	if (!state)
> +		return;
> +
> +	if (!old_blend ||
> +	     old_blend->cb_target_mask != blend->cb_target_mask ||
> +	     old_blend->dual_src_blend != blend->dual_src_blend)
> +		si_mark_atom_dirty(sctx, &sctx->cb_render_state);
> +
> +	si_pm4_bind_state(sctx, blend, state);
> +
> +	if (!old_blend ||
> +	    old_blend->cb_target_mask != blend->cb_target_mask ||
> +	    old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
> +	    old_blend->alpha_to_one != blend->alpha_to_one ||
> +	    old_blend->dual_src_blend != blend->dual_src_blend ||
> +	    old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
> +	    old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
> +		sctx->do_update_shaders = true;
>   }
>   
>   static void si_delete_blend_state(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
>   }
>   
>   static void si_set_blend_color(struct pipe_context *ctx,
>   			       const struct pipe_blend_color *state)
> @@ -914,24 +932,41 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
>   	}
>   
>   	sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
>   	sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
>   
>   	r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
>   
>   	si_pm4_bind_state(sctx, rasterizer, rs);
>   	si_update_poly_offset_state(sctx);
>   
> -	si_mark_atom_dirty(sctx, &sctx->clip_regs);
> +	if (!old_rs ||
> +	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
> +	    old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
> +		si_mark_atom_dirty(sctx, &sctx->clip_regs);
> +
>   	sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
>   		rs->line_stipple_enable;
> -	sctx->do_update_shaders = true;
> +
> +	if (!old_rs ||
> +	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
> +	    old_rs->rasterizer_discard != rs->rasterizer_discard ||
> +	    old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
> +	    old_rs->flatshade != rs->flatshade ||
> +	    old_rs->two_side != rs->two_side ||
> +	    old_rs->multisample_enable != rs->multisample_enable ||
> +	    old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
> +	    old_rs->poly_smooth != rs->poly_smooth ||
> +	    old_rs->line_smooth != rs->line_smooth ||
> +	    old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
> +	    old_rs->force_persample_interp != rs->force_persample_interp)
> +		sctx->do_update_shaders = true;
>   }
>   
>   static void si_delete_rs_state(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   
>   	if (sctx->queued.named.rasterizer == state)
>   		si_pm4_bind_state(sctx, poly_offset, NULL);
>   	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
>   }
> @@ -1055,33 +1090,36 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
>   		si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
>   		si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
>   	}
>   
>   	return dsa;
>   }
>   
>   static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
>   {
>           struct si_context *sctx = (struct si_context *)ctx;
> +	struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
>           struct si_state_dsa *dsa = state;
>   
>           if (!state)
>                   return;
>   
>   	si_pm4_bind_state(sctx, dsa, dsa);
>   
>   	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
>   		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
>   		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
>   		si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
>   	}
> -	sctx->do_update_shaders = true;
> +
> +	if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
> +		sctx->do_update_shaders = true;
>   }
>   
>   static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
>   }
>   
>   static void *si_create_db_flush_dsa(struct si_context *sctx)
>   {
> @@ -3686,20 +3724,23 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
>   		unsigned data_format, num_format;
>   		int first_non_void;
>   		unsigned vbo_index = elements[i].vertex_buffer_index;
>   		unsigned char swizzle[4];
>   
>   		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
>   			FREE(v);
>   			return NULL;
>   		}
>   
> +		if (elements[i].instance_divisor)
> +			v->uses_instance_divisors = true;
> +
>   		if (!used[vbo_index]) {
>   			v->first_vb_use_mask |= 1 << i;
>   			used[vbo_index] = true;
>   		}
>   
>   		desc = util_format_description(elements[i].src_format);
>   		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
>   		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
>   		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
>   		channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
> @@ -3799,25 +3840,33 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
>   				   S_008F0C_DATA_FORMAT(data_format);
>   	}
>   	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
>   
>   	return v;
>   }
>   
>   static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
> +	struct si_vertex_element *old = sctx->vertex_elements;
>   	struct si_vertex_element *v = (struct si_vertex_element*)state;
>   
>   	sctx->vertex_elements = v;
>   	sctx->vertex_buffers_dirty = true;
> -	sctx->do_update_shaders = true;
> +
> +	if (v &&
> +	    (!old ||
> +	     old->count != v->count ||
> +	     old->uses_instance_divisors != v->uses_instance_divisors ||
> +	     v->uses_instance_divisors || /* we don't check which divisors changed */
> +	     memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count)))
> +		sctx->do_update_shaders = true;
>   }
>   
>   static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
>   
>   	if (sctx->vertex_elements == state)
>   		sctx->vertex_elements = NULL;
>   	FREE(state);
>   }
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 275f830..4da51be 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -102,20 +102,21 @@ struct si_vertex_element
>   {
>   	unsigned			count;
>   	unsigned			first_vb_use_mask;
>   	/* Vertex buffer descriptor list size aligned for optimal prefetch. */
>   	unsigned			desc_list_byte_size;
>   
>   	uint8_t				fix_fetch[SI_MAX_ATTRIBS];
>   	uint32_t			rsrc_word3[SI_MAX_ATTRIBS];
>   	uint32_t			format_size[SI_MAX_ATTRIBS];
>   	struct pipe_vertex_element	elements[SI_MAX_ATTRIBS];
> +	bool				uses_instance_divisors;
>   };
>   
>   union si_state {
>   	struct {
>   		struct si_state_blend		*blend;
>   		struct si_state_rasterizer	*rasterizer;
>   		struct si_state_dsa		*dsa;
>   		struct si_pm4_state		*poly_offset;
>   		struct si_pm4_state		*ls;
>   		struct si_pm4_state		*hs;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index c21f855..677a6de 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -2307,32 +2307,39 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
>   	r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
>   	si_set_active_descriptors_for_shader(sctx, sel);
>   	si_update_streamout_state(sctx);
>   	si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
>   			    si_get_vs(sctx)->cso, si_get_vs_state(sctx));
>   }
>   
>   static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
>   {
>   	struct si_context *sctx = (struct si_context *)ctx;
> +	struct si_shader_selector *old_sel = sctx->ps_shader.cso;
>   	struct si_shader_selector *sel = state;
>   
>   	/* skip if supplied shader is one already in use */
> -	if (sctx->ps_shader.cso == sel)
> +	if (old_sel == sel)
>   		return;
>   
>   	sctx->ps_shader.cso = sel;
>   	sctx->ps_shader.current = sel ? sel->first_variant : NULL;
>   	sctx->do_update_shaders = true;
> -	if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
> -		si_update_tess_uses_prim_id(sctx);
> -	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
> +
> +	if (sel) {
> +		if (sctx->ia_multi_vgt_param_key.u.uses_tess)
> +			si_update_tess_uses_prim_id(sctx);
> +
> +		if (!old_sel ||
> +		    old_sel->info.colors_written != sel->info.colors_written)
> +			si_mark_atom_dirty(sctx, &sctx->cb_render_state);
> +	}
>   	si_set_active_descriptors_for_shader(sctx, sel);
>   }
>   
>   static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
>   {
>   	if (shader->is_optimized) {
>   		util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
>   				    &shader->optimized_ready);
>   		util_queue_fence_destroy(&shader->optimized_ready);
>   	}
> @@ -3081,20 +3088,23 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
>   	si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
>   }
>   
>   bool si_update_shaders(struct si_context *sctx)
>   {
>   	struct pipe_context *ctx = (struct pipe_context*)sctx;
>   	struct si_compiler_ctx_state compiler_state;
>   	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
>   	struct si_shader *old_vs = si_get_vs_state(sctx);
>   	bool old_clip_disable = old_vs ? old_vs->key.opt.hw_vs.clip_disable : false;
> +	struct si_shader *old_ps = sctx->ps_shader.current;
> +	unsigned old_spi_shader_col_format =
> +		old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
>   	int r;
>   
>   	compiler_state.tm = sctx->tm;
>   	compiler_state.debug = sctx->b.debug;
>   	compiler_state.is_debug_context = sctx->is_debug;
>   
>   	/* Update stages before GS. */
>   	if (sctx->tes_shader.cso) {
>   		if (!sctx->tf_ring) {
>   			si_init_tess_factor_ring(sctx);
> @@ -3205,21 +3215,25 @@ bool si_update_shaders(struct si_context *sctx)
>   			S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
>   
>   		if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
>   		    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
>   		    sctx->flatshade != rs->flatshade) {
>   			sctx->sprite_coord_enable = rs->sprite_coord_enable;
>   			sctx->flatshade = rs->flatshade;
>   			si_mark_atom_dirty(sctx, &sctx->spi_map);
>   		}
>   
> -		if (sctx->screen->b.rbplus_allowed && si_pm4_state_changed(sctx, ps))
> +		if (sctx->screen->b.rbplus_allowed &&
> +		    si_pm4_state_changed(sctx, ps) &&
> +		    (!old_ps ||
> +		     old_spi_shader_col_format !=
> +		     sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format))
>   			si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>   
>   		if (sctx->ps_db_shader_control != db_shader_control) {
>   			sctx->ps_db_shader_control = db_shader_control;
>   			si_mark_atom_dirty(sctx, &sctx->db_render_state);
>   		}
>   
>   		if (sctx->smoothing_enabled != sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) {
>   			sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
>   			si_mark_atom_dirty(sctx, &sctx->msaa_config);
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list