[Mesa-dev] [PATCH 8/8] radeonsi: don't update dependent states if it has no effect
Marek Olšák
maraeo at gmail.com
Mon Jun 5 16:51:01 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
This and the previous commit decrease IB sizes and the number of
si_update_shaders invocations as follows:
IB size si_update_shader calls
Borderlands 2 -10% -27%
Deus Ex: MD -5% -11%
Talos Principle -8% -30%
---
src/gallium/drivers/radeonsi/si_state.c | 68 ++++++++++++++++++++++---
src/gallium/drivers/radeonsi/si_state.h | 1 +
src/gallium/drivers/radeonsi/si_state_shaders.c | 24 +++++++--
3 files changed, 80 insertions(+), 13 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 5e5f564..323ec5e 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -596,23 +596,41 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
static void *si_create_blend_state(struct pipe_context *ctx,
const struct pipe_blend_state *state)
{
return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
}
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
- si_mark_atom_dirty(sctx, &sctx->cb_render_state);
- sctx->do_update_shaders = true;
+ struct si_state_blend *old_blend = sctx->queued.named.blend;
+ struct si_state_blend *blend = (struct si_state_blend *)state;
+
+ if (!state)
+ return;
+
+ if (!old_blend ||
+ old_blend->cb_target_mask != blend->cb_target_mask ||
+ old_blend->dual_src_blend != blend->dual_src_blend)
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
+ si_pm4_bind_state(sctx, blend, state);
+
+ if (!old_blend ||
+ old_blend->cb_target_mask != blend->cb_target_mask ||
+ old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
+ old_blend->alpha_to_one != blend->alpha_to_one ||
+ old_blend->dual_src_blend != blend->dual_src_blend ||
+ old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
+ old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
+ sctx->do_update_shaders = true;
}
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
}
static void si_set_blend_color(struct pipe_context *ctx,
const struct pipe_blend_color *state)
@@ -914,24 +932,41 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
}
sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_poly_offset_state(sctx);
- si_mark_atom_dirty(sctx, &sctx->clip_regs);
+ if (!old_rs ||
+ old_rs->clip_plane_enable != rs->clip_plane_enable ||
+ old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
+
sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
rs->line_stipple_enable;
- sctx->do_update_shaders = true;
+
+ if (!old_rs ||
+ old_rs->clip_plane_enable != rs->clip_plane_enable ||
+ old_rs->rasterizer_discard != rs->rasterizer_discard ||
+ old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
+ old_rs->flatshade != rs->flatshade ||
+ old_rs->two_side != rs->two_side ||
+ old_rs->multisample_enable != rs->multisample_enable ||
+ old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
+ old_rs->poly_smooth != rs->poly_smooth ||
+ old_rs->line_smooth != rs->line_smooth ||
+ old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
+ old_rs->force_persample_interp != rs->force_persample_interp)
+ sctx->do_update_shaders = true;
}
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
if (sctx->queued.named.rasterizer == state)
si_pm4_bind_state(sctx, poly_offset, NULL);
si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
}
@@ -1055,33 +1090,36 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
}
return dsa;
}
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
struct si_state_dsa *dsa = state;
if (!state)
return;
si_pm4_bind_state(sctx, dsa, dsa);
if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
sizeof(struct si_dsa_stencil_ref_part)) != 0) {
sctx->stencil_ref.dsa_part = dsa->stencil_ref;
si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
}
- sctx->do_update_shaders = true;
+
+ if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
+ sctx->do_update_shaders = true;
}
static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
}
static void *si_create_db_flush_dsa(struct si_context *sctx)
{
@@ -2437,20 +2475,21 @@ static void si_framebuffer_cache_flush(struct si_context *sctx)
static void si_set_framebuffer_state(struct pipe_context *ctx,
const struct pipe_framebuffer_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_constant_buffer constbuf = {0};
struct r600_surface *surf = NULL;
struct r600_texture *rtex;
bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
unsigned old_nr_samples = sctx->framebuffer.nr_samples;
+ unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;
bool unbound = false;
int i;
/* Fast path for linear <-> sRGB changes and no-op changes. */
if (state->nr_cbufs &&
state->nr_cbufs == sctx->framebuffer.state.nr_cbufs &&
state->zsbuf == sctx->framebuffer.state.zsbuf) {
struct pipe_surface **cbufs = sctx->framebuffer.state.cbufs;
/* See if the surfaces are equivalent. */
@@ -2593,23 +2632,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
surf = (struct r600_surface*)state->zsbuf;
rtex = (struct r600_texture*)surf->base.texture;
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
}
r600_context_add_resource_size(ctx, surf->base.texture);
}
si_update_poly_offset_state(sctx);
- si_mark_atom_dirty(sctx, &sctx->cb_render_state);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
+ if (old_colorbuf_enabled_4bit != sctx->framebuffer.colorbuf_enabled_4bit)
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
si_mark_atom_dirty(sctx, &sctx->msaa_config);
if (sctx->framebuffer.nr_samples != old_nr_samples) {
si_mark_atom_dirty(sctx, &sctx->msaa_config);
si_mark_atom_dirty(sctx, &sctx->db_render_state);
/* Set sample locations as fragment shader constants. */
switch (sctx->framebuffer.nr_samples) {
case 1:
@@ -3727,20 +3768,23 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
unsigned data_format, num_format;
int first_non_void;
unsigned vbo_index = elements[i].vertex_buffer_index;
unsigned char swizzle[4];
if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
FREE(v);
return NULL;
}
+ if (elements[i].instance_divisor)
+ v->uses_instance_divisors = true;
+
if (!used[vbo_index]) {
v->first_vb_use_mask |= 1 << i;
used[vbo_index] = true;
}
desc = util_format_description(elements[i].src_format);
first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
@@ -3840,25 +3884,33 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
S_008F0C_DATA_FORMAT(data_format);
}
memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
return v;
}
static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_vertex_element *old = sctx->vertex_elements;
struct si_vertex_element *v = (struct si_vertex_element*)state;
sctx->vertex_elements = v;
sctx->vertex_buffers_dirty = true;
- sctx->do_update_shaders = true;
+
+ if (v &&
+ (!old ||
+ old->count != v->count ||
+ old->uses_instance_divisors != v->uses_instance_divisors ||
+ v->uses_instance_divisors || /* we don't check which divisors changed */
+ memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count)))
+ sctx->do_update_shaders = true;
}
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
if (sctx->vertex_elements == state)
sctx->vertex_elements = NULL;
FREE(state);
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 275f830..4da51be 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -102,20 +102,21 @@ struct si_vertex_element
{
unsigned count;
unsigned first_vb_use_mask;
/* Vertex buffer descriptor list size aligned for optimal prefetch. */
unsigned desc_list_byte_size;
uint8_t fix_fetch[SI_MAX_ATTRIBS];
uint32_t rsrc_word3[SI_MAX_ATTRIBS];
uint32_t format_size[SI_MAX_ATTRIBS];
struct pipe_vertex_element elements[SI_MAX_ATTRIBS];
+ bool uses_instance_divisors;
};
union si_state {
struct {
struct si_state_blend *blend;
struct si_state_rasterizer *rasterizer;
struct si_state_dsa *dsa;
struct si_pm4_state *poly_offset;
struct si_pm4_state *ls;
struct si_pm4_state *hs;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c21f855..677a6de 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2307,32 +2307,39 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
si_set_active_descriptors_for_shader(sctx, sel);
si_update_streamout_state(sctx);
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
si_get_vs(sctx)->cso, si_get_vs_state(sctx));
}
static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *old_sel = sctx->ps_shader.cso;
struct si_shader_selector *sel = state;
/* skip if supplied shader is one already in use */
- if (sctx->ps_shader.cso == sel)
+ if (old_sel == sel)
return;
sctx->ps_shader.cso = sel;
sctx->ps_shader.current = sel ? sel->first_variant : NULL;
sctx->do_update_shaders = true;
- if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
- si_update_tess_uses_prim_id(sctx);
- si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
+ if (sel) {
+ if (sctx->ia_multi_vgt_param_key.u.uses_tess)
+ si_update_tess_uses_prim_id(sctx);
+
+ if (!old_sel ||
+ old_sel->info.colors_written != sel->info.colors_written)
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+ }
si_set_active_descriptors_for_shader(sctx, sel);
}
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
{
if (shader->is_optimized) {
util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
&shader->optimized_ready);
util_queue_fence_destroy(&shader->optimized_ready);
}
@@ -3081,20 +3088,23 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
}
bool si_update_shaders(struct si_context *sctx)
{
struct pipe_context *ctx = (struct pipe_context*)sctx;
struct si_compiler_ctx_state compiler_state;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct si_shader *old_vs = si_get_vs_state(sctx);
bool old_clip_disable = old_vs ? old_vs->key.opt.hw_vs.clip_disable : false;
+ struct si_shader *old_ps = sctx->ps_shader.current;
+ unsigned old_spi_shader_col_format =
+ old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
int r;
compiler_state.tm = sctx->tm;
compiler_state.debug = sctx->b.debug;
compiler_state.is_debug_context = sctx->is_debug;
/* Update stages before GS. */
if (sctx->tes_shader.cso) {
if (!sctx->tf_ring) {
si_init_tess_factor_ring(sctx);
@@ -3205,21 +3215,25 @@ bool si_update_shaders(struct si_context *sctx)
S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
sctx->sprite_coord_enable != rs->sprite_coord_enable ||
sctx->flatshade != rs->flatshade) {
sctx->sprite_coord_enable = rs->sprite_coord_enable;
sctx->flatshade = rs->flatshade;
si_mark_atom_dirty(sctx, &sctx->spi_map);
}
- if (sctx->screen->b.rbplus_allowed && si_pm4_state_changed(sctx, ps))
+ if (sctx->screen->b.rbplus_allowed &&
+ si_pm4_state_changed(sctx, ps) &&
+ (!old_ps ||
+ old_spi_shader_col_format !=
+ sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format))
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
if (sctx->ps_db_shader_control != db_shader_control) {
sctx->ps_db_shader_control = db_shader_control;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
if (sctx->smoothing_enabled != sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) {
sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
si_mark_atom_dirty(sctx, &sctx->msaa_config);
--
2.7.4
More information about the mesa-dev
mailing list