Mesa (master): radeonsi: don't restore states at the beginning of IBs if they're shadowed
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jul 22 16:58:21 UTC 2020
Module: Mesa
Branch: master
Commit: b8892bc81820884cd42ada8699d0c28cb8e39dda
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b8892bc81820884cd42ada8699d0c28cb8e39dda
Author: Marek Olšák <marek.olsak at amd.com>
Date: Tue Mar 10 18:46:17 2020 -0400
radeonsi: don't restore states at the beginning of IBs if they're shadowed
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5798>
---
src/gallium/drivers/radeonsi/si_gfx_cs.c | 139 ++++++++++++++++---------------
src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
src/gallium/drivers/radeonsi/si_pipe.h | 2 +-
src/gallium/drivers/radeonsi/si_pm4.c | 17 +++-
src/gallium/drivers/radeonsi/si_pm4.h | 2 +-
5 files changed, 92 insertions(+), 70 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 59f6c0141ca..6a50dcae242 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -251,7 +251,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
if (ctx->current_saved_cs)
si_saved_cs_reference(&ctx->current_saved_cs, NULL);
- si_begin_new_gfx_cs(ctx);
+ si_begin_new_gfx_cs(ctx, false);
ctx->gfx_flush_in_progress = false;
}
@@ -383,7 +383,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
ctx->last_gs_out_prim = 0; /* cleared by CLEAR_STATE */
}
-void si_begin_new_gfx_cs(struct si_context *ctx)
+void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
{
if (ctx->is_debug)
si_begin_gfx_cs_debug(ctx);
@@ -411,9 +411,12 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
RADEON_PRIO_DESCRIPTORS);
}
- ctx->cs_shader_state.initialized = false;
si_add_all_descriptors_to_bo_list(ctx);
- si_shader_pointers_mark_dirty(ctx);
+
+ if (first_cs || !ctx->shadowed_regs) {
+ si_shader_pointers_mark_dirty(ctx);
+ ctx->cs_shader_state.initialized = false;
+ }
if (!ctx->has_graphics) {
ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
@@ -428,7 +431,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
/* set all valid group as dirty so they get reemited on
* next draw command
*/
- si_pm4_reset_emitted(ctx);
+ si_pm4_reset_emitted(ctx, first_cs);
/* The CS initialization should be emitted before everything else. */
if (ctx->cs_preamble_state)
@@ -453,8 +456,9 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
/* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */
bool has_clear_state = ctx->screen->info.has_clear_state;
- if (has_clear_state) {
- ctx->framebuffer.dirty_cbufs = u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs);
+ if (has_clear_state || ctx->shadowed_regs) {
+ ctx->framebuffer.dirty_cbufs =
+ u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs);
/* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */
ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL;
} else {
@@ -462,39 +466,72 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->framebuffer.dirty_zsbuf = true;
}
/* This should always be marked as dirty to set the framebuffer scissor
- * at least. */
+ * at least.
+ *
+ * Even with shadowed registers, we have to add buffers to the buffer list.
+ * All of these do that.
+ */
si_mark_atom_dirty(ctx, &ctx->atoms.s.framebuffer);
-
- si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs);
- /* CLEAR_STATE sets zeros. */
- if (!has_clear_state || ctx->clip_state.any_nonzeros)
- si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_state);
- ctx->sample_locs_num_samples = 0;
- si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_sample_locs);
- si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_config);
- /* CLEAR_STATE sets 0xffff. */
- if (!has_clear_state || ctx->sample_mask != 0xffff)
- si_mark_atom_dirty(ctx, &ctx->atoms.s.sample_mask);
- si_mark_atom_dirty(ctx, &ctx->atoms.s.cb_render_state);
- /* CLEAR_STATE sets zeros. */
- if (!has_clear_state || ctx->blend_color.any_nonzeros)
- si_mark_atom_dirty(ctx, &ctx->atoms.s.blend_color);
- si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
- if (ctx->chip_class >= GFX9)
- si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
- si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
- si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
- if (!ctx->screen->use_ngg_streamout)
- si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
- /* CLEAR_STATE disables all window rectangles. */
- if (!has_clear_state || ctx->num_window_rectangles > 0)
- si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
-
- si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
- si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports);
+ if (first_cs || !ctx->shadowed_regs) {
+ /* These don't add any buffers, so skip them with shadowing. */
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs);
+ /* CLEAR_STATE sets zeros. */
+ if (!has_clear_state || ctx->clip_state.any_nonzeros)
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_state);
+ ctx->sample_locs_num_samples = 0;
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_sample_locs);
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_config);
+ /* CLEAR_STATE sets 0xffff. */
+ if (!has_clear_state || ctx->sample_mask != 0xffff)
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.sample_mask);
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.cb_render_state);
+ /* CLEAR_STATE sets zeros. */
+ if (!has_clear_state || ctx->blend_color.any_nonzeros)
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.blend_color);
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
+ if (ctx->chip_class >= GFX9)
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
+ if (!ctx->screen->use_ngg_streamout)
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
+ /* CLEAR_STATE disables all window rectangles. */
+ if (!has_clear_state || ctx->num_window_rectangles > 0)
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles);
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband);
+ si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors);
+
+ /* Invalidate various draw states so that they are emitted before
+ * the first draw call. */
+ si_invalidate_draw_sh_constants(ctx);
+ ctx->last_index_size = -1;
+ ctx->last_primitive_restart_en = -1;
+ ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
+ ctx->last_prim = -1;
+ ctx->last_multi_vgt_param = -1;
+ ctx->last_vs_state = ~0;
+ ctx->last_ls = NULL;
+ ctx->last_tcs = NULL;
+ ctx->last_tes_sh_base = -1;
+ ctx->last_num_tcs_input_cp = -1;
+ ctx->last_ls_hs_config = -1; /* impossible value */
+ ctx->last_binning_enabled = -1;
+
+ if (has_clear_state) {
+ si_set_tracked_regs_to_clear_state(ctx);
+ } else {
+ /* Set all register values to unknown. */
+ ctx->tracked_regs.reg_saved = 0;
+ ctx->last_gs_out_prim = -1; /* unknown */
+ }
+
+ /* 0xffffffff is an impossible value to register SPI_PS_INPUT_CNTL_n */
+ memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32);
+ }
+
si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state);
if (ctx->scratch_buffer) {
si_context_add_resource_size(ctx, &ctx->scratch_buffer->b.b);
@@ -510,24 +547,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
assert(!ctx->gfx_cs->prev_dw);
ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
-
- /* Invalidate various draw states so that they are emitted before
- * the first draw call. */
- si_invalidate_draw_sh_constants(ctx);
- ctx->last_index_size = -1;
- ctx->last_primitive_restart_en = -1;
- ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
- ctx->last_prim = -1;
- ctx->last_multi_vgt_param = -1;
- ctx->last_vs_state = ~0;
- ctx->last_ls = NULL;
- ctx->last_tcs = NULL;
- ctx->last_tes_sh_base = -1;
- ctx->last_num_tcs_input_cp = -1;
- ctx->last_ls_hs_config = -1; /* impossible value */
- ctx->last_binning_enabled = -1;
ctx->small_prim_cull_info_dirty = ctx->small_prim_cull_info_buf != NULL;
-
ctx->prim_discard_compute_ib_initialized = false;
/* Compute-based primitive discard:
@@ -540,17 +560,4 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->index_ring_base = ctx->index_ring_size_per_ib;
ctx->index_ring_offset = 0;
-
- if (!ctx->shadowed_regs) {
- if (has_clear_state) {
- si_set_tracked_regs_to_clear_state(ctx);
- } else {
- /* Set all register values to unknown. */
- ctx->tracked_regs.reg_saved = 0;
- ctx->last_gs_out_prim = -1; /* unknown */
- }
- }
-
- /* 0xffffffff is a impossible value to register SPI_PS_INPUT_CNTL_n */
- memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32);
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 40c7270e46e..aa76cb81325 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -676,7 +676,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
si_init_cp_reg_shadowing(sctx);
}
- si_begin_new_gfx_cs(sctx);
+ si_begin_new_gfx_cs(sctx, true);
assert(sctx->gfx_cs->current.cdw == sctx->initial_gfx_cs_size);
/* Initialize per-context buffers. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index c59bbd5463b..490dc13d30f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1420,7 +1420,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen);
void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence);
void si_allocate_gds(struct si_context *ctx);
void si_set_tracked_regs_to_clear_state(struct si_context *ctx);
-void si_begin_new_gfx_cs(struct si_context *ctx);
+void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs);
void si_need_gfx_cs_space(struct si_context *ctx);
void si_unref_sdma_uploads(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index f3446267970..d1d003af1d4 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -119,8 +119,23 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
state->atom.emit(sctx);
}
-void si_pm4_reset_emitted(struct si_context *sctx)
+void si_pm4_reset_emitted(struct si_context *sctx, bool first_cs)
{
+ if (!first_cs && sctx->shadowed_regs) {
+ /* Only dirty states that contain buffers, so that they are
+ * added to the buffer list on the next draw call.
+ */
+ for (unsigned i = 0; i < SI_NUM_STATES; i++) {
+ struct si_pm4_state *state = sctx->emitted.array[i];
+
+ if (state && state->shader) {
+ sctx->emitted.array[i] = NULL;
+ sctx->dirty_states |= 1 << i;
+ }
+ }
+ return;
+ }
+
memset(&sctx->emitted, 0, sizeof(sctx->emitted));
sctx->dirty_states |= u_bit_consecutive(0, SI_NUM_STATES);
}
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 772512596f1..f8edea4d0cb 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -61,6 +61,6 @@ void si_pm4_clear_state(struct si_pm4_state *state);
void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx);
void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
-void si_pm4_reset_emitted(struct si_context *sctx);
+void si_pm4_reset_emitted(struct si_context *sctx, bool first_cs);
#endif
More information about the mesa-commit mailing list