[Mesa-dev] [PATCH 2/4] radeonsi/gfx9: apply the scissor bug workaround to si_emit_streamout_end
Marek Olšák
maraeo at gmail.com
Wed Apr 17 23:39:11 UTC 2019
From: Marek Olšák <marek.olsak at amd.com>
si_emit_streamout_end is called directly, it's not a state.
Cc: 19.0 <mesa-stable at lists.freedesktop.org>
---
src/gallium/drivers/radeonsi/si_pipe.c | 2 ++
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
src/gallium/drivers/radeonsi/si_state_draw.c | 2 +-
src/gallium/drivers/radeonsi/si_state_streamout.c | 10 ++++++++++
4 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 5caeb57..43c4914 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1069,20 +1069,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI &&
sscreen->info.max_se >= 2 &&
!(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
sscreen->assume_no_z_fights =
driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
sscreen->commutative_blend_add =
driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
sscreen->clear_db_cache_before_clear =
driQueryOptionb(config->options, "radeonsi_clear_db_cache_before_clear");
+ sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
+ sscreen->info.family == CHIP_RAVEN;
sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
sscreen->info.family <= CHIP_POLARIS12) ||
sscreen->info.family == CHIP_VEGA10 ||
sscreen->info.family == CHIP_RAVEN;
sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
sscreen->info.family == CHIP_RAVEN;
sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2;
/* Only enable primitive binning on APUs by default. */
sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN ||
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 301d386..ee53192 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -463,20 +463,21 @@ struct si_screen {
unsigned eqaa_force_coverage_samples;
unsigned eqaa_force_z_samples;
unsigned eqaa_force_color_samples;
bool has_clear_state;
bool has_distributed_tess;
bool has_draw_indirect_multi;
bool has_out_of_order_rast;
bool assume_no_z_fights;
bool commutative_blend_add;
bool clear_db_cache_before_clear;
+ bool has_gfx9_scissor_bug;
bool has_msaa_sample_loc_bug;
bool has_ls_vgpr_init_bug;
bool has_dcc_constant_encode;
bool dpbb_allowed;
bool dfsm_allowed;
bool llvm_has_working_vgpr_indexing;
/* Whether shaders are monolithic (1-part) or separate (3-part). */
bool use_monolithic_shaders;
bool record_llvm_ir;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2a514f1..e2fba41 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1220,21 +1220,21 @@ static void si_get_draw_start_count(struct si_context *sctx,
}
static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
unsigned skip_atom_mask)
{
unsigned num_patches = 0;
/* Vega10/Raven scissor bug workaround. When any context register is
* written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
* registers must be written too.
*/
- bool handle_scissor_bug = (sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
+ bool handle_scissor_bug = sctx->screen->has_gfx9_scissor_bug &&
!si_is_atom_dirty(sctx, &sctx->atoms.s.scissors);
bool context_roll = false; /* set correctly for GFX9 only */
context_roll |= si_emit_rasterizer_prim_state(sctx);
if (sctx->tes_shader.cso)
context_roll |= si_emit_derived_tess_state(sctx, info, &num_patches);
if (handle_scissor_bug &&
(info->count_from_stream_output ||
sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 2bf6862..de0b051 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -269,20 +269,21 @@ static void si_emit_streamout_begin(struct si_context *sctx)
sctx->streamout.begin_emitted = true;
}
void si_emit_streamout_end(struct si_context *sctx)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
struct si_streamout_target **t = sctx->streamout.targets;
unsigned i;
uint64_t va;
+ bool context_reg_changed = false;
si_flush_vgt_streamout(sctx);
for (i = 0; i < sctx->streamout.num_targets; i++) {
if (!t[i])
continue;
va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
@@ -296,25 +297,34 @@ void si_emit_streamout_end(struct si_context *sctx)
radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
t[i]->buf_filled_size,
RADEON_USAGE_WRITE,
RADEON_PRIO_SO_FILLED_SIZE);
/* Zero the buffer size. The counters (primitives generated,
* primitives emitted) may be enabled even if there is not
* buffer bound. This ensures that the primitives-emitted query
* won't increment. */
radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+ context_reg_changed = true;
t[i]->buf_filled_size_valid = true;
}
sctx->streamout.begin_emitted = false;
+
+ /* If we caused a context roll (= changed context registers),
+ * we need to apply the scissor bug workaround.
+ */
+ if (sctx->screen->has_gfx9_scissor_bug && context_reg_changed) {
+ sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
+ }
}
/* STREAMOUT CONFIG DERIVED STATE
*
* Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
* The buffer mask is an independent state, so no writes occur if there
* are no buffers bound.
*/
static void si_emit_streamout_enable(struct si_context *sctx)
--
2.7.4
More information about the mesa-dev
mailing list