Mesa (master): radeonsi: track context rolls better for the Vega scissor bug workaround

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Oct 16 22:58:09 UTC 2018


Module: Mesa
Branch: master
Commit: fcc70e4855c3bde3cadce9d0d1abf8da7106f643
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=fcc70e4855c3bde3cadce9d0d1abf8da7106f643

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Sat Oct  6 22:44:36 2018 -0400

radeonsi: track context rolls better for the Vega scissor bug workaround

We should get fewer context rolls with the SET_CONTEXT_REG optimization,
but it would have been for nothing if the scissor state rolled the context
anyway. Don't emit the scissor state if there is no context roll.

---

 src/gallium/drivers/radeonsi/si_pipe.h           |  1 +
 src/gallium/drivers/radeonsi/si_state.c          | 31 +++++++++++++++++------
 src/gallium/drivers/radeonsi/si_state.h          | 17 +++----------
 src/gallium/drivers/radeonsi/si_state_binning.c  |  7 ++++++
 src/gallium/drivers/radeonsi/si_state_draw.c     | 32 ++++++++++++++----------
 src/gallium/drivers/radeonsi/si_state_shaders.c  | 23 +++++++++++++++++
 src/gallium/drivers/radeonsi/si_state_viewport.c |  3 +++
 7 files changed, 80 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 7ae17435ab..6edc06cece 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1023,6 +1023,7 @@ struct si_context {
 	unsigned			num_resident_handles;
 	uint64_t			num_alloc_tex_transfer_bytes;
 	unsigned			last_tex_ps_draw_ratio; /* for query */
+	unsigned			context_roll_counter;
 
 	/* Queries. */
 	/* Maintain the list of active queries for pausing between IBs. */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index b63e70092d..8b2e6e57f4 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -88,9 +88,6 @@ static void si_emit_cb_render_state(struct si_context *sctx)
 	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
 		cb_target_mask = 0;
 
-	radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK,
-				   SI_TRACKED_CB_TARGET_MASK, cb_target_mask);
-
 	/* GFX9: Flush DFSM when CB_TARGET_MASK changes.
 	 * I think we don't have to do anything between IBs.
 	 */
@@ -102,6 +99,10 @@ static void si_emit_cb_render_state(struct si_context *sctx)
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
 	}
 
+	unsigned initial_cdw = cs->current.cdw;
+	radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK,
+				   SI_TRACKED_CB_TARGET_MASK, cb_target_mask);
+
 	if (sctx->chip_class >= VI) {
 		/* DCC MSAA workaround for blending.
 		 * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
@@ -252,6 +253,8 @@ static void si_emit_cb_render_state(struct si_context *sctx)
 					    sx_ps_downconvert, sx_blend_opt_epsilon,
 					    sx_blend_opt_control);
 	}
+	if (initial_cdw != cs->current.cdw)
+		sctx->context_roll_counter++;
 }
 
 /*
@@ -773,6 +776,7 @@ static void si_emit_clip_regs(struct si_context *sctx)
 	clipdist_mask &= rs->clip_plane_enable;
 	culldist_mask |= clipdist_mask;
 
+	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
 	radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
 		SI_TRACKED_PA_CL_VS_OUT_CNTL,
 		vs_sel->pa_cl_vs_out_cntl |
@@ -784,6 +788,9 @@ static void si_emit_clip_regs(struct si_context *sctx)
 		rs->pa_cl_clip_cntl |
 		ucp_mask |
 		S_028810_CLIP_DISABLE(window_space));
+
+	if (initial_cdw != sctx->gfx_cs->current.cdw)
+		sctx->context_roll_counter++;
 }
 
 /*
@@ -1352,6 +1359,7 @@ static void si_emit_db_render_state(struct si_context *sctx)
 {
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	unsigned db_shader_control, db_render_control, db_count_control;
+	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
 
 	/* DB_RENDER_CONTROL */
 	if (sctx->dbcb_depth_copy_enabled ||
@@ -1434,6 +1442,9 @@ static void si_emit_db_render_state(struct si_context *sctx)
 
 	radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL,
 				   SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
+
+	if (initial_cdw != sctx->gfx_cs->current.cdw)
+		sctx->context_roll_counter++;
 }
 
 /*
@@ -3489,6 +3500,8 @@ static void si_emit_msaa_config(struct si_context *sctx)
 		}
 	}
 
+	unsigned initial_cdw = cs->current.cdw;
+
 	/* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */
 	radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL,
 				    SI_TRACKED_PA_SC_LINE_CNTL, sc_line_cntl,
@@ -3500,10 +3513,14 @@ static void si_emit_msaa_config(struct si_context *sctx)
 	radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1,
 				   SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1);
 
-	/* GFX9: Flush DFSM when the AA mode changes. */
-	if (sctx->screen->dfsm_allowed) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+	if (initial_cdw != cs->current.cdw) {
+		sctx->context_roll_counter++;
+
+		/* GFX9: Flush DFSM when the AA mode changes. */
+		if (sctx->screen->dfsm_allowed) {
+			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+		}
 	}
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f52296d111..83589e6918 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -171,17 +171,13 @@ union si_state {
 #define SI_STATE_BIT(name) (1 << SI_STATE_IDX(name))
 #define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
 
-static inline unsigned si_states_that_roll_context(void)
+static inline unsigned si_states_that_always_roll_context(void)
 {
 	return (SI_STATE_BIT(blend) |
 		SI_STATE_BIT(rasterizer) |
 		SI_STATE_BIT(dsa) |
 		SI_STATE_BIT(poly_offset) |
-		SI_STATE_BIT(es) |
-		SI_STATE_BIT(gs) |
-		SI_STATE_BIT(vgt_shader_config) |
-		SI_STATE_BIT(vs) |
-		SI_STATE_BIT(ps));
+		SI_STATE_BIT(vgt_shader_config));
 }
 
 union si_state_atoms {
@@ -216,25 +212,18 @@ union si_state_atoms {
 			         sizeof(struct si_atom)))
 #define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct si_atom*))
 
-static inline unsigned si_atoms_that_roll_context(void)
+static inline unsigned si_atoms_that_always_roll_context(void)
 {
 	return (SI_ATOM_BIT(streamout_begin) |
 		SI_ATOM_BIT(streamout_enable) |
 		SI_ATOM_BIT(framebuffer) |
 		SI_ATOM_BIT(msaa_sample_locs) |
-		SI_ATOM_BIT(db_render_state) |
-		SI_ATOM_BIT(dpbb_state) |
-		SI_ATOM_BIT(msaa_config) |
 		SI_ATOM_BIT(sample_mask) |
-		SI_ATOM_BIT(cb_render_state) |
 		SI_ATOM_BIT(blend_color) |
-		SI_ATOM_BIT(clip_regs) |
 		SI_ATOM_BIT(clip_state) |
-		SI_ATOM_BIT(guardband) |
 		SI_ATOM_BIT(scissors) |
 		SI_ATOM_BIT(viewports) |
 		SI_ATOM_BIT(stencil_ref) |
-		SI_ATOM_BIT(spi_map) |
 		SI_ATOM_BIT(scratch_state));
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
index 4aad94d95f..70c129242d 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -310,6 +310,8 @@ static struct uvec2 si_get_depth_bin_size(struct si_context *sctx)
 
 static void si_emit_dpbb_disable(struct si_context *sctx)
 {
+	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
 	radeon_opt_set_context_reg(sctx, R_028C44_PA_SC_BINNER_CNTL_0,
 		SI_TRACKED_PA_SC_BINNER_CNTL_0,
 		S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
@@ -318,6 +320,8 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
 				   SI_TRACKED_DB_DFSM_CONTROL,
 				   S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
 				   S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+	if (initial_cdw != sctx->gfx_cs->current.cdw)
+		sctx->context_roll_counter++;
 }
 
 void si_emit_dpbb_state(struct si_context *sctx)
@@ -419,6 +423,7 @@ void si_emit_dpbb_state(struct si_context *sctx)
 	if (bin_size.y >= 32)
 		bin_size_extend.y = util_logbase2(bin_size.y) - 5;
 
+	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
 	radeon_opt_set_context_reg(
 		sctx, R_028C44_PA_SC_BINNER_CNTL_0,
 		SI_TRACKED_PA_SC_BINNER_CNTL_0,
@@ -436,4 +441,6 @@ void si_emit_dpbb_state(struct si_context *sctx)
 				   SI_TRACKED_DB_DFSM_CONTROL,
 				   S_028060_PUNCHOUT_MODE(punchout_mode) |
 				   S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+	if (initial_cdw != sctx->gfx_cs->current.cdw)
+		sctx->context_roll_counter++;
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 69f723e4e4..83eb646b79 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1189,26 +1189,26 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
 			       unsigned skip_atom_mask)
 {
 	unsigned num_patches = 0;
+	/* Vega10/Raven scissor bug workaround. When any context register is
+	 * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
+	 * registers must be written too.
+	 */
+	bool handle_scissor_bug = (sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
+				  !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors);
 	bool context_roll = false; /* set correctly for GFX9 only */
 
 	context_roll |= si_emit_rasterizer_prim_state(sctx);
 	if (sctx->tes_shader.cso)
 		context_roll |= si_emit_derived_tess_state(sctx, info, &num_patches);
-	if (info->count_from_stream_output)
+
+	if (handle_scissor_bug &&
+	    (info->count_from_stream_output ||
+	     sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
+	     sctx->dirty_states & si_states_that_always_roll_context() ||
+	     si_prim_restart_index_changed(sctx, info)))
 		context_roll = true;
 
-	/* Vega10/Raven scissor bug workaround. When any context register is
-	 * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
-	 * registers must be written too.
-	 */
-	if ((sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
-	    (context_roll ||
-	     sctx->dirty_atoms & si_atoms_that_roll_context() ||
-	     sctx->dirty_states & si_states_that_roll_context() ||
-	     si_prim_restart_index_changed(sctx, info))) {
-		sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-		si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
-	}
+	sctx->context_roll_counter = 0;
 
 	/* Emit state atoms. */
 	unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
@@ -1231,6 +1231,12 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
 	}
 	sctx->dirty_states = 0;
 
+	if (handle_scissor_bug &&
+	    (context_roll || sctx->context_roll_counter)) {
+		sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+		sctx->atoms.s.scissors.emit(sctx);
+	}
+
 	/* Emit draw states. */
 	si_emit_vs_state(sctx, info);
 	si_emit_draw_registers(sctx, info, num_patches);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 2bdac33586..ad7d21e781 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -561,6 +561,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 static void si_emit_shader_es(struct si_context *sctx)
 {
 	struct si_shader *shader = sctx->queued.named.es->shader;
+	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
 
 	if (!shader)
 		return;
@@ -578,6 +579,9 @@ static void si_emit_shader_es(struct si_context *sctx)
 		radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   shader->vgt_vertex_reuse_block_cntl);
+
+	if (initial_cdw != sctx->gfx_cs->current.cdw)
+		sctx->context_roll_counter++;
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@@ -762,6 +766,8 @@ static void gfx9_get_gs_info(struct si_shader_selector *es,
 static void si_emit_shader_gs(struct si_context *sctx)
 {
 	struct si_shader *shader = sctx->queued.named.gs->shader;
+	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
 	if (!shader)
 		return;
 
@@ -822,6 +828,9 @@ static void si_emit_shader_gs(struct si_context *sctx)
 						   SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 						   shader->vgt_vertex_reuse_block_cntl);
 	}
+
+	if (initial_cdw != sctx->gfx_cs->current.cdw)
+		sctx->context_roll_counter++;
 }
 
 static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@@ -957,6 +966,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 static void si_emit_shader_vs(struct si_context *sctx)
 {
 	struct si_shader *shader = sctx->queued.named.vs->shader;
+	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
 	if (!shader)
 		return;
 
@@ -994,6 +1005,9 @@ static void si_emit_shader_vs(struct si_context *sctx)
 		radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   shader->vgt_vertex_reuse_block_cntl);
+
+	if (initial_cdw != sctx->gfx_cs->current.cdw)
+		sctx->context_roll_counter++;
 }
 
 /**
@@ -1156,6 +1170,8 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
 static void si_emit_shader_ps(struct si_context *sctx)
 {
 	struct si_shader *shader = sctx->queued.named.ps->shader;
+	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
 	if (!shader)
 		return;
 
@@ -1181,6 +1197,9 @@ static void si_emit_shader_ps(struct si_context *sctx)
 	radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK,
 				   SI_TRACKED_CB_SHADER_MASK,
 				   shader->ctx_reg.ps.cb_shader_mask);
+
+	if (initial_cdw != sctx->gfx_cs->current.cdw)
+		sctx->context_roll_counter++;
 }
 
 static void si_shader_ps(struct si_shader *shader)
@@ -2849,9 +2868,13 @@ static void si_emit_spi_map(struct si_context *sctx)
 	/* R_028644_SPI_PS_INPUT_CNTL_0 */
 	/* Dota 2: Only ~16% of SPI map updates set different values. */
 	/* Talos: Only ~9% of SPI map updates set different values. */
+	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
 	radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0,
 				    spi_ps_input_cntl,
 				    sctx->tracked_regs.spi_ps_input_cntl, num_interp);
+
+	if (initial_cdw != sctx->gfx_cs->current.cdw)
+		sctx->context_roll_counter++;
 }
 
 /**
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index 819c773ba8..587422e50c 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -258,6 +258,7 @@ static void si_emit_guardband(struct si_context *ctx)
 	 * R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ
 	 * R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ
 	 */
+	unsigned initial_cdw = ctx->gfx_cs->current.cdw;
 	radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
 				    SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
 				    fui(guardband_y), fui(discard_y),
@@ -271,6 +272,8 @@ static void si_emit_guardband(struct si_context *ctx)
 				   S_028BE4_PIX_CENTER(rs->half_pixel_center) |
 				   S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
 						       vp_as_scissor.quant_mode));
+	if (initial_cdw != ctx->gfx_cs->current.cdw)
+		ctx->context_roll_counter++;
 }
 
 static void si_emit_scissors(struct si_context *ctx)




More information about the mesa-commit mailing list