[Mesa-dev] [PATCH 4/4] radeonsi/gfx9: clean up context roll tracking logic

Marek Olšák maraeo at gmail.com
Wed Apr 17 23:39:13 UTC 2019


From: Marek Olšák <marek.olsak at amd.com>

Use a bool flag instead of a counter, and set it directly in the state
emit functions instead of returning the values and accumulating them in
the callers. Only whether a context roll happened is ever consumed, not
how many, so a bool is sufficient. No functional change intended.
---
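(Reviewer note, not part of the commit message: in sketch form, the
cleanup replaces the pattern where the emit helpers returned whether
they rolled the context and the caller accumulated the result:

    /* before: the caller accumulates the return value */
    sctx->context_roll_counter |= si_emit_rasterizer_prim_state(sctx);

    /* after: the helper sets the flag itself */
    si_emit_rasterizer_prim_state(sctx);

Both lines are taken from the si_emit_all_states hunk below.)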
 src/gallium/drivers/radeonsi/si_pipe.h           |  2 +-
 src/gallium/drivers/radeonsi/si_state.c          |  8 +++----
 src/gallium/drivers/radeonsi/si_state_binning.c  |  4 ++--
 src/gallium/drivers/radeonsi/si_state_draw.c     | 30 +++++++++++-------------
 src/gallium/drivers/radeonsi/si_state_shaders.c  | 10 ++++----
 src/gallium/drivers/radeonsi/si_state_viewport.c |  2 +-
 6 files changed, 27 insertions(+), 29 deletions(-)
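(Background, paraphrased from the comment in si_draw_vbo: the flag
feeds the Vega10/Raven scissor bug workaround. When any context
register is written, the GPU rolls the context, and the
PA_SC_VPORT_SCISSOR registers must be written again. The consumer is
unchanged by this cleanup; roughly:

    sctx->context_roll = false;  /* reset before the non-draw packets */
    si_emit_all_states(sctx, info, masked_atoms);
    ...
    if (handle_scissor_bug && sctx->context_roll) {
        sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
        sctx->atoms.s.scissors.emit(sctx);
    }
)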

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ee53192..c0211f5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1063,21 +1063,21 @@ struct si_context {
 	unsigned			num_vs_flushes;
 	unsigned			num_ps_flushes;
 	unsigned			num_cs_flushes;
 	unsigned			num_cb_cache_flushes;
 	unsigned			num_db_cache_flushes;
 	unsigned			num_L2_invalidates;
 	unsigned			num_L2_writebacks;
 	unsigned			num_resident_handles;
 	uint64_t			num_alloc_tex_transfer_bytes;
 	unsigned			last_tex_ps_draw_ratio; /* for query */
-	unsigned			context_roll_counter;
+	bool				context_roll;
 
 	/* Queries. */
 	/* Maintain the list of active queries for pausing between IBs. */
 	int				num_occlusion_queries;
 	int				num_perfect_occlusion_queries;
 	struct list_head		active_queries;
 	unsigned			num_cs_dw_queries_suspend;
 
 	/* Render condition. */
 	struct pipe_query		*render_cond;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 757c17f..bc7e777 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -249,21 +249,21 @@ static void si_emit_cb_render_state(struct si_context *sctx)
 			}
 		}
 
 		/* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */
 		radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT,
 					    SI_TRACKED_SX_PS_DOWNCONVERT,
 					    sx_ps_downconvert, sx_blend_opt_epsilon,
 					    sx_blend_opt_control);
 	}
 	if (initial_cdw != cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /*
  * Blender functions
  */
 
 static uint32_t si_translate_blend_function(int blend_func)
 {
 	switch (blend_func) {
 	case PIPE_BLEND_ADD:
@@ -786,21 +786,21 @@ static void si_emit_clip_regs(struct si_context *sctx)
 		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
 		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
 		clipdist_mask | (culldist_mask << 8));
 	radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL,
 		SI_TRACKED_PA_CL_CLIP_CNTL,
 		rs->pa_cl_clip_cntl |
 		ucp_mask |
 		S_028810_CLIP_DISABLE(window_space));
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /*
  * inferred state between framebuffer and rasterizer
  */
 static void si_update_poly_offset_state(struct si_context *sctx)
 {
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
 	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
@@ -1448,21 +1448,21 @@ static void si_emit_db_render_state(struct si_context *sctx)
 		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
 
 	if (sctx->screen->has_rbplus &&
 	    !sctx->screen->rbplus_allowed)
 		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
 
 	radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL,
 				   SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /*
  * format translation
  */
 static uint32_t si_translate_colorformat(enum pipe_format format)
 {
 	const struct util_format_description *desc = util_format_description(format);
 	if (!desc)
 		return V_028C70_COLOR_INVALID;
@@ -3537,21 +3537,21 @@ static void si_emit_msaa_config(struct si_context *sctx)
 				    SI_TRACKED_PA_SC_LINE_CNTL, sc_line_cntl,
 				    sc_aa_config);
 	/* R_028804_DB_EQAA */
 	radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA,
 				   db_eqaa);
 	/* R_028A4C_PA_SC_MODE_CNTL_1 */
 	radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1,
 				   SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1);
 
 	if (initial_cdw != cs->current.cdw) {
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 
 		/* GFX9: Flush DFSM when the AA mode changes. */
 		if (sctx->screen->dfsm_allowed) {
 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
 		}
 	}
 }
 
 void si_update_ps_iter_samples(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
index 3516e56..5c6c2e6 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -314,21 +314,21 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
 
 	radeon_opt_set_context_reg(sctx, R_028C44_PA_SC_BINNER_CNTL_0,
 		SI_TRACKED_PA_SC_BINNER_CNTL_0,
 		S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
 		S_028C44_DISABLE_START_OF_PRIM(1));
 	radeon_opt_set_context_reg(sctx, R_028060_DB_DFSM_CONTROL,
 				   SI_TRACKED_DB_DFSM_CONTROL,
 				   S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
 				   S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 void si_emit_dpbb_state(struct si_context *sctx)
 {
 	struct si_screen *sscreen = sctx->screen;
 	struct si_state_blend *blend = sctx->queued.named.blend;
 	struct si_state_dsa *dsa = sctx->queued.named.dsa;
 	unsigned db_shader_control = sctx->ps_db_shader_control;
 
 	assert(sctx->chip_class >= GFX9);
@@ -436,12 +436,12 @@ void si_emit_dpbb_state(struct si_context *sctx)
 		S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin) |
 		S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin) |
 		S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
 		S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
 		S_028C44_OPTIMAL_BIN_SELECTION(1));
 	radeon_opt_set_context_reg(sctx, R_028060_DB_DFSM_CONTROL,
 				   SI_TRACKED_DB_DFSM_CONTROL,
 				   S_028060_PUNCHOUT_MODE(punchout_mode) |
 				   S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b673c2f..7a51b7c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -59,21 +59,21 @@ static unsigned si_conv_pipe_prim(unsigned mode)
 	return prim_conv[mode];
 }
 
 /**
  * This calculates the LDS size for tessellation shaders (VS, TCS, TES).
  * LS.LDS_SIZE is shared by all 3 shader stages.
  *
  * The information about LDS and other non-compile-time parameters is then
  * written to userdata SGPRs.
  */
-static bool si_emit_derived_tess_state(struct si_context *sctx,
+static void si_emit_derived_tess_state(struct si_context *sctx,
 				       const struct pipe_draw_info *info,
 				       unsigned *num_patches)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	struct si_shader *ls_current;
 	struct si_shader_selector *ls;
 	/* The TES pointer will only be used for sctx->last_tcs.
 	 * It would be wrong to think that TCS = TES. */
 	struct si_shader_selector *tcs =
 		sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
@@ -298,23 +298,22 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
 
 	if (sctx->last_ls_hs_config != ls_hs_config) {
 		if (sctx->chip_class >= CIK) {
 			radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2,
 						   ls_hs_config);
 		} else {
 			radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG,
 					       ls_hs_config);
 		}
 		sctx->last_ls_hs_config = ls_hs_config;
-		return true; /* true if the context rolls */
+		sctx->context_roll = true;
 	}
-	return false;
 }
 
 static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
 {
 	switch (info->mode) {
 	case PIPE_PRIM_PATCHES:
 		return info->count / info->vertices_per_patch;
 	case PIPE_PRIM_POLYGON:
 		return info->count >= 3;
 	case SI_PRIM_RECTANGLE_LIST:
@@ -534,44 +533,44 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 		     (info->instance_count > 1 &&
 		      (info->count_from_stream_output ||
 		       si_num_prims_for_vertices(info) <= 1))))
 			sctx->flags |= SI_CONTEXT_VGT_FLUSH;
 	}
 
 	return ia_multi_vgt_param;
 }
 
 /* rast_prim is the primitive type after GS. */
-static bool si_emit_rasterizer_prim_state(struct si_context *sctx)
+static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	enum pipe_prim_type rast_prim = sctx->current_rast_prim;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
 	/* Skip this if not rendering lines. */
 	if (!util_prim_is_lines(rast_prim))
-		return false;
+		return;
 
 	if (rast_prim == sctx->last_rast_prim &&
 	    rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
-		return false;
+		return;
 
 	/* For lines, reset the stipple pattern at each primitive. Otherwise,
 	 * reset the stipple pattern at each packet (line strips, line loops).
 	 */
 	radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
 		rs->pa_sc_line_stipple |
 		S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
 
 	sctx->last_rast_prim = rast_prim;
 	sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
-	return true; /* true if the context rolls */
+	sctx->context_roll = true;
 }
 
 static void si_emit_vs_state(struct si_context *sctx,
 			     const struct pipe_draw_info *info)
 {
 	sctx->current_vs_state &= C_VS_STATE_INDEXED;
 	sctx->current_vs_state |= S_VS_STATE_INDEXED(!!info->index_size);
 
 	if (sctx->num_vs_blit_sgprs) {
 		/* Re-emit the state after we leave u_blitter. */
@@ -893,21 +892,21 @@ static void si_emit_surface_sync(struct si_context *sctx,
 		radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
 		radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
 		radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
 		radeon_emit(cs, 0);               /* CP_COHER_BASE */
 		radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
 	}
 
 	/* ACQUIRE_MEM has an implicit context roll if the current context
 	 * is busy. */
 	if (sctx->has_graphics)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 void si_emit_cache_flush(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 	uint32_t flags = sctx->flags;
 
 	if (!sctx->has_graphics) {
 		/* Only process compute flags. */
 		flags &= SI_CONTEXT_INV_ICACHE |
@@ -1222,24 +1221,23 @@ static void si_get_draw_start_count(struct si_context *sctx,
 		*start = info->start;
 		*count = info->count;
 	}
 }
 
 static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
 			       unsigned skip_atom_mask)
 {
 	unsigned num_patches = 0;
 
-	sctx->context_roll_counter |= si_emit_rasterizer_prim_state(sctx);
+	si_emit_rasterizer_prim_state(sctx);
 	if (sctx->tes_shader.cso)
-		sctx->context_roll_counter |=
-			si_emit_derived_tess_state(sctx, info, &num_patches);
+		si_emit_derived_tess_state(sctx, info, &num_patches);
 
 	/* Emit state atoms. */
 	unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
 	while (mask)
 		sctx->atoms.array[u_bit_scan(&mask)].emit(sctx);
 
 	sctx->dirty_atoms &= skip_atom_mask;
 
 	/* Emit states. */
 	mask = sctx->dirty_states;
@@ -1453,29 +1451,29 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 	if (!si_upload_vertex_buffer_descriptors(sctx))
 		goto return_cleanup;
 
 	/* Vega10/Raven scissor bug workaround. When any context register is
 	 * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
 	 * registers must be written too.
 	 */
 	bool handle_scissor_bug = sctx->screen->has_gfx9_scissor_bug &&
 				  !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors);
 
-	/* If this is > 0 after all the non-draw packets, a context roll occured. */
-	sctx->context_roll_counter = 0;
+	/* If this is true after all the non-draw packets, a context roll occurred. */
+	sctx->context_roll = false;
 
 	if (handle_scissor_bug &&
 	    (info->count_from_stream_output ||
 	     sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
 	     sctx->dirty_states & si_states_that_always_roll_context() ||
 	     si_prim_restart_index_changed(sctx, info)))
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 
 	/* Use optimal packet order based on whether we need to sync the pipeline. */
 	if (unlikely(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
 				      SI_CONTEXT_FLUSH_AND_INV_DB |
 				      SI_CONTEXT_PS_PARTIAL_FLUSH |
 				      SI_CONTEXT_CS_PARTIAL_FLUSH))) {
 		/* If we have to wait for idle, set all states first, so that all
 		 * SET packets are processed in parallel with previous draw calls.
 		 * Then draw and prefetch at the end. This ensures that the time
 		 * the CUs are idle is very short.
@@ -1490,21 +1488,21 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 
 		/* Emit all states except possibly render condition. */
 		si_emit_all_states(sctx, info, masked_atoms);
 		si_emit_cache_flush(sctx);
 		/* <-- CUs are idle here. */
 
 		if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond))
 			sctx->atoms.s.render_cond.emit(sctx);
 		sctx->dirty_atoms = 0;
 
-		if (handle_scissor_bug && sctx->context_roll_counter) {
+		if (handle_scissor_bug && sctx->context_roll) {
 			sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 			sctx->atoms.s.scissors.emit(sctx);
 		}
 
 		si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
 		/* <-- CUs are busy here. */
 
 		/* Start prefetches after the draw has been started. Both will run
 		 * in parallel, but starting the draw first is more important.
 		 */
@@ -1519,21 +1517,21 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 
 		/* Only prefetch the API VS and VBO descriptors. */
 		if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
 			cik_emit_prefetch_L2(sctx, true);
 
 		if (!si_upload_graphics_shader_descriptors(sctx))
 			return;
 
 		si_emit_all_states(sctx, info, 0);
 
-		if (handle_scissor_bug && sctx->context_roll_counter) {
+		if (handle_scissor_bug && sctx->context_roll) {
 			sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
 			sctx->atoms.s.scissors.emit(sctx);
 		}
 
 		si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
 
 		/* Prefetch the remaining shaders after the draw has been
 		 * started. */
 		if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
 			cik_emit_prefetch_L2(sctx, false);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 5bdfd4f..d00bb17 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -569,21 +569,21 @@ static void si_emit_shader_es(struct si_context *sctx)
 		radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
 					   SI_TRACKED_VGT_TF_PARAM,
 					   shader->vgt_tf_param);
 
 	if (shader->vgt_vertex_reuse_block_cntl)
 		radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   shader->vgt_vertex_reuse_block_cntl);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
 	unsigned num_user_sgprs;
 	unsigned vgpr_comp_cnt;
 	uint64_t va;
 	unsigned oc_lds_en;
 
@@ -818,21 +818,21 @@ static void si_emit_shader_gs(struct si_context *sctx)
 			radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
 						   SI_TRACKED_VGT_TF_PARAM,
 						   shader->vgt_tf_param);
 		if (shader->vgt_vertex_reuse_block_cntl)
 			radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
 						   SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 						   shader->vgt_vertex_reuse_block_cntl);
 	}
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_shader_selector *sel = shader->selector;
 	const ubyte *num_components = sel->info.num_stream_output_components;
 	unsigned gs_num_invocations = sel->gs_num_invocations;
 	struct si_pm4_state *pm4;
 	uint64_t va;
 	unsigned max_stream = sel->max_gs_stream;
@@ -995,21 +995,21 @@ static void si_emit_shader_vs(struct si_context *sctx)
 		radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM,
 					   SI_TRACKED_VGT_TF_PARAM,
 					   shader->vgt_tf_param);
 
 	if (shader->vgt_vertex_reuse_block_cntl)
 		radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
 					   shader->vgt_vertex_reuse_block_cntl);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /**
  * Compute the state for \p shader, which will run as a vertex shader on the
  * hardware.
  *
  * If \p gs is non-NULL, it points to the geometry shader for which this shader
  * is the copy shader.
  */
 static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
@@ -1187,21 +1187,21 @@ static void si_emit_shader_ps(struct si_context *sctx)
 	radeon_opt_set_context_reg2(sctx, R_028710_SPI_SHADER_Z_FORMAT,
 				    SI_TRACKED_SPI_SHADER_Z_FORMAT,
 				    shader->ctx_reg.ps.spi_shader_z_format,
 				    shader->ctx_reg.ps.spi_shader_col_format);
 
 	radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK,
 				   SI_TRACKED_CB_SHADER_MASK,
 				   shader->ctx_reg.ps.cb_shader_mask);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 static void si_shader_ps(struct si_shader *shader)
 {
 	struct tgsi_shader_info *info = &shader->selector->info;
 	struct si_pm4_state *pm4;
 	unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
 	unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
 	uint64_t va;
 	unsigned input_ena = shader->config.spi_ps_input_ena;
@@ -2863,21 +2863,21 @@ static void si_emit_spi_map(struct si_context *sctx)
 
 	/* R_028644_SPI_PS_INPUT_CNTL_0 */
 	/* Dota 2: Only ~16% of SPI map updates set different values. */
 	/* Talos: Only ~9% of SPI map updates set different values. */
 	unsigned initial_cdw = sctx->gfx_cs->current.cdw;
 	radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0,
 				    spi_ps_input_cntl,
 				    sctx->tracked_regs.spi_ps_input_cntl, num_interp);
 
 	if (initial_cdw != sctx->gfx_cs->current.cdw)
-		sctx->context_roll_counter++;
+		sctx->context_roll = true;
 }
 
 /**
  * Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that.
  */
 static void si_init_config_add_vgt_flush(struct si_context *sctx)
 {
 	if (sctx->init_config_has_vgt_flush)
 		return;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index a9a1be7..1ec6921 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -276,21 +276,21 @@ static void si_emit_guardband(struct si_context *ctx)
 	radeon_opt_set_context_reg(ctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
 				   SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
 				   S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) |
 				   S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4));
 	radeon_opt_set_context_reg(ctx, R_028BE4_PA_SU_VTX_CNTL,
 				   SI_TRACKED_PA_SU_VTX_CNTL,
 				   S_028BE4_PIX_CENTER(rs->half_pixel_center) |
 				   S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
 						       vp_as_scissor.quant_mode));
 	if (initial_cdw != ctx->gfx_cs->current.cdw)
-		ctx->context_roll_counter++;
+		ctx->context_roll = true;
 }
 
 static void si_emit_scissors(struct si_context *ctx)
 {
 	struct radeon_cmdbuf *cs = ctx->gfx_cs;
 	struct pipe_scissor_state *states = ctx->scissors.states;
 	unsigned mask = ctx->scissors.dirty_mask;
 	bool scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;
 
 	/* The simple case: Only 1 viewport is active. */
-- 
2.7.4
