Mesa (master): radeonsi: don't use CLEAR_STATE on SI

Marek Olšák mareko at kemper.freedesktop.org
Fri Aug 18 14:04:18 UTC 2017


Module: Mesa
Branch: master
Commit: 13aa8d3da92bd83abc8d5212764a15b1217e93bd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=13aa8d3da92bd83abc8d5212764a15b1217e93bd

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Aug 15 17:51:05 2017 +0200

radeonsi: don't use CLEAR_STATE on SI

This fixes random hangs with Unigine Valley.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102201

Fixes: 064550238ef0 ("radeonsi: use CLEAR_STATE to initialize some registers")
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

---

 src/gallium/drivers/radeonsi/si_hw_context.c | 20 ++++++++----
 src/gallium/drivers/radeonsi/si_pipe.c       |  4 +++
 src/gallium/drivers/radeonsi/si_pipe.h       |  1 +
 src/gallium/drivers/radeonsi/si_state.c      | 48 ++++++++++++++++++++++++++--
 4 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 3582cd711b..46e6073938 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -232,27 +232,33 @@ void si_begin_new_cs(struct si_context *ctx)
 		ctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
 
 	/* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */
-	ctx->framebuffer.dirty_cbufs =
-		u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs);
-	/* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */
-	ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL;
+	bool has_clear_state = ctx->screen->has_clear_state;
+	if (has_clear_state) {
+		ctx->framebuffer.dirty_cbufs =
+			 u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs);
+		/* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */
+		ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL;
+	} else {
+		ctx->framebuffer.dirty_cbufs = u_bit_consecutive(0, 8);
+		ctx->framebuffer.dirty_zsbuf = true;
+	}
 	/* This should always be marked as dirty to set the framebuffer scissor
 	 * at least. */
 	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
 
 	si_mark_atom_dirty(ctx, &ctx->clip_regs);
 	/* CLEAR_STATE sets zeros. */
-	if (ctx->clip_state.any_nonzeros)
+	if (!has_clear_state || ctx->clip_state.any_nonzeros)
 		si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
 	ctx->msaa_sample_locs.nr_samples = 0;
 	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom);
 	si_mark_atom_dirty(ctx, &ctx->msaa_config);
 	/* CLEAR_STATE sets 0xffff. */
-	if (ctx->sample_mask.sample_mask != 0xffff)
+	if (!has_clear_state || ctx->sample_mask.sample_mask != 0xffff)
 		si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
 	si_mark_atom_dirty(ctx, &ctx->cb_render_state);
 	/* CLEAR_STATE sets zeros. */
-	if (ctx->blend_color.any_nonzeros)
+	if (!has_clear_state || ctx->blend_color.any_nonzeros)
 		si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
 	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index cac1d01559..80a77a8f1f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1067,6 +1067,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 	sscreen->tess_offchip_block_dw_size =
 		sscreen->b.family == CHIP_HAWAII ? 4096 : 8192;
 
+	/* The mere presense of CLEAR_STATE in the IB causes random GPU hangs
+	 * on SI. */
+	sscreen->has_clear_state = sscreen->b.chip_class >= CIK;
+
 	sscreen->has_distributed_tess =
 		sscreen->b.chip_class >= VI &&
 		sscreen->b.info.max_se >= 2;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ee0ab1b37b..8d822874d5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -87,6 +87,7 @@ struct si_screen {
 	struct r600_common_screen	b;
 	unsigned			gs_table_depth;
 	unsigned			tess_offchip_block_dw_size;
+	bool				has_clear_state;
 	bool				has_distributed_tess;
 	bool				has_draw_indirect_multi;
 	bool				has_ds_bpermute;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 2c413a4c36..8010df6584 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4424,8 +4424,12 @@ static void si_init_config(struct si_context *sctx)
 	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
 	unsigned raster_config, raster_config_1;
 	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
+	bool has_clear_state = sscreen->has_clear_state;
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 
+	/* Only SI can disable CLEAR_STATE for now. */
+	assert(has_clear_state || sscreen->b.chip_class == SI);
+
 	if (!pm4)
 		return;
 
@@ -4434,11 +4438,15 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
 	si_pm4_cmd_end(pm4, false);
 
-	si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE);
-	si_pm4_cmd_add(pm4, 0);
-	si_pm4_cmd_end(pm4, false);
+	if (has_clear_state) {
+		si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE);
+		si_pm4_cmd_add(pm4, 0);
+		si_pm4_cmd_end(pm4, false);
+	}
 
 	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
+	if (!has_clear_state)
+		si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
 
 	/* FIXME calculate these values somehow ??? */
 	if (sctx->b.chip_class <= VI) {
@@ -4446,7 +4454,15 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
 	}
 
+	if (!has_clear_state) {
+		si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
+		si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
+		si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+	}
+
 	si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
+	if (!has_clear_state)
+		si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
 	if (sctx->b.chip_class < CIK)
 		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
 			       S_008A14_CLIP_VTX_REORDER_ENA(1));
@@ -4454,6 +4470,9 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
 	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
 
+	if (!has_clear_state)
+		si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
+
 	switch (sctx->screen->b.family) {
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
@@ -4564,6 +4583,26 @@ static void si_init_config(struct si_context *sctx)
 			       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
 	}
 
+	if (!has_clear_state) {
+		si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
+		si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
+			       S_028230_ER_TRI(0xA) |
+			       S_028230_ER_POINT(0xA) |
+			       S_028230_ER_RECT(0xA) |
+			       /* Required by DX10_DIAMOND_TEST_ENA: */
+			       S_028230_ER_LINE_LR(0x1A) |
+			       S_028230_ER_LINE_RL(0x26) |
+			       S_028230_ER_LINE_TB(0xA) |
+			       S_028230_ER_LINE_BT(0xA));
+		/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
+		si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+		si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
+		si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
+		si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
+		si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
+		si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
+	}
+
 	if (sctx->b.chip_class >= GFX9) {
 		si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
 		si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
@@ -4640,6 +4679,9 @@ static void si_init_config(struct si_context *sctx)
 			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
 
 		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
+	} else if (!has_clear_state) {
+		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
 	}
 
 	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);




More information about the mesa-commit mailing list