[Mesa-dev] [PATCH 34/53] r600/eg: workaround bug with tess shader and dynamic GPRs.

Dave Airlie airlied at gmail.com
Sun Nov 29 22:20:43 PST 2015


From: Dave Airlie <airlied at redhat.com>

When using tessellation on eg/ni chipsets, we must disable
dynamic GPRs to workaround a hw bug where the GPU hangs
when too many things get queued.

This implements something like the r600 code to emit
the transition between static and dynamic GPRs, and to
statically allocate GPRs when tessellation is enabled.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/drivers/r600/evergreen_compute.c |   6 +-
 src/gallium/drivers/r600/evergreen_state.c   | 222 ++++++++++++++++++++-------
 src/gallium/drivers/r600/r600_hw_context.c   |   2 +-
 src/gallium/drivers/r600/r600_pipe.h         |   8 +-
 src/gallium/drivers/r600/r600_state_common.c |   7 +
 5 files changed, 185 insertions(+), 60 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 010d109..c07cee1 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -432,6 +432,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 	 */
 	r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);
 
+	/* emit config state */
+	if (ctx->b.chip_class == EVERGREEN)
+		r600_emit_atom(ctx, &ctx->config_state.atom);
+
 	ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
 	r600_flush_emit(ctx);
 
@@ -791,7 +795,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
 
 	/* Config Registers */
 	if (ctx->b.chip_class < CAYMAN)
-		evergreen_init_common_regs(cb, ctx->b.chip_class, ctx->b.family,
+		evergreen_init_common_regs(ctx, cb, ctx->b.chip_class, ctx->b.family,
 					   ctx->screen->b.info.drm_minor);
 	else
 		cayman_init_common_regs(cb, ctx->b.chip_class, ctx->b.family,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index edc6f28..b3109c7 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -869,6 +869,33 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
 						    tex->width0, tex->height0, 0);
 }
 
+static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+	struct r600_config_state *a = (struct r600_config_state*)atom;
+
+	radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
+	if (a->dyn_gpr_enabled) {
+		radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs));
+		radeon_emit(cs, 0);
+		radeon_emit(cs, 0);
+	} else {
+		radeon_emit(cs, a->sq_gpr_resource_mgmt_1);
+		radeon_emit(cs, a->sq_gpr_resource_mgmt_2);
+		radeon_emit(cs, a->sq_gpr_resource_mgmt_3);
+	}
+	radeon_set_config_reg(cs, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (a->dyn_gpr_enabled << 8));
+	if (a->dyn_gpr_enabled) {
+		radeon_set_context_reg(cs, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
+				       S_028838_PS_GPRS(0x1e) |
+				       S_028838_VS_GPRS(0x1e) |
+				       S_028838_GS_GPRS(0x1e) |
+				       S_028838_ES_GPRS(0x1e) |
+				       S_028838_HS_GPRS(0x1e) |
+				       S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
+	}
+}
+
 static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
@@ -2553,10 +2580,10 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF);
 }
 
-void evergreen_init_common_regs(struct r600_command_buffer *cb,
-	enum chip_class ctx_chip_class,
-	enum radeon_family ctx_family,
-	int ctx_drm_minor)
+void evergreen_init_common_regs(struct r600_context *rctx, struct r600_command_buffer *cb,
+				enum chip_class ctx_chip_class,
+				enum radeon_family ctx_family,
+				int ctx_drm_minor)
 {
 	int ps_prio;
 	int vs_prio;
@@ -2567,31 +2594,23 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
 	int cs_prio;
 	int ls_prio;
 
-	int num_ps_gprs;
-	int num_vs_gprs;
-	int num_gs_gprs;
-	int num_es_gprs;
-	int num_hs_gprs;
-	int num_ls_gprs;
-	int num_temp_gprs;
-
 	unsigned tmp;
 
 	ps_prio = 0;
 	vs_prio = 1;
 	gs_prio = 2;
 	es_prio = 3;
-	hs_prio = 0;
-	ls_prio = 0;
+	hs_prio = 3;
+	ls_prio = 3;
 	cs_prio = 0;
 
-	num_ps_gprs = 93;
-	num_vs_gprs = 46;
-	num_temp_gprs = 4;
-	num_gs_gprs = 31;
-	num_es_gprs = 31;
-	num_hs_gprs = 23;
-	num_ls_gprs = 23;
+	rctx->default_gprs[R600_HW_STAGE_PS] = 93;
+	rctx->default_gprs[R600_HW_STAGE_VS] = 46;
+	rctx->r6xx_num_clause_temp_gprs = 4;
+	rctx->default_gprs[R600_HW_STAGE_GS] = 31;
+	rctx->default_gprs[R600_HW_STAGE_ES] = 31;
+	rctx->default_gprs[EG_HW_STAGE_HS] = 23;
+	rctx->default_gprs[EG_HW_STAGE_LS] = 23;
 
 	tmp = 0;
 	switch (ctx_family) {
@@ -2614,40 +2633,12 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
 	tmp |= S_008C00_GS_PRIO(gs_prio);
 	tmp |= S_008C00_ES_PRIO(es_prio);
 
-	/* enable dynamic GPR resource management */
-	if (ctx_drm_minor >= 7) {
-		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
-		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
-		/* always set temp clauses */
-		r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
-		r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
-		r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
-		r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
-		r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
-		r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
-					S_028838_PS_GPRS(0x1e) |
-					S_028838_VS_GPRS(0x1e) |
-					S_028838_GS_GPRS(0x1e) |
-					S_028838_ES_GPRS(0x1e) |
-					S_028838_HS_GPRS(0x1e) |
-					S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
-	} else {
-		r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
-		r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
-
-		tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
-		tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
-		tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
-		r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
-
-		tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
-		tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
-		r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
+	r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 1);
+	r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
 
-		tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
-		tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
-		r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
-	}
+	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
+	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
+	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
 
 	/* The cs checker requires this register to be set. */
 	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
@@ -2694,7 +2685,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 	r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 
-	evergreen_init_common_regs(cb, rctx->b.chip_class,
+	evergreen_init_common_regs(rctx, cb, rctx->b.chip_class,
 				   rctx->b.family, rctx->screen->b.info.drm_minor);
 
 	family = rctx->b.family;
@@ -3685,7 +3676,11 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	 * or piglit regression).
 	 * !!!
 	 */
-
+	if (rctx->b.chip_class == EVERGREEN) {
+		r600_init_atom(rctx, &rctx->config_state.atom, id++, evergreen_emit_config_state, 11);
+		if (rctx->screen->b.info.drm_minor >= 7)
+			rctx->config_state.dyn_gpr_enabled = true;
+	}
 	r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0);
 	/* shader const */
 	r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0);
@@ -3891,3 +3886,118 @@ void evergreen_set_lds_alloc(struct r600_context *rctx,
 {
 	radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc);
 }
+
+/* on evergreen if you are running tessellation you need to disable dynamic
+   GPRs to workaround a hardware bug.*/
+bool evergreen_adjust_gprs(struct r600_context *rctx)
+{
+	unsigned num_gprs[EG_NUM_HW_STAGES];
+	unsigned def_gprs[EG_NUM_HW_STAGES];
+	unsigned cur_gprs[EG_NUM_HW_STAGES];
+	unsigned new_gprs[EG_NUM_HW_STAGES];
+	unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
+	unsigned max_gprs;
+	unsigned i;
+	unsigned total_gprs;
+	unsigned tmp[3];
+	bool rework = false, set_default = false, set_dirty = false;
+	max_gprs = 0;
+	for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+		def_gprs[i] = rctx->default_gprs[i];
+		max_gprs += def_gprs[i];
+	}
+	max_gprs += def_num_clause_temp_gprs * 2;
+
+	/* if we have no TESS and dyn gpr is enabled then do nothing. */
+	if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader || rctx->screen->b.info.drm_minor < 7) {
+		if (rctx->config_state.dyn_gpr_enabled)
+			return true;
+
+		/* transition back to dyn gpr enabled state */
+		rctx->config_state.dyn_gpr_enabled = true;
+		r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
+		rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
+		return true;
+	}
+
+
+	/* gather required shader gprs */
+	for (i = 0; i < EG_NUM_HW_STAGES; i++)
+		num_gprs[i] = rctx->hw_shader_stages[i].shader->shader.bc.ngpr;
+
+	cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+	cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+	cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
+	cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
+	cur_gprs[EG_HW_STAGE_LS] = G_008C0C_NUM_LS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);
+	cur_gprs[EG_HW_STAGE_HS] = G_008C0C_NUM_HS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);
+
+	total_gprs = 0;
+	for (i = 0; i < EG_NUM_HW_STAGES; i++)	{
+		new_gprs[i] = num_gprs[i];
+		total_gprs += num_gprs[i];
+	}
+
+	if (total_gprs > (max_gprs - (2 * def_num_clause_temp_gprs)))
+		return false;
+
+	for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+		if (new_gprs[i] > cur_gprs[i]) {
+			rework = true;
+			break;
+		}
+	}
+
+	if (rctx->config_state.dyn_gpr_enabled) {
+		set_dirty = true;
+		rctx->config_state.dyn_gpr_enabled = false;
+	}
+
+	if (rework) {
+		set_default = true;
+		for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+			if (new_gprs[i] > def_gprs[i])
+				set_default = false;
+		}
+
+		if (set_default) {
+			for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+				new_gprs[i] = def_gprs[i];
+			}
+		} else {
+			unsigned ps_value = max_gprs;
+
+			ps_value -= (def_num_clause_temp_gprs * 2);
+			for (i = R600_HW_STAGE_VS; i < EG_NUM_HW_STAGES; i++)
+				ps_value -= new_gprs[i];
+
+			new_gprs[R600_HW_STAGE_PS] = ps_value;
+		}
+
+		tmp[0] = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) |
+			S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) |
+			S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
+
+		tmp[1] = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) |
+			S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]);
+
+		tmp[2] = S_008C0C_NUM_HS_GPRS(new_gprs[EG_HW_STAGE_HS]) |
+			S_008C0C_NUM_LS_GPRS(new_gprs[EG_HW_STAGE_LS]);
+
+		if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp[0] ||
+		    rctx->config_state.sq_gpr_resource_mgmt_2 != tmp[1] ||
+		    rctx->config_state.sq_gpr_resource_mgmt_3 != tmp[2]) {
+			rctx->config_state.sq_gpr_resource_mgmt_1 = tmp[0];
+			rctx->config_state.sq_gpr_resource_mgmt_2 = tmp[1];
+			rctx->config_state.sq_gpr_resource_mgmt_3 = tmp[2];
+			set_dirty = true;
+		}
+	}
+
+
+	if (set_dirty) {
+		r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
+		rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
+	}
+	return true;
+}
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index b7845b5..90b99e8 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -310,7 +310,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
 	ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8;
 	r600_mark_atom_dirty(ctx, &ctx->viewport.atom);
-	if (ctx->b.chip_class < EVERGREEN) {
+	if (ctx->b.chip_class <= EVERGREEN) {
 		r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
 	}
 	r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 78f3a59..04248b4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -206,6 +206,8 @@ struct r600_config_state {
 	struct r600_atom atom;
 	unsigned sq_gpr_resource_mgmt_1;
 	unsigned sq_gpr_resource_mgmt_2;
+	unsigned sq_gpr_resource_mgmt_3;
+	bool dyn_gpr_enabled;
 };
 
 struct r600_stencil_ref
@@ -441,6 +443,7 @@ struct r600_context {
 	boolean				has_vertex_cache;
 	boolean				keep_tiling_flags;
 	unsigned			default_gprs[EG_NUM_HW_STAGES];
+	unsigned                        current_gprs[EG_NUM_HW_STAGES];
 	unsigned			r6xx_num_clause_temp_gprs;
 
 	/* Miscellaneous state objects. */
@@ -603,7 +606,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 				     const struct pipe_sampler_view *state,
 				     unsigned width0, unsigned height0,
 				     unsigned force_level);
-void evergreen_init_common_regs(struct r600_command_buffer *cb,
+void evergreen_init_common_regs(struct r600_context *ctx,
+				struct r600_command_buffer *cb,
 				enum chip_class ctx_chip_class,
 				enum radeon_family ctx_family,
 				int ctx_drm_minor);
@@ -634,7 +638,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
 void evergreen_init_color_surface_rat(struct r600_context *rctx,
 					struct r600_surface *surf);
 void evergreen_update_db_shader_control(struct r600_context * rctx);
-
+bool evergreen_adjust_gprs(struct r600_context *rctx);
 /* r600_blit.c */
 void r600_init_blit_functions(struct r600_context *rctx);
 void r600_decompress_depth_textures(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index ab3313f..351aca9 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1624,6 +1624,13 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 		}
 	}
 
+	if (rctx->b.chip_class == EVERGREEN) {
+		if (!evergreen_adjust_gprs(rctx)) {
+			/* discard rendering */
+			return false;
+		}
+	}
+
 	blend_disable = (rctx->dual_src_blend &&
 			rctx->ps_shader->current->nr_ps_color_outputs < 2);
 
-- 
2.5.0



More information about the mesa-dev mailing list