[Mesa-dev] [PATCH 03/53] r600: make adjust_gprs use hw stages.

Dave Airlie airlied at gmail.com
Sun Nov 29 22:20:12 PST 2015


From: Dave Airlie <airlied at redhat.com>

This changes the r600 specific GPR adjustment code
to use the stage defines, and arrays.

This is prep work for the tess changes later.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/drivers/r600/r600_pipe.h  |   2 +-
 src/gallium/drivers/r600/r600_state.c | 117 ++++++++++++++++++----------------
 2 files changed, 64 insertions(+), 55 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 36fa1c9..0e57efe 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -432,7 +432,7 @@ struct r600_context {
 	/* Hardware info. */
 	boolean				has_vertex_cache;
 	boolean				keep_tiling_flags;
-	unsigned			default_ps_gprs, default_vs_gprs;
+	unsigned			default_gprs[EG_NUM_HW_STAGES];
 	unsigned			r6xx_num_clause_temp_gprs;
 
 	/* Miscellaneous state objects. */
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index c2d4abc..a16d4bd 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2044,57 +2044,62 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 /* Adjust GPR allocation on R6xx/R7xx */
 bool r600_adjust_gprs(struct r600_context *rctx)
 {
-	unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr;
-	unsigned num_vs_gprs, num_es_gprs, num_gs_gprs;
-	unsigned new_num_ps_gprs = num_ps_gprs;
-	unsigned new_num_vs_gprs, new_num_es_gprs, new_num_gs_gprs;
-	unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
-	unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
-	unsigned cur_num_gs_gprs = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
-	unsigned cur_num_es_gprs = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
-	unsigned def_num_ps_gprs = rctx->default_ps_gprs;
-	unsigned def_num_vs_gprs = rctx->default_vs_gprs;
-	unsigned def_num_gs_gprs = 0;
-	unsigned def_num_es_gprs = 0;
+	unsigned num_gprs[R600_NUM_HW_STAGES];
+	unsigned new_gprs[R600_NUM_HW_STAGES];
+	unsigned cur_gprs[R600_NUM_HW_STAGES];
+	unsigned def_gprs[R600_NUM_HW_STAGES];
 	unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
-	/* hardware will reserve twice num_clause_temp_gprs */
-	unsigned max_gprs = def_num_gs_gprs + def_num_es_gprs + def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2;
+	unsigned max_gprs;
 	unsigned tmp, tmp2;
+	unsigned i;
+	bool need_recalc = false, use_default = true;
+
+	/* hardware will reserve twice num_clause_temp_gprs */
+	max_gprs = def_num_clause_temp_gprs * 2;
+	for (i = 0; i < R600_NUM_HW_STAGES; i++) {
+		def_gprs[i] = rctx->default_gprs[i];
+		max_gprs += def_gprs[i];
+	}
 
+	cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+	cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+	cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
+	cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
+
+	num_gprs[R600_HW_STAGE_PS] = rctx->ps_shader->current->shader.bc.ngpr;
 	if (rctx->gs_shader) {
-		num_es_gprs = rctx->vs_shader->current->shader.bc.ngpr;
-		num_gs_gprs = rctx->gs_shader->current->shader.bc.ngpr;
-		num_vs_gprs = rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr;
+		num_gprs[R600_HW_STAGE_ES] = rctx->vs_shader->current->shader.bc.ngpr;
+		num_gprs[R600_HW_STAGE_GS] = rctx->gs_shader->current->shader.bc.ngpr;
+		num_gprs[R600_HW_STAGE_VS] = rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr;
 	} else {
-		num_es_gprs = 0;
-		num_gs_gprs = 0;
-		num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr;
+		num_gprs[R600_HW_STAGE_ES] = 0;
+		num_gprs[R600_HW_STAGE_GS] = 0;
+		num_gprs[R600_HW_STAGE_VS] = rctx->vs_shader->current->shader.bc.ngpr;
+	}
+
+	for (i = 0; i < R600_NUM_HW_STAGES; i++) {
+		new_gprs[i] = num_gprs[i];
+		if (new_gprs[i] > cur_gprs[i])
+			need_recalc = true;
+		if (new_gprs[i] > def_gprs[i])
+			use_default = false;
 	}
-	new_num_vs_gprs = num_vs_gprs;
-	new_num_es_gprs = num_es_gprs;
-	new_num_gs_gprs = num_gs_gprs;
 
 	/* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */
-	if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs ||
-	    new_num_es_gprs > cur_num_es_gprs || new_num_gs_gprs > cur_num_gs_gprs) {
-		/* try to use switch back to default */
-		if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs ||
-		    new_num_gs_gprs > def_num_gs_gprs || new_num_es_gprs > def_num_es_gprs) {
-			/* always privilege vs stage so that at worst we have the
-			 * pixel stage producing wrong output (not the vertex
-			 * stage) */
-			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
-			new_num_vs_gprs = num_vs_gprs;
-			new_num_gs_gprs = num_gs_gprs;
-			new_num_es_gprs = num_es_gprs;
-		} else {
-			new_num_ps_gprs = def_num_ps_gprs;
-			new_num_vs_gprs = def_num_vs_gprs;
-			new_num_es_gprs = def_num_es_gprs;
-			new_num_gs_gprs = def_num_gs_gprs;
-		}
-	} else {
+	if (!need_recalc)
 		return true;
+
+	/* try to use switch back to default */
+	if (!use_default) {
+		/* always privilege vs stage so that at worst we have the
+		 * pixel stage producing wrong output (not the vertex
+		 * stage) */
+		new_gprs[R600_HW_STAGE_PS] = max_gprs - def_num_clause_temp_gprs * 2;
+		for (i = R600_HW_STAGE_VS; i < R600_NUM_HW_STAGES; i++)
+			new_gprs[R600_HW_STAGE_PS] -= new_gprs[i];
+	} else {
+		for (i = 0; i < 4; i++)
+			new_gprs[i] = def_gprs[i];
 	}
 
 	/* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <=
@@ -2103,21 +2108,22 @@ bool r600_adjust_gprs(struct r600_context *rctx)
 	 * it will lockup. So in this case just discard the draw command
 	 * and don't change the current gprs repartitions.
 	 */
-	if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs ||
-	    num_gs_gprs > new_num_gs_gprs || num_es_gprs > new_num_es_gprs) {
-		R600_ERR("shaders require too many register (%d + %d + %d + %d) "
-			 "for a combined maximum of %d\n",
-			 num_ps_gprs, num_vs_gprs, num_es_gprs, num_gs_gprs, max_gprs);
-		return false;
+	for (i = 0; i < R600_NUM_HW_STAGES; i++) {
+		if (num_gprs[i] > new_gprs[i]) {
+			R600_ERR("shaders require too many register (%d + %d + %d + %d) "
+				 "for a combined maximum of %d\n",
+				 num_gprs[R600_HW_STAGE_PS], num_gprs[R600_HW_STAGE_VS], num_gprs[R600_HW_STAGE_ES], num_gprs[R600_HW_STAGE_GS], max_gprs);
+			return false;
+		}
 	}
 
 	/* in some case we endup recomputing the current value */
-	tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) |
-		S_008C04_NUM_VS_GPRS(new_num_vs_gprs) |
+	tmp = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) |
+		S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) |
 		S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
 
-	tmp2 = S_008C08_NUM_ES_GPRS(new_num_es_gprs) |
-		S_008C08_NUM_GS_GPRS(new_num_gs_gprs);
+	tmp2 = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) |
+		S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]);
 	if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) {
 		rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
 		rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2;
@@ -2285,8 +2291,11 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
 		break;
 	}
 
-	rctx->default_ps_gprs = num_ps_gprs;
-	rctx->default_vs_gprs = num_vs_gprs;
+	rctx->default_gprs[R600_HW_STAGE_PS] = num_ps_gprs;
+	rctx->default_gprs[R600_HW_STAGE_VS] = num_vs_gprs;
+	rctx->default_gprs[R600_HW_STAGE_GS] = 0;
+	rctx->default_gprs[R600_HW_STAGE_ES] = 0;
+
 	rctx->r6xx_num_clause_temp_gprs = num_temp_gprs;
 
 	/* SQ_CONFIG */
-- 
2.5.0



More information about the mesa-dev mailing list