Mesa (master): radeonsi: hardcode shader WAVE_LIMIT to the maximum value

Fri Oct 20 23:03:55 UTC 2017

Module: Mesa
Branch: master
Commit: 557de3b9ae2e0a1889442bb48f731d69c993a027
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=557de3b9ae2e0a1889442bb48f731d69c993a027

Author: Andres Rodriguez <andresx7 at gmail.com>
Date:   Fri Oct 20 18:02:13 2017 -0400

radeonsi: hardcode shader WAVE_LIMIT to the maximum value

This is part of a cooperative scheduling approach used by radv. All
drivers in the stack must opt-in to resource arbitration, otherwise GL
based apps will be able to ignore system priorities.

We always hardcode the field to its maximum value, instead of attempting
to calculate an approximate usage. In testing, there were no benefits to
using anything other than the maximum.

Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

---

 src/gallium/drivers/radeonsi/si_state.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index ae45e1a6b2..8e3717e97f 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4994,11 +4994,15 @@ static void si_init_config(struct si_context *sctx)
 
 	if (sctx->b.chip_class >= CIK) {
 		if (sctx->b.chip_class >= GFX9) {
-			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
+			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+				       S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
 		} else {
-			si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
-			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
-			si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+			si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+				       S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+				       S_00B41C_WAVE_LIMIT(0x3F));
+			si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+				       S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
 
 			/* If this is 0, Bonaire can hang even if GS isn't being used.
 			 * Other chips are unaffected. These are suboptimal values,
@@ -5008,7 +5012,8 @@ static void si_init_config(struct si_context *sctx)
 				       S_028A44_ES_VERTS_PER_SUBGRP(64) |
 				       S_028A44_GS_PRIMS_PER_SUBGRP(4));
 		}
-		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
+		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+			       S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
 
 		/* Compute LATE_ALLOC_VS.LIMIT. */
 		unsigned num_cu_per_sh = sscreen->b.info.num_good_compute_units /
@@ -5040,10 +5045,12 @@ static void si_init_config(struct si_context *sctx)
 
 		/* VS can't execute on one CU if the limit is > 2. */
 		si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
-			       S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 0xffff));
+			       S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 0xffff) |
+			       S_00B118_WAVE_LIMIT(0x3F));
 		si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
 			       S_00B11C_LIMIT(late_alloc_limit));
-		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
+		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+			       S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
 	}
 
 	if (sctx->b.chip_class >= VI) {