[Mesa-dev] [PATCH 088/140] radeonsi/gfx9: trivial shader and ring changes

Marek Olšák maraeo at gmail.com
Mon Mar 20 22:43:38 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 0696582..0087eeb 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -20,20 +20,21 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors:
  *      Christian König <christian.koenig at amd.com>
  *      Marek Olšák <maraeo at gmail.com>
  */
 
 #include "si_pipe.h"
 #include "sid.h"
+#include "gfx9d.h"
 #include "radeon/r600_cs.h"
 
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_ureg.h"
 #include "util/hash_table.h"
 #include "util/crc32.h"
 #include "util/u_memory.h"
 #include "util/u_prim.h"
 
 #include "util/disk_cache.h"
@@ -456,42 +457,42 @@ static void si_shader_ls(struct si_shader *shader)
 
 	shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
 			   S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
 			   S_00B528_DX10_CLAMP(1) |
 			   S_00B528_FLOAT_MODE(shader->config.float_mode);
 	shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) |
 			   S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
 
-static void si_shader_hs(struct si_shader *shader)
+static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
 	uint64_t va;
 
 	pm4 = si_get_shader_pm4_state(shader);
 	if (!pm4)
 		return;
 
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
 	si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
 	si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
 	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
 		       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
 		       S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		       S_00B428_DX10_CLAMP(1) |
 		       S_00B428_FLOAT_MODE(shader->config.float_mode));
 	si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
 		       S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
-		       S_00B42C_OC_LDS_EN(1) |
+		       S_00B42C_OC_LDS_EN(sscreen->b.chip_class <= VI) |
 		       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
 	unsigned num_user_sgprs;
 	unsigned vgpr_comp_cnt;
 	uint64_t va;
 	unsigned oc_lds_en;
@@ -923,21 +924,21 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen,
 	switch (shader->selector->type) {
 	case PIPE_SHADER_VERTEX:
 		if (shader->key.as_ls)
 			si_shader_ls(shader);
 		else if (shader->key.as_es)
 			si_shader_es(sscreen, shader);
 		else
 			si_shader_vs(sscreen, shader, NULL);
 		break;
 	case PIPE_SHADER_TESS_CTRL:
-		si_shader_hs(shader);
+		si_shader_hs(sscreen, shader);
 		break;
 	case PIPE_SHADER_TESS_EVAL:
 		if (shader->key.as_es)
 			si_shader_es(sscreen, shader);
 		else
 			si_shader_vs(sscreen, shader, NULL);
 		break;
 	case PIPE_SHADER_GEOMETRY:
 		si_shader_gs(shader);
 		break;
@@ -2117,22 +2118,25 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 
 	min_esgs_ring_size = align(min_esgs_ring_size, alignment);
 	esgs_ring_size = align(esgs_ring_size, alignment);
 	gsvs_ring_size = align(gsvs_ring_size, alignment);
 
 	esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
 	gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
 
 	/* Some rings don't have to be allocated if shaders don't use them.
 	 * (e.g. no varyings between ES and GS or GS and VS)
+	 *
+	 * GFX9 doesn't have the ESGS ring.
 	 */
-	bool update_esgs = esgs_ring_size &&
+	bool update_esgs = sctx->b.chip_class <= VI &&
+			   esgs_ring_size &&
 			   (!sctx->esgs_ring ||
 			    sctx->esgs_ring->width0 < esgs_ring_size);
 	bool update_gsvs = gsvs_ring_size &&
 			   (!sctx->gsvs_ring ||
 			    sctx->gsvs_ring->width0 < gsvs_ring_size);
 
 	if (!update_esgs && !update_gsvs)
 		return true;
 
 	if (update_esgs) {
@@ -2156,23 +2160,25 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 		if (!sctx->gsvs_ring)
 			return false;
 	}
 
 	/* Create the "init_config_gs_rings" state. */
 	pm4 = CALLOC_STRUCT(si_pm4_state);
 	if (!pm4)
 		return false;
 
 	if (sctx->b.chip_class >= CIK) {
-		if (sctx->esgs_ring)
+		if (sctx->esgs_ring) {
+			assert(sctx->b.chip_class <= VI);
 			si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
 				       sctx->esgs_ring->width0 / 256);
+		}
 		if (sctx->gsvs_ring)
 			si_pm4_set_reg(pm4, R_030904_VGT_GSVS_RING_SIZE,
 				       sctx->gsvs_ring->width0 / 256);
 	} else {
 		if (sctx->esgs_ring)
 			si_pm4_set_reg(pm4, R_0088C8_VGT_ESGS_RING_SIZE,
 				       sctx->esgs_ring->width0 / 256);
 		if (sctx->gsvs_ring)
 			si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE,
 				       sctx->gsvs_ring->width0 / 256);
@@ -2187,20 +2193,21 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 		si_init_config_add_vgt_flush(sctx);
 		si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
 	}
 
 	/* Flush the context to re-emit both init_config states. */
 	sctx->b.initial_gfx_cs_size = 0; /* force flush */
 	si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
 
 	/* Set ring bindings. */
 	if (sctx->esgs_ring) {
+		assert(sctx->b.chip_class <= VI);
 		si_set_ring_buffer(&sctx->b.b, SI_ES_RING_ESGS,
 				   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
 				   true, true, 4, 64, 0);
 		si_set_ring_buffer(&sctx->b.b, SI_GS_RING_ESGS,
 				   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
 				   false, false, 0, 0, 0);
 	}
 	if (sctx->gsvs_ring) {
 		si_set_ring_buffer(&sctx->b.b, SI_RING_GSVS,
 				   sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
@@ -2423,20 +2430,23 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 
 	/* Append these registers to the init config state. */
 	if (sctx->b.chip_class >= CIK) {
 		if (sctx->b.chip_class >= VI)
 			--max_offchip_buffers;
 
 		si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
 			       S_030938_SIZE(sctx->tf_ring->width0 / 4));
 		si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
 			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
+		if (sctx->b.chip_class >= GFX9)
+			si_pm4_set_reg(sctx->init_config, R_030944_VGT_TF_MEMORY_BASE_HI,
+				       r600_resource(sctx->tf_ring)->gpu_address >> 40);
 		si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM,
 		             S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
 		             S_03093C_OFFCHIP_GRANULARITY(offchip_granularity));
 	} else {
 		assert(offchip_granularity == V_03093C_X_8K_DWORDS);
 		si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
 			       S_008988_SIZE(sctx->tf_ring->width0 / 4));
 		si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
 			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
 		si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM,
-- 
2.7.4



More information about the mesa-dev mailing list