[Mesa-dev] [PATCH 12/13] radeonsi: make SI_SGPR_VERTEX_BUFFERS the last user SGPR input

Marek Olšák maraeo at gmail.com
Sat Feb 17 19:43:27 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

so that it can be removed and replaced with inline VBO descriptors,
and the pointer can be packed in unused bits of VBO descriptors.
This also removes the pointer from merged TES-GS where it's useless.
---
 src/gallium/drivers/radeonsi/si_descriptors.c   | 14 ++++++++--
 src/gallium/drivers/radeonsi/si_shader.c        | 16 +++++++-----
 src/gallium/drivers/radeonsi/si_shader.h        |  9 +++----
 src/gallium/drivers/radeonsi/si_state_shaders.c | 34 ++++++++++++++++++++-----
 4 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index f6bc3cf..7fdac23 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2181,23 +2181,33 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx,
 						 sh_base[PIPE_SHADER_TESS_CTRL]);
 		si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
 						 sh_base[PIPE_SHADER_GEOMETRY]);
 	}
 
 	sctx->shader_pointers_dirty &=
 		~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
 
 	if (sctx->vertex_buffer_pointer_dirty) {
 		struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-		unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] +
-				     SI_SGPR_VERTEX_BUFFERS * 4;
 
+		/* Find the location of the VB descriptor pointer. */
+		/* TODO: In the future, the pointer will be packed in unused
+		 *       bits of the first 2 VB descriptors. */
+		unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR;
+		if (sctx->b.chip_class >= GFX9) {
+			if (sctx->tes_shader.cso)
+				sh_dw_offset = GFX9_TCS_NUM_USER_SGPR;
+			else if (sctx->gs_shader.cso)
+				sh_dw_offset = GFX9_GS_NUM_USER_SGPR;
+		}
+
+		unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4;
 		si_emit_shader_pointer_head(cs, sh_offset, 1);
 		si_emit_shader_pointer_body(sctx->screen, cs,
 					    sctx->vb_descriptors_buffer->gpu_address +
 					    sctx->vb_descriptors_offset);
 		sctx->vertex_buffer_pointer_dirty = false;
 	}
 
 	if (sctx->graphics_bindless_pointer_dirty) {
 		si_emit_global_shader_pointers(sctx,
 					       &sctx->bindless_descriptors);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 445d994..dfbb1f2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4541,22 +4541,20 @@ static void declare_global_desc_pointers(struct si_shader_context *ctx,
 {
 	ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
 		ac_array_in_const32_addr_space(ctx->v4i32));
 	ctx->param_bindless_samplers_and_images = add_arg(fninfo, ARG_SGPR,
 		ac_array_in_const32_addr_space(ctx->v8i32));
 }
 
 static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
 					    struct si_function_info *fninfo)
 {
-	ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
-		ac_array_in_const32_addr_space(ctx->v4i32));
 	add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
 	add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
 	add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
 	ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
 }
 
 static void declare_vs_input_vgprs(struct si_shader_context *ctx,
 				   struct si_function_info *fninfo,
 				   unsigned *num_prolog_vgprs)
 {
@@ -4644,20 +4642,22 @@ static void create_function(struct si_shader_context *ctx)
 				add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */
 			}
 
 			/* VGPRs */
 			declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs);
 			break;
 		}
 
 		declare_per_stage_desc_pointers(ctx, &fninfo, true);
 		declare_vs_specific_input_sgprs(ctx, &fninfo);
+		ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
+			ac_array_in_const32_addr_space(ctx->v4i32));
 
 		if (shader->key.as_es) {
 			ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		} else if (shader->key.as_ls) {
 			/* no extra parameters */
 		} else {
 			if (shader->is_gs_copy_shader) {
 				fninfo.num_params = ctx->param_rw_buffers + 1;
 				fninfo.num_sgpr_params = fninfo.num_params;
 			}
@@ -4716,20 +4716,24 @@ static void create_function(struct si_shader_context *ctx)
 						ctx->type == PIPE_SHADER_VERTEX);
 		declare_vs_specific_input_sgprs(ctx, &fninfo);
 
 		if (!HAVE_32BIT_POINTERS) {
 			declare_samplers_and_images(ctx, &fninfo,
 						    ctx->type == PIPE_SHADER_TESS_CTRL);
 		}
 		ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+		if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */
+			add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+		ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
+			ac_array_in_const32_addr_space(ctx->v4i32));
 
 		/* VGPRs (first TCS, then VS) */
 		add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id);
 		add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_rel_ids);
 
 		if (ctx->type == PIPE_SHADER_VERTEX) {
 			declare_vs_input_vgprs(ctx, &fninfo,
 					       &num_prolog_vgprs);
 
 			/* LS return values are inputs to the TCS main shader part. */
@@ -4773,30 +4777,31 @@ static void create_function(struct si_shader_context *ctx)
 						(ctx->type == PIPE_SHADER_VERTEX ||
 						 ctx->type == PIPE_SHADER_TESS_EVAL));
 		if (ctx->type == PIPE_SHADER_VERTEX) {
 			declare_vs_specific_input_sgprs(ctx, &fninfo);
 		} else {
 			/* TESS_EVAL (and also GEOMETRY):
 			 * Declare as many input SGPRs as the VS has. */
 			ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 			ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 			add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-			add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-			if (!HAVE_32BIT_POINTERS)
-				add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
 			ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
 		}
 
 		if (!HAVE_32BIT_POINTERS) {
 			declare_samplers_and_images(ctx, &fninfo,
 						    ctx->type == PIPE_SHADER_GEOMETRY);
 		}
+		if (ctx->type == PIPE_SHADER_VERTEX) {
+			ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
+				ac_array_in_const32_addr_space(ctx->v4i32));
+		}
 
 		/* VGPRs (first GS, then VS/TES) */
 		ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
 		ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
 		add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.gs_prim_id);
 		add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.gs_invocation_id);
 		ctx->param_gs_vtx45_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
 
 		if (ctx->type == PIPE_SHADER_VERTEX) {
 			declare_vs_input_vgprs(ctx, &fninfo,
@@ -7307,21 +7312,20 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 		ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */
 		ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
 		add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
 		add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
 		add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
-		add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		if (!HAVE_32BIT_POINTERS)
 			add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
 		ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		add_arg(&fninfo, ARG_SGPR, ctx->i32);
 		ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 	} else {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1b1f650..471f2e9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -162,25 +162,21 @@ enum {
 #if !HAVE_32BIT_POINTERS
 	SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
 #endif
 	SI_SGPR_SAMPLERS_AND_IMAGES,
 #if !HAVE_32BIT_POINTERS
 	SI_SGPR_SAMPLERS_AND_IMAGES_HI,
 #endif
 	SI_NUM_RESOURCE_SGPRS,
 
 	/* all VS variants */
-	SI_SGPR_VERTEX_BUFFERS	= SI_NUM_RESOURCE_SGPRS,
-#if !HAVE_32BIT_POINTERS
-	SI_SGPR_VERTEX_BUFFERS_HI,
-#endif
-	SI_SGPR_BASE_VERTEX,
+	SI_SGPR_BASE_VERTEX = SI_NUM_RESOURCE_SGPRS,
 	SI_SGPR_START_INSTANCE,
 	SI_SGPR_DRAWID,
 	SI_SGPR_VS_STATE_BITS,
 	SI_VS_NUM_USER_SGPR,
 
 	SI_SGPR_VS_BLIT_DATA = SI_SGPR_CONST_AND_SHADER_BUFFERS,
 
 	/* TES */
 	SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
 	SI_SGPR_TES_OFFCHIP_ADDR,
@@ -202,20 +198,23 @@ enum {
 	/* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI (SGPR[0:1]). */
 	GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR,
 	GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI,
 	GFX9_MERGED_NUM_USER_SGPR,
 #endif
 
 	/* GFX9: Merged LS-HS (VS-TCS) only. */
 	GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR,
 	GFX9_SGPR_TCS_OUT_OFFSETS,
 	GFX9_SGPR_TCS_OUT_LAYOUT,
+#if !HAVE_32BIT_POINTERS
+	GFX9_SGPR_align_for_vb_pointer,
+#endif
 	GFX9_TCS_NUM_USER_SGPR,
 
 	/* GS limits */
 	GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
 	GFX9_GS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
 	SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS + (HAVE_32BIT_POINTERS ? 1 : 2),
 
 	/* PS only */
 	SI_SGPR_ALPHA_REF	= SI_NUM_RESOURCE_SGPRS,
 	SI_PS_NUM_USER_SGPR,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 182f7c0..4b1ff94 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -433,20 +433,31 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen,
 static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
 {
 	if (shader->pm4)
 		si_pm4_clear_state(shader->pm4);
 	else
 		shader->pm4 = CALLOC_STRUCT(si_pm4_state);
 
 	return shader->pm4;
 }
 
+static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
+{
+	/* Return the user SGPR count including the trailing pointer to VBO descriptors. */
+	if (HAVE_32BIT_POINTERS) {
+		return num_always_on_user_sgprs + 1; /* pointer takes 1 SGPR */
+	} else {
+		assert(num_always_on_user_sgprs % 2 == 0); /* pointer pair stays 2-dword aligned */
+		return num_always_on_user_sgprs + 2; /* pointer takes 2 SGPRs */
+	}
+}
+
 static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
 	unsigned vgpr_comp_cnt;
 	uint64_t va;
 
 	assert(sscreen->info.chip_class <= VI);
 
 	pm4 = si_get_shader_pm4_state(shader);
 	if (!pm4)
@@ -462,21 +473,21 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
 	vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
 
 	si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
 	si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
 
 	shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
 			   S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
 		           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
 			   S_00B528_DX10_CLAMP(1) |
 			   S_00B528_FLOAT_MODE(shader->config.float_mode);
-	shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_VS_NUM_USER_SGPR) |
+	shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR)) |
 			   S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
 
 static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
 	uint64_t va;
 	unsigned ls_vgpr_comp_cnt = 0;
 
 	pm4 = si_get_shader_pm4_state(shader);
@@ -489,23 +500,26 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 	if (sscreen->info.chip_class >= GFX9) {
 		si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
 		si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40);
 
 		/* We need at least 2 components for LS.
 		 * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
 		 * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
 		 */
 		ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
 
+		unsigned num_user_sgprs =
+			si_get_num_vs_user_sgprs(GFX9_TCS_NUM_USER_SGPR);
+
 		shader->config.rsrc2 =
-			S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) |
-			S_00B42C_USER_SGPR_MSB(GFX9_TCS_NUM_USER_SGPR >> 5) |
+			S_00B42C_USER_SGPR(num_user_sgprs) |
+			S_00B42C_USER_SGPR_MSB(num_user_sgprs >> 5) |
 			S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 	} else {
 		si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
 		si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
 
 		shader->config.rsrc2 =
 			S_00B42C_USER_SGPR(GFX6_TCS_NUM_USER_SGPR) |
 			S_00B42C_OC_LDS_EN(1) |
 			S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 	}
@@ -536,21 +550,21 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
 	pm4 = si_get_shader_pm4_state(shader);
 	if (!pm4)
 		return;
 
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
 	if (shader->selector->type == PIPE_SHADER_VERTEX) {
 		/* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
 		vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0;
-		num_user_sgprs = SI_VS_NUM_USER_SGPR;
+		num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR);
 	} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
 		vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2;
 		num_user_sgprs = SI_TES_NUM_USER_SGPR;
 	} else
 		unreachable("invalid shader selector type");
 
 	oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
 
 	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
 		       shader->selector->esgs_itemsize / 4);
@@ -743,34 +757,40 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 		 */
 		if (sel->info.uses_invocationid)
 			gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
 		else if (sel->info.uses_primid)
 			gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
 		else if (input_prim >= PIPE_PRIM_TRIANGLES)
 			gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
 		else
 			gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
 
+		unsigned num_user_sgprs;
+		if (es_type == PIPE_SHADER_VERTEX)
+			num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_GS_NUM_USER_SGPR);
+		else
+			num_user_sgprs = GFX9_GS_NUM_USER_SGPR;
+
 		gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info);
 
 		si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
 		si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, va >> 40);
 
 		si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
 			       S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
 			       S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) |
 			       S_00B228_DX10_CLAMP(1) |
 			       S_00B228_FLOAT_MODE(shader->config.float_mode) |
 			       S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
 		si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
-			       S_00B22C_USER_SGPR(GFX9_GS_NUM_USER_SGPR) |
-			       S_00B22C_USER_SGPR_MSB(GFX9_GS_NUM_USER_SGPR >> 5) |
+			       S_00B22C_USER_SGPR(num_user_sgprs) |
+			       S_00B22C_USER_SGPR_MSB(num_user_sgprs >> 5) |
 			       S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
 			       S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) |
 			       S_00B22C_LDS_SIZE(gs_info.lds_size) |
 			       S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 
 		si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
 			       S_028A44_ES_VERTS_PER_SUBGRP(gs_info.es_verts_per_subgroup) |
 			       S_028A44_GS_PRIMS_PER_SUBGRP(gs_info.gs_prims_per_subgroup) |
 			       S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_info.gs_inst_prims_in_subgroup));
 		si_pm4_set_reg(pm4, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
@@ -861,21 +881,21 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
 		/* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID)
 		 * If PrimID is disabled. InstanceID / StepRate1 is loaded instead.
 		 * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
 		 */
 		vgpr_comp_cnt = enable_prim_id ? 2 : (shader->info.uses_instanceid ? 1 : 0);
 
 		if (info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]) {
 			num_user_sgprs = SI_SGPR_VS_BLIT_DATA +
 					 info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
 		} else {
-			num_user_sgprs = SI_VS_NUM_USER_SGPR;
+			num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR);
 		}
 	} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
 		vgpr_comp_cnt = enable_prim_id ? 3 : 2;
 		num_user_sgprs = SI_TES_NUM_USER_SGPR;
 	} else
 		unreachable("invalid shader selector type");
 
 	/* VS is required to export at least one param. */
 	nparams = MAX2(shader->info.nr_param_exports, 1);
 	si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
-- 
2.7.4



More information about the mesa-dev mailing list