Mesa (master): radv: rework vertex/export shader output handling

Dave Airlie airlied at kemper.freedesktop.org
Tue Mar 28 07:40:59 UTC 2017


Module: Mesa
Branch: master
Commit: 931a8d0c9a15df462f14ab40f9ae31c8ecf75376
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=931a8d0c9a15df462f14ab40f9ae31c8ecf75376

Author: Dave Airlie <airlied at redhat.com>
Date:   Tue Mar 28 06:13:09 2017 +1000

radv: rework vertex/export shader output handling

In order to facilitate adding tess support, split the vs/es
output info into a separate block, so we make it easier to
have the tess shaders export the same info.

Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied at redhat.com>

---

 src/amd/common/ac_nir_to_llvm.c  | 54 +++++++++++++++++----------------
 src/amd/common/ac_nir_to_llvm.h  | 30 ++++++++++++-------
 src/amd/vulkan/radv_cmd_buffer.c | 64 ++++++++++++++++++++++------------------
 src/amd/vulkan/radv_pipeline.c   |  8 ++---
 4 files changed, 86 insertions(+), 70 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6e36c192c3..cfbdeae1a3 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4228,11 +4228,11 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
 			int length = glsl_get_length(variable->type);
 			if (idx == VARYING_SLOT_CLIP_DIST0) {
 				if (ctx->stage == MESA_SHADER_VERTEX)
-					ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
+					ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << length) - 1;
 				ctx->num_output_clips = length;
 			} else if (idx == VARYING_SLOT_CULL_DIST0) {
 				if (ctx->stage == MESA_SHADER_VERTEX)
-					ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
+					ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << length) - 1;
 				ctx->num_output_culls = length;
 			}
 			if (length > 4)
@@ -4448,7 +4448,8 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 }
 
 static void
-handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
+handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
+		       struct ac_vs_output_info *outinfo)
 {
 	uint32_t param_count = 0;
 	unsigned target;
@@ -4461,14 +4462,14 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 						       (1ull << VARYING_SLOT_CULL_DIST0) |
 						       (1ull << VARYING_SLOT_CULL_DIST1));
 
-	ctx->shader_info->vs.prim_id_output = 0xffffffff;
-	ctx->shader_info->vs.layer_output = 0xffffffff;
+	outinfo->prim_id_output = 0xffffffff;
+	outinfo->layer_output = 0xffffffff;
 	if (clip_mask) {
 		LLVMValueRef slots[8];
 		unsigned j;
 
-		if (ctx->shader_info->vs.cull_dist_mask)
-			ctx->shader_info->vs.cull_dist_mask <<= ctx->num_output_clips;
+		if (outinfo->cull_dist_mask)
+			outinfo->cull_dist_mask <<= ctx->num_output_clips;
 
 		i = VARYING_SLOT_CLIP_DIST0;
 		for (j = 0; j < ctx->num_output_clips; j++)
@@ -4513,25 +4514,25 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 			   i == VARYING_SLOT_CULL_DIST1) {
 			continue;
 		} else if (i == VARYING_SLOT_PSIZ) {
-			ctx->shader_info->vs.writes_pointsize = true;
+			outinfo->writes_pointsize = true;
 			psize_value = values[0];
 			continue;
 		} else if (i == VARYING_SLOT_LAYER) {
-			ctx->shader_info->vs.writes_layer = true;
+			outinfo->writes_layer = true;
 			layer_value = values[0];
-			ctx->shader_info->vs.layer_output = param_count;
+			outinfo->layer_output = param_count;
 			target = V_008DFC_SQ_EXP_PARAM + param_count;
 			param_count++;
 		} else if (i == VARYING_SLOT_VIEWPORT) {
-			ctx->shader_info->vs.writes_viewport_index = true;
+			outinfo->writes_viewport_index = true;
 			viewport_index_value = values[0];
 			continue;
 		} else if (i == VARYING_SLOT_PRIMITIVE_ID) {
-			ctx->shader_info->vs.prim_id_output = param_count;
+			outinfo->prim_id_output = param_count;
 			target = V_008DFC_SQ_EXP_PARAM + param_count;
 			param_count++;
 		} else if (i >= VARYING_SLOT_VAR0) {
-			ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
+			outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
 			target = V_008DFC_SQ_EXP_PARAM + param_count;
 			param_count++;
 		}
@@ -4560,9 +4561,9 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 		pos_args[0].out[3] = ctx->f32one;  /* W */
 	}
 
-	uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) |
-			 (ctx->shader_info->vs.writes_layer == true ? 4 : 0) |
-			 (ctx->shader_info->vs.writes_viewport_index == true ? 8 : 0));
+	uint32_t mask = ((outinfo->writes_pointsize == true ? 1 : 0) |
+			 (outinfo->writes_layer == true ? 4 : 0) |
+			 (outinfo->writes_viewport_index == true ? 8 : 0));
 	if (mask) {
 		pos_args[1].enabled_channels = mask;
 		pos_args[1].valid_mask = 0;
@@ -4574,11 +4575,11 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 		pos_args[1].out[2] = ctx->f32zero; /* Z */
 		pos_args[1].out[3] = ctx->f32zero;  /* W */
 
-		if (ctx->shader_info->vs.writes_pointsize == true)
+		if (outinfo->writes_pointsize == true)
 			pos_args[1].out[0] = psize_value;
-		if (ctx->shader_info->vs.writes_layer == true)
+		if (outinfo->writes_layer == true)
 			pos_args[1].out[2] = layer_value;
-		if (ctx->shader_info->vs.writes_viewport_index == true)
+		if (outinfo->writes_viewport_index == true)
 			pos_args[1].out[3] = viewport_index_value;
 	}
 	for (i = 0; i < 4; i++) {
@@ -4598,12 +4599,13 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 		ac_build_export(&ctx->ac, &pos_args[i]);
 	}
 
-	ctx->shader_info->vs.pos_exports = num_pos_exports;
-	ctx->shader_info->vs.param_exports = param_count;
+	outinfo->pos_exports = num_pos_exports;
+	outinfo->param_exports = param_count;
 }
 
 static void
-handle_es_outputs_post(struct nir_to_llvm_context *ctx)
+handle_es_outputs_post(struct nir_to_llvm_context *ctx,
+		       struct ac_es_output_info *outinfo)
 {
 	int j;
 	uint64_t max_output_written = 0;
@@ -4638,7 +4640,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
 					       1, 1, true, true);
 		}
 	}
-	ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16;
+	outinfo->esgs_itemsize = (max_output_written + 1) * 16;
 }
 
 static void
@@ -4761,9 +4763,9 @@ handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
 	switch (ctx->stage) {
 	case MESA_SHADER_VERTEX:
 		if (ctx->options->key.vs.as_es)
-			handle_es_outputs_post(ctx);
+			handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
 		else
-			handle_vs_outputs_post(ctx);
+			handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo);
 		break;
 	case MESA_SHADER_FRAGMENT:
 		handle_fs_outputs_post(ctx);
@@ -5170,7 +5172,7 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
 		}
 		idx += slot_inc;
 	}
-	handle_vs_outputs_post(ctx);
+	handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo);
 }
 
 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h
index b4c4a73a1c..15afbd7745 100644
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -91,6 +91,23 @@ struct ac_userdata_locations {
 	struct ac_userdata_info shader_data[AC_UD_MAX_UD];
 };
 
+struct ac_vs_output_info {
+	uint8_t clip_dist_mask;
+	uint8_t cull_dist_mask;
+	bool writes_pointsize;
+	bool writes_layer;
+	bool writes_viewport_index;
+	uint32_t prim_id_output;
+	uint32_t layer_output;
+	uint32_t export_mask;
+	unsigned param_exports;
+	unsigned pos_exports;
+};
+
+struct ac_es_output_info {
+	uint32_t esgs_itemsize;
+};
+
 struct ac_shader_variant_info {
 	struct ac_userdata_locations user_sgprs_locs;
 	unsigned num_user_sgprs;
@@ -98,19 +115,10 @@ struct ac_shader_variant_info {
 	unsigned num_input_vgprs;
 	union {
 		struct {
-			unsigned param_exports;
-			unsigned pos_exports;
+			struct ac_vs_output_info outinfo;
+			struct ac_es_output_info es_info;
 			unsigned vgpr_comp_cnt;
-			uint32_t export_mask;
-			bool writes_pointsize;
-			bool writes_layer;
-			bool writes_viewport_index;
 			bool as_es;
-			uint8_t clip_dist_mask;
-			uint8_t cull_dist_mask;
-			uint32_t esgs_itemsize;
-			uint32_t prim_id_output;
-			uint32_t layer_output;
 		} vs;
 		struct {
 			unsigned num_interp;
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index eb2a7b0dde..ce34204b8a 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -502,7 +502,8 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
 static void
 radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
 		struct radv_pipeline *pipeline,
-		struct radv_shader_variant *shader)
+		struct radv_shader_variant *shader,
+		struct ac_vs_output_info *outinfo)
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
 	uint64_t va = ws->buffer_get_va(shader->bo);
@@ -510,19 +511,19 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
 
 	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
 
-	export_count = MAX2(1, shader->info.vs.param_exports);
+	export_count = MAX2(1, outinfo->param_exports);
 	radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG,
 			       S_0286C4_VS_EXPORT_COUNT(export_count - 1));
 
 	radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT,
 			       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-			       S_02870C_POS1_EXPORT_FORMAT(shader->info.vs.pos_exports > 1 ?
+			       S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
 							   V_02870C_SPI_SHADER_4COMP :
 							   V_02870C_SPI_SHADER_NONE) |
-			       S_02870C_POS2_EXPORT_FORMAT(shader->info.vs.pos_exports > 2 ?
+			       S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
 							   V_02870C_SPI_SHADER_4COMP :
 							   V_02870C_SPI_SHADER_NONE) |
-			       S_02870C_POS3_EXPORT_FORMAT(shader->info.vs.pos_exports > 3 ?
+			       S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
 							   V_02870C_SPI_SHADER_4COMP :
 							   V_02870C_SPI_SHADER_NONE));
 
@@ -540,17 +541,17 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
 			       S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
 
 	unsigned clip_dist_mask, cull_dist_mask, total_mask;
-	clip_dist_mask = shader->info.vs.clip_dist_mask;
-	cull_dist_mask = shader->info.vs.cull_dist_mask;
+	clip_dist_mask = outinfo->clip_dist_mask;
+	cull_dist_mask = outinfo->cull_dist_mask;
 	total_mask = clip_dist_mask | cull_dist_mask;
 
 	radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL,
-			       S_02881C_USE_VTX_POINT_SIZE(shader->info.vs.writes_pointsize) |
-			       S_02881C_USE_VTX_RENDER_TARGET_INDX(shader->info.vs.writes_layer) |
-			       S_02881C_USE_VTX_VIEWPORT_INDX(shader->info.vs.writes_viewport_index) |
-			       S_02881C_VS_OUT_MISC_VEC_ENA(shader->info.vs.writes_pointsize ||
-							    shader->info.vs.writes_layer ||
-							    shader->info.vs.writes_viewport_index) |
+			       S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+			       S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+			       S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+			       S_02881C_VS_OUT_MISC_VEC_ENA(outinfo->writes_pointsize ||
+							    outinfo->writes_layer ||
+							    outinfo->writes_viewport_index) |
 			       S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
 			       S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
 			       pipeline->graphics.raster.pa_cl_vs_out_cntl |
@@ -558,12 +559,13 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
 			       clip_dist_mask);
 
 	radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF,
-			       S_028AB4_REUSE_OFF(shader->info.vs.writes_viewport_index));
+			       S_028AB4_REUSE_OFF(outinfo->writes_viewport_index));
 }
 
 static void
 radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
-		struct radv_shader_variant *shader)
+		struct radv_shader_variant *shader,
+		struct ac_es_output_info *outinfo)
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
 	uint64_t va = ws->buffer_get_va(shader->bo);
@@ -571,7 +573,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
 	ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
 
 	radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-			       shader->info.vs.esgs_itemsize / 4);
+			       outinfo->esgs_itemsize / 4);
 	radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
 	radeon_emit(cmd_buffer->cs, va >> 8);
 	radeon_emit(cmd_buffer->cs, va >> 40);
@@ -590,9 +592,9 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
 	vs = pipeline->shaders[MESA_SHADER_VERTEX];
 
 	if (vs->info.vs.as_es)
-		radv_emit_hw_es(cmd_buffer, vs);
+		radv_emit_hw_es(cmd_buffer, vs, &vs->info.vs.es_info);
 	else
-		radv_emit_hw_vs(cmd_buffer, pipeline, vs);
+		radv_emit_hw_vs(cmd_buffer, pipeline, vs, &vs->info.vs.outinfo);
 
 	radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
 }
@@ -666,7 +668,7 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
 	radeon_emit(cmd_buffer->cs, gs->rsrc1);
 	radeon_emit(cmd_buffer->cs, gs->rsrc2);
 
-	radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader);
+	radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader, &pipeline->gs_copy_shader->info.vs.outinfo);
 
 	struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
 							     AC_UD_GS_VS_RING_STRIDE_ENTRIES);
@@ -696,10 +698,14 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_blend_state *blend = &pipeline->graphics.blend;
 	unsigned ps_offset = 0;
 	unsigned z_order;
+	struct ac_vs_output_info *outinfo;
 	assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
 
 	ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
 	vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX];
+
+	outinfo = &vs->info.vs.outinfo;
+
 	va = ws->buffer_get_va(ps->bo);
 	ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
 
@@ -757,20 +763,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 		ps_offset++;
 	}
 
-	if (ps->info.fs.prim_id_input && (vs->info.vs.prim_id_output != 0xffffffff)) {
+	if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {
 		unsigned vs_offset, flat_shade;
 		unsigned val;
-		vs_offset = vs->info.vs.prim_id_output;
+		vs_offset = outinfo->prim_id_output;
 		flat_shade = true;
 		val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
 		radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
 		++ps_offset;
 	}
 
-	if (ps->info.fs.layer_input && (vs->info.vs.layer_output != 0xffffffff)) {
+	if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
 		unsigned vs_offset, flat_shade;
 		unsigned val;
-		vs_offset = vs->info.vs.layer_output;
+		vs_offset = outinfo->layer_output;
 		flat_shade = true;
 		val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
 		radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
@@ -785,20 +791,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 			continue;
 
 
-		if (!(vs->info.vs.export_mask & (1u << i))) {
+		if (!(outinfo->export_mask & (1u << i))) {
 			radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset,
 					       S_028644_OFFSET(0x20));
 			++ps_offset;
 			continue;
 		}
 
-		vs_offset = util_bitcount(vs->info.vs.export_mask & ((1u << i) - 1));
-		if (vs->info.vs.prim_id_output != 0xffffffff) {
-			if (vs_offset >= vs->info.vs.prim_id_output)
+		vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1));
+		if (outinfo->prim_id_output != 0xffffffff) {
+			if (vs_offset >= outinfo->prim_id_output)
 				vs_offset++;
 		}
-		if (vs->info.vs.layer_output != 0xffffffff) {
-			if (vs_offset >= vs->info.vs.layer_output)
+		if (outinfo->layer_output != 0xffffffff) {
+			if (vs_offset >= outinfo->layer_output)
 			  vs_offset++;
 		}
 		flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 1becb65055..2c710f4eb8 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1467,15 +1467,15 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
 	unsigned alignment = 256 * num_se;
 	/* The maximum size is 63.999 MB per SE. */
 	unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
-
+	struct ac_es_output_info *es_info = &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
 	struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
-	struct ac_shader_variant_info *es_info = &pipeline->shaders[MESA_SHADER_VERTEX]->info;
+
 	/* Calculate the minimum size. */
-	unsigned min_esgs_ring_size = align(es_info->vs.esgs_itemsize * gs_vertex_reuse *
+	unsigned min_esgs_ring_size = align(es_info->esgs_itemsize * gs_vertex_reuse *
 					    wave_size, alignment);
 	/* These are recommended sizes, not minimum sizes. */
 	unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
-		es_info->vs.esgs_itemsize * gs_info->gs.vertices_in;
+		es_info->esgs_itemsize * gs_info->gs.vertices_in;
 	unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
 		gs_info->gs.max_gsvs_emit_size * 1; // no streams in VK (gs->max_gs_stream + 1);
 




More information about the mesa-commit mailing list