Mesa (master): radeonsi/gfx10: export primitives at the beginning of VS/TES

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Jan 20 21:33:28 UTC 2020


Module: Mesa
Branch: master
Commit: a966729c840a96af993e9039d9cf09b9d8921c4b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a966729c840a96af993e9039d9cf09b9d8921c4b

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Fri Jan  3 16:59:20 2020 -0500

radeonsi/gfx10: export primitives at the beginning of VS/TES

This decreases VGPR usage and will allow us to merge some IF blocks
in shaders.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>

---

 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c   | 80 ++++++++++++++++-------
 src/gallium/drivers/radeonsi/si_shader.c          |  9 ++-
 src/gallium/drivers/radeonsi/si_shader_internal.h |  3 +
 3 files changed, 66 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 2f6f9fe3cc2..b8c34634cbe 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -123,6 +123,16 @@ static LLVMValueRef ngg_get_vertices_per_prim(struct si_shader_context *ctx,
 	}
 }
 
+bool gfx10_ngg_export_prim_early(struct si_shader *shader)
+{
+	struct si_shader_selector *sel = shader->selector;
+
+	assert(shader->key.as_ngg && !shader->key.as_es);
+
+	return sel->type != PIPE_SHADER_GEOMETRY &&
+	       !sel->info.writes_edgeflag;
+}
+
 void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx)
 {
 	ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx),
@@ -130,6 +140,49 @@ void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx)
 				      ngg_get_prim_cnt(ctx));
 }
 
+void gfx10_ngg_build_export_prim(struct si_shader_context *ctx,
+				 LLVMValueRef user_edgeflags[3])
+{
+	if (gfx10_is_ngg_passthrough(ctx->shader)) {
+		ac_build_ifcc(&ctx->ac, si_is_gs_thread(ctx), 6001);
+		{
+			struct ac_ngg_prim prim = {};
+
+			prim.passthrough = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset);
+			ac_build_export_prim(&ctx->ac, &prim);
+		}
+		ac_build_endif(&ctx->ac, 6001);
+		return;
+	}
+
+	ac_build_ifcc(&ctx->ac, si_is_gs_thread(ctx), 6001);
+	{
+		struct ac_ngg_prim prim = {};
+
+		ngg_get_vertices_per_prim(ctx, &prim.num_vertices);
+
+		prim.isnull = ctx->ac.i1false;
+		prim.index[0] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 0, 16);
+		prim.index[1] = si_unpack_param(ctx, ctx->gs_vtx01_offset, 16, 16);
+		prim.index[2] = si_unpack_param(ctx, ctx->gs_vtx23_offset, 0, 16);
+
+		for (unsigned i = 0; i < prim.num_vertices; ++i) {
+			prim.edgeflag[i] = ngg_get_initial_edgeflag(ctx, i);
+
+			if (ctx->shader->selector->info.writes_edgeflag) {
+				LLVMValueRef edge;
+
+				edge = LLVMBuildLoad(ctx->ac.builder, user_edgeflags[i], "");
+				edge = LLVMBuildAnd(ctx->ac.builder, prim.edgeflag[i], edge, "");
+				prim.edgeflag[i] = edge;
+			}
+		}
+
+		ac_build_export_prim(&ctx->ac, &prim);
+	}
+	ac_build_endif(&ctx->ac, 6001);
+}
+
 static void build_streamout_vertex(struct si_shader_context *ctx,
 				   LLVMValueRef *so_buffer, LLVMValueRef *wg_offset_dw,
 				   unsigned stream, LLVMValueRef offset_vtx,
@@ -689,31 +742,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
 	}
 
 	/* Build the primitive export. */
-	ac_build_ifcc(&ctx->ac, is_gs_thread, 6001);
-	{
-		struct ac_ngg_prim prim = {};
-
-		if (gfx10_is_ngg_passthrough(ctx->shader)) {
-			prim.passthrough = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset);
-		} else {
-			prim.num_vertices = num_vertices;
-			prim.isnull = ctx->ac.i1false;
-			memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
-
-			for (unsigned i = 0; i < num_vertices; ++i) {
-				prim.edgeflag[i] = ngg_get_initial_edgeflag(ctx, i);
-
-				if (sel->info.writes_edgeflag) {
-					tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], "");
-					prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i],
-									tmp2, "");
-				}
-			}
-		}
-
-		ac_build_export_prim(&ctx->ac, &prim);
-	}
-	ac_build_endif(&ctx->ac, 6001);
+	if (!gfx10_ngg_export_prim_early(ctx->shader))
+		gfx10_ngg_build_export_prim(ctx, user_edgeflags);
 
 	/* Export per-vertex data (positions and parameters). */
 	ac_build_ifcc(&ctx->ac, is_es_thread, 6002);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 592a486424a..24f744ba5cd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2423,9 +2423,16 @@ static bool si_build_main_function(struct si_shader_context *ctx,
 
 			if ((ctx->type == PIPE_SHADER_VERTEX ||
 			     ctx->type == PIPE_SHADER_TESS_EVAL) &&
-			    shader->key.as_ngg && !shader->key.as_es)
+			    shader->key.as_ngg && !shader->key.as_es) {
 				gfx10_ngg_build_sendmsg_gs_alloc_req(ctx);
 
+				/* Build the primitive export at the beginning
+				 * of the shader if possible.
+				 */
+				if (gfx10_ngg_export_prim_early(shader))
+					gfx10_ngg_build_export_prim(ctx, NULL);
+			}
+
 			if (ctx->type == PIPE_SHADER_TESS_CTRL ||
 			    ctx->type == PIPE_SHADER_GEOMETRY) {
 				if (ctx->type == PIPE_SHADER_GEOMETRY && shader->key.as_ngg) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index b8d2ac84fca..6509edb8181 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -299,7 +299,10 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx,
 			    unsigned noutput, unsigned stream);
 void si_create_function(struct si_shader_context *ctx);
 
+bool gfx10_ngg_export_prim_early(struct si_shader *shader);
 void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx);
+void gfx10_ngg_build_export_prim(struct si_shader_context *ctx,
+				 LLVMValueRef user_edgeflags[3]);
 void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
 			     unsigned max_outputs,
 			     LLVMValueRef *addrs);



More information about the mesa-commit mailing list