Mesa (master): radeonsi/gfx10: enable NGG passthrough for eligible shaders

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Dec 27 18:52:02 UTC 2019


Module: Mesa
Branch: master
Commit: aa3df12fc2fbe2963eb09cbd2a126b82d208f0ca
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=aa3df12fc2fbe2963eb09cbd2a126b82d208f0ca

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Mon Dec 16 19:09:21 2019 -0500

radeonsi/gfx10: enable NGG passthrough for eligible shaders

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>

---

 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 65 ++++++++++++++-----------
 src/gallium/drivers/radeonsi/si_pipe.h          |  4 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 10 ++--
 3 files changed, 47 insertions(+), 32 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 419a4021943..5aac4ceac25 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -98,6 +98,7 @@ struct ngg_prim {
 	LLVMValueRef isnull;
 	LLVMValueRef index[3];
 	LLVMValueRef edgeflag[3];
+	LLVMValueRef passthrough;
 };
 
 static void build_export_prim(struct si_shader_context *ctx,
@@ -107,17 +108,21 @@ static void build_export_prim(struct si_shader_context *ctx,
 	struct ac_export_args args;
 	LLVMValueRef tmp;
 
-	tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
-	args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
-
-	for (unsigned i = 0; i < prim->num_vertices; ++i) {
-		tmp = LLVMBuildShl(builder, prim->index[i],
-				   LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
-		args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
-		tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
-		tmp = LLVMBuildShl(builder, tmp,
-				   LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
-		args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
+	if (prim->passthrough) {
+		args.out[0] = prim->passthrough;
+	} else {
+		tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
+		args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
+
+		for (unsigned i = 0; i < prim->num_vertices; ++i) {
+			tmp = LLVMBuildShl(builder, prim->index[i],
+					   LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
+			args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
+			tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
+			tmp = LLVMBuildShl(builder, tmp,
+					   LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
+			args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
+		}
 	}
 
 	args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, "");
@@ -729,25 +734,29 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
 	{
 		struct ngg_prim prim = {};
 
-		prim.num_vertices = num_vertices;
-		prim.isnull = ctx->ac.i1false;
-		memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+		if (gfx10_is_ngg_passthrough(ctx->shader)) {
+			prim.passthrough = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset);
+		} else {
+			prim.num_vertices = num_vertices;
+			prim.isnull = ctx->ac.i1false;
+			memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+
+			for (unsigned i = 0; i < num_vertices; ++i) {
+				if (ctx->type != PIPE_SHADER_VERTEX) {
+					prim.edgeflag[i] = ctx->i1false;
+					continue;
+				}
 
-		for (unsigned i = 0; i < num_vertices; ++i) {
-			if (ctx->type != PIPE_SHADER_VERTEX) {
-				prim.edgeflag[i] = ctx->i1false;
-				continue;
-			}
+				tmp = LLVMBuildLShr(builder,
+						    ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
+						    LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
+				prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
 
-			tmp = LLVMBuildLShr(builder,
-					    ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
-					    LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
-			prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
-
-			if (sel->info.writes_edgeflag) {
-				tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], "");
-				prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i],
-								tmp2, "");
+				if (sel->info.writes_edgeflag) {
+					tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], "");
+					prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i],
+									tmp2, "");
+				}
 			}
 		}
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index bd0c777c148..ee9a75c9739 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -790,7 +790,7 @@ union si_vgt_param_key {
 	uint32_t index;
 };
 
-#define SI_NUM_VGT_STAGES_KEY_BITS 4
+#define SI_NUM_VGT_STAGES_KEY_BITS 5
 #define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
 
 /* The VGT_SHADER_STAGES key used to index the table of precomputed values.
@@ -801,6 +801,7 @@ union si_vgt_stages_key {
 #if UTIL_ARCH_LITTLE_ENDIAN
 		unsigned tess:1;
 		unsigned gs:1;
+		unsigned ngg_passthrough:1;
 		unsigned ngg:1; /* gfx10+ */
 		unsigned streamout:1; /* only used with NGG */
 		unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
@@ -808,6 +809,7 @@ union si_vgt_stages_key {
 		unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
 		unsigned streamout:1;
 		unsigned ngg:1;
+		unsigned ngg_passthrough:1;
 		unsigned gs:1;
 		unsigned tess:1;
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a81636801cc..6fa9c37bf86 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -3872,9 +3872,9 @@ static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen,
 	}
 
 	if (key.u.ngg) {
-		stages |= S_028B54_PRIMGEN_EN(1);
-		if (key.u.streamout)
-			stages |= S_028B54_NGG_WAVE_ID_EN(1);
+		stages |= S_028B54_PRIMGEN_EN(1) |
+			  S_028B54_NGG_WAVE_ID_EN(key.u.streamout) |
+			  S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough);
 	} else if (key.u.gs)
 		stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
 
@@ -4027,6 +4027,10 @@ bool si_update_shaders(struct si_context *sctx)
 		}
 	}
 
+	/* This must be done after the shader variant is selected. */
+	if (sctx->ngg)
+		key.u.ngg_passthrough = gfx10_is_ngg_passthrough(si_get_vs(sctx)->current);
+
 	si_update_vgt_shader_config(sctx, key);
 
 	if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)




More information about the mesa-commit mailing list