Mesa (master): radv: enable lowering of GS intrinsics for the LLVM backend

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 8 06:44:13 UTC 2020


Module: Mesa
Branch: master
Commit: 9f005f1f850710ea456f9847b1d247aaa8f0d6d2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9f005f1f850710ea456f9847b1d247aaa8f0d6d2

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Fri Mar 13 11:23:07 2020 +0100

radv: enable lowering of GS intrinsics for the LLVM backend

This replaces emit_vertex with:
   if (vertex_count < max_vertices) {
      emit_vertex_with_counter vertex_count ...
      vertex_count += 1
   }

Which is exactly what NIR->LLVM was doing but at NIR level. This
pass is already called by ACO.

pipeline-db changes on GFX10:
Totals from affected shaders:
SGPRS: 1952 -> 1912 (-2.05 %)
VGPRS: 2112 -> 2044 (-3.22 %)
Code Size: 189368 -> 185620 (-1.98 %) bytes
Max Waves: 494 -> 491 (-0.61 %)

No pipeline-db changes on other generations.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4182>

---

 src/amd/llvm/ac_nir_to_llvm.c     |  9 ++++++
 src/amd/llvm/ac_shader_abi.h      |  5 ++++
 src/amd/vulkan/radv_nir_to_llvm.c | 62 +++++++++------------------------------
 src/amd/vulkan/radv_shader.c      |  3 +-
 4 files changed, 29 insertions(+), 50 deletions(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 00970638516..627f5d2d931 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3938,7 +3938,16 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 	case nir_intrinsic_emit_vertex:
 		ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
 		break;
+	case nir_intrinsic_emit_vertex_with_counter: {
+		unsigned stream = nir_intrinsic_stream_id(instr);
+		LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
+		ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
+						   next_vertex,
+						   ctx->abi->outputs);
+		break;
+	}
 	case nir_intrinsic_end_primitive:
+	case nir_intrinsic_end_primitive_with_counter:
 		ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
 		break;
 	case nir_intrinsic_load_tess_coord:
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index f73465b014f..18f85a7911c 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -80,6 +80,11 @@ struct ac_shader_abi {
 	void (*emit_primitive)(struct ac_shader_abi *abi,
 			       unsigned stream);
 
+	void (*emit_vertex_with_counter)(struct ac_shader_abi *abi,
+					 unsigned stream,
+					 LLVMValueRef vertexidx,
+					 LLVMValueRef *addrs);
+
 	LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
 				    unsigned location,
 				    unsigned driver_location,
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 7cb8deddc10..900246d275e 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -876,39 +876,21 @@ static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
 
 static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
 				     unsigned stream,
+				     LLVMValueRef vertexidx,
 				     LLVMValueRef *addrs);
 
 static void
-visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
+visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream,
+			       LLVMValueRef vertexidx, LLVMValueRef *addrs)
 {
-	LLVMValueRef gs_next_vertex;
-	LLVMValueRef can_emit;
 	unsigned offset = 0;
 	struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
 
 	if (ctx->args->options->key.vs_common_out.as_ngg) {
-		gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
+		gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
 		return;
 	}
 
-	/* Write vertex attribute values to GSVS ring */
-	gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
-				       ctx->gs_next_vertex[stream],
-				       "");
-
-	/* If this thread has already emitted the declared maximum number of
-	 * vertices, don't emit any more: excessive vertex emissions are not
-	 * supposed to have any effect.
-	 */
-	can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
-				 LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
-
-	bool use_kill = !ctx->args->shader_info->gs.writes_memory;
-	if (use_kill)
-		ac_build_kill_if_false(&ctx->ac, can_emit);
-	else
-		ac_build_ifcc(&ctx->ac, can_emit, 6505);
-
 	for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
 		unsigned output_usage_mask =
 			ctx->args->shader_info->gs.output_usage_mask[i];
@@ -933,7 +915,7 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
 
 			offset++;
 
-			voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");
+			voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
 			voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
 
 			out_val = ac_to_integer(&ctx->ac, out_val);
@@ -949,16 +931,9 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
 		}
 	}
 
-	gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex,
-				      ctx->ac.i32_1, "");
-	LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
-
 	ac_build_sendmsg(&ctx->ac,
 			 AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
 			 ctx->gs_wave_id);
-
-	if (!use_kill)
-		ac_build_endif(&ctx->ac, 6505);
 }
 
 static void
@@ -3309,25 +3284,11 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
 
 static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
 				     unsigned stream,
+				     LLVMValueRef vertexidx,
 				     LLVMValueRef *addrs)
 {
 	LLVMBuilderRef builder = ctx->ac.builder;
 	LLVMValueRef tmp;
-	const LLVMValueRef vertexidx =
-		LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
-
-	/* If this thread has already emitted the declared maximum number of
-	 * vertices, skip the write: excessive vertex emissions are not
-	 * supposed to have any effect.
-	 */
-	const LLVMValueRef can_emit =
-		LLVMBuildICmp(builder, LLVMIntULT, vertexidx,
-			      LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
-	ac_build_ifcc(&ctx->ac, can_emit, 9001);
-
-	tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
-	tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, "");
-	LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
 
 	const LLVMValueRef vertexptr =
 		ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
@@ -3359,6 +3320,13 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
 	}
 	assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
 
+	/* Store the current number of emitted vertices to zero out remaining
+	 * primitive flags in case the geometry shader doesn't emit the maximum
+	 * number of vertices.
+	 */
+	tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
+	LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
+
 	/* Determine and store whether this vertex completed a primitive. */
 	const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
 
@@ -3395,8 +3363,6 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
 	tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
 	tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
 	LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
-
-	ac_build_endif(&ctx->ac, 9001);
 }
 
 static void
@@ -3948,7 +3914,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 
 	ctx.abi.inputs = &ctx.inputs[0];
 	ctx.abi.emit_outputs = handle_shader_outputs_post;
-	ctx.abi.emit_vertex = visit_emit_vertex;
+	ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
 	ctx.abi.load_ubo = radv_load_ubo;
 	ctx.abi.load_ssbo = radv_load_ssbo;
 	ctx.abi.load_sampler_desc = radv_get_sampler_desc;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 537ab9f8b7f..bb88b368d05 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -453,8 +453,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
 
 	nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
-	if (nir->info.stage == MESA_SHADER_GEOMETRY &&
-	    device->physical_device->use_aco)
+	if (nir->info.stage == MESA_SHADER_GEOMETRY)
 		nir_lower_gs_intrinsics(nir, true);
 
 	static const nir_lower_tex_options tex_options = {



More information about the mesa-commit mailing list