[Mesa-dev] [PATCH 09/15] radv: allow to use up to 4 GSVS ring buffers

Samuel Pitoiset samuel.pitoiset at gmail.com
Sat Oct 13 12:57:33 UTC 2018


Allow the geometry shader to use up to four GSVS ring buffers, one
for each vertex stream. We basically just need to update the
base address and compute a stride for every stream.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/vulkan/radv_nir_to_llvm.c | 78 ++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 21 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index a7e37958c1..c8b8670f9c 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -78,7 +78,7 @@ struct radv_shader_context {
 	LLVMValueRef gs_vtx_offset[6];
 
 	LLVMValueRef esgs_ring;
-	LLVMValueRef gsvs_ring;
+	LLVMValueRef gsvs_ring[4];
 	LLVMValueRef hs_ring_tess_offchip;
 	LLVMValueRef hs_ring_tess_factor;
 
@@ -1747,7 +1747,8 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
 			out_val = ac_to_integer(&ctx->ac, out_val);
 			out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
 
-			ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
+			ac_build_buffer_store_dword(&ctx->ac,
+						    ctx->gsvs_ring[stream],
 						    out_val, 1,
 						    voffset, ctx->gs2vs_offset, 0,
 						    1, 1, true, true);
@@ -3134,42 +3135,76 @@ ac_setup_rings(struct radv_shader_context *ctx)
 	}
 
 	if (ctx->is_gs_copy_shader) {
-		ctx->gsvs_ring =
+		ctx->gsvs_ring[0] =
 			ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
 					      LLVMConstInt(ctx->ac.i32,
 							   RING_GSVS_VS, false));
 	}
 
 	if (ctx->stage == MESA_SHADER_GEOMETRY) {
+		/* The conceptual layout of the GSVS ring is
+		 *   v0c0 .. vLc0 v0c1 .. vLc1 ..
+		 * but the real memory layout is swizzled across
+		 * threads:
+		 *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+		 *   t16v0c0 ..
+		 * Override the buffer descriptor accordingly.
+		 */
+		LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
+		uint64_t stream_offset = 0;
 		unsigned num_records = 64;
 		LLVMValueRef base_ring;
-		LLVMValueRef ring, tmp;
-		unsigned stride;
 
 		base_ring =
 			ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
 					      LLVMConstInt(ctx->ac.i32,
 							   RING_GSVS_GS, false));
 
-		stride = ctx->max_gsvs_emit_size;
+		for (unsigned stream = 0; stream < 4; stream++) {
+			unsigned num_components, stride;
+			LLVMValueRef ring, tmp;
 
-		ring = LLVMBuildBitCast(ctx->ac.builder, base_ring,
-					ctx->ac.v4i32, "");
+			num_components =
+				ctx->shader_info->info.gs.num_stream_output_components[stream];
 
-		tmp = LLVMBuildExtractElement(ctx->ac.builder, ring,
-					      ctx->ac.i32_1, "");
-		tmp = LLVMBuildOr(ctx->ac.builder, tmp,
-				  LLVMConstInt(ctx->ac.i32,
-					       S_008F04_STRIDE(stride), false), "");
-		ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp,
-					      ctx->ac.i32_1, "");
+			if (!num_components)
+				continue;
 
-		ring = LLVMBuildInsertElement(ctx->ac.builder, ring,
-					      LLVMConstInt(ctx->ac.i32,
-							   num_records, false),
-					      LLVMConstInt(ctx->ac.i32, 2, false), "");
+			stride = 4 * num_components * ctx->gs_max_out_vertices;
+
+			/* Limit on the stride field for <= CIK. */
+			assert(stride < (1 << 14));
+
+			ring = LLVMBuildBitCast(ctx->ac.builder,
+						base_ring, v2i64, "");
+			tmp = LLVMBuildExtractElement(ctx->ac.builder,
+						      ring, ctx->ac.i32_0, "");
+			tmp = LLVMBuildAdd(ctx->ac.builder, tmp,
+					   LLVMConstInt(ctx->ac.i64,
+							stream_offset, 0), "");
+			ring = LLVMBuildInsertElement(ctx->ac.builder,
+						      ring, tmp, ctx->ac.i32_0, "");
 
-		ctx->gsvs_ring = ring;
+			stream_offset += stride * 64;
+
+			ring = LLVMBuildBitCast(ctx->ac.builder, ring,
+						ctx->ac.v4i32, "");
+
+			tmp = LLVMBuildExtractElement(ctx->ac.builder, ring,
+						      ctx->ac.i32_1, "");
+			tmp = LLVMBuildOr(ctx->ac.builder, tmp,
+					  LLVMConstInt(ctx->ac.i32,
+						       S_008F04_STRIDE(stride), false), "");
+			ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp,
+						      ctx->ac.i32_1, "");
+
+			ring = LLVMBuildInsertElement(ctx->ac.builder, ring,
+						      LLVMConstInt(ctx->ac.i32,
+								   num_records, false),
+						      LLVMConstInt(ctx->ac.i32, 2, false), "");
+
+			ctx->gsvs_ring[stream] = ring;
+		}
 	}
 
 	if (ctx->stage == MESA_SHADER_TESS_CTRL ||
@@ -3620,7 +3655,8 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
 
 			offset++;
 
-			value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring,
+			value = ac_build_buffer_load(&ctx->ac,
+						     ctx->gsvs_ring[0],
 						     1, ctx->ac.i32_0,
 						     vtx_offset, soffset,
 						     0, 1, 1, true, false);
-- 
2.19.1



More information about the mesa-dev mailing list