Mesa (master): radv: use typed buffer loads for vertex input fetches

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Mar 13 12:31:19 UTC 2019


Module: Mesa
Branch: master
Commit: a66b186bebf9b63897199b9b6e26d40977417f74
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a66b186bebf9b63897199b9b6e26d40977417f74

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Tue Feb 26 13:42:28 2019 +0100

radv: use typed buffer loads for vertex input fetches

This drastically reduces the number of SGPRs because the driver
now uses one descriptor per vertex binding, instead of one per
vertex attribute (each of which encoded the attribute's format).

29077 shaders in 15096 tests
Totals:
SGPRS: 1354285 -> 1282109 (-5.33 %)
VGPRS: 909896 -> 908800 (-0.12 %)
Spilled SGPRs: 24840 -> 24811 (-0.12 %)
Code Size: 49221144 -> 48986628 (-0.48 %) bytes
Max Waves: 243930 -> 244229 (0.12 %)

Totals from affected shaders:
SGPRS: 390648 -> 318472 (-18.48 %)
VGPRS: 288432 -> 287336 (-0.38 %)
Spilled SGPRs: 94 -> 65 (-30.85 %)
Code Size: 11548412 -> 11313896 (-2.03 %) bytes
Max Waves: 86460 -> 86759 (0.35 %)

This gives a really tiny boost.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

---

 src/amd/vulkan/radv_cmd_buffer.c  | 21 +++++++++++------
 src/amd/vulkan/radv_nir_to_llvm.c | 47 ++++++++++++++++++++++++++++++++-------
 src/amd/vulkan/radv_pipeline.c    | 37 +++---------------------------
 src/amd/vulkan/radv_private.h     |  5 +----
 4 files changed, 57 insertions(+), 53 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index d8aceb8b082..06806ed6fce 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1988,13 +1988,13 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
 {
 	if ((pipeline_is_dirty ||
 	    (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
-	    cmd_buffer->state.pipeline->vertex_elements.count &&
+	    cmd_buffer->state.pipeline->num_vertex_bindings &&
 	    radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
 		struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements;
 		unsigned vb_offset;
 		void *vb_ptr;
 		uint32_t i = 0;
-		uint32_t count = velems->count;
+		uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings;
 		uint64_t va;
 
 		/* allocate some descriptor state for vertex buffers */
@@ -2005,13 +2005,15 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
 		for (i = 0; i < count; i++) {
 			uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
 			uint32_t offset;
-			int vb = velems->binding[i];
-			struct radv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
-			uint32_t stride = cmd_buffer->state.pipeline->binding_stride[vb];
+			struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
+			uint32_t stride = cmd_buffer->state.pipeline->binding_stride[i];
+
+			if (!buffer)
+				continue;
 
 			va = radv_buffer_get_va(buffer->bo);
 
-			offset = cmd_buffer->vertex_bindings[vb].offset + velems->offset[i];
+			offset = cmd_buffer->vertex_bindings[i].offset;
 			va += offset + buffer->offset;
 			desc[0] = va;
 			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
@@ -2019,7 +2021,12 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
 				desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
 			else
 				desc[2] = buffer->size - offset;
-			desc[3] = velems->rsrc_word3[i];
+			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+				  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
+				  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 		}
 
 		va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 20371759a97..3dd3e80f3b9 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2008,6 +2008,8 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
 
 	LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
 
+	alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
+
 	if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
 		alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
 	else
@@ -2035,7 +2037,7 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
 		alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
 	}
 
-	return alpha;
+	return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
 }
 
 static unsigned
@@ -2096,7 +2098,7 @@ radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
 
 	for (unsigned i = num_channels; i < 4; i++) {
 		chan[i] = i == 3 ? one : zero;
-		chan[i] = ac_to_float(&ctx->ac, chan[i]);
+		chan[i] = ac_to_integer(&ctx->ac, chan[i]);
 	}
 
 	return ac_build_gather_values(&ctx->ac, chan, 4);
@@ -2154,20 +2156,49 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 		} else
 			buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
 			                            ctx->abi.base_vertex, "");
-		t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false);
-
-		t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
 
 		/* Adjust the number of channels to load based on the vertex
 		 * attribute format.
 		 */
 		unsigned num_format_channels = get_num_channels_from_data_format(data_format);
 		unsigned num_channels = MIN2(num_input_channels, num_format_channels);
+		unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[attrib_index];
+		unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
+		unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
 
-		input = ac_build_buffer_load_format(&ctx->ac, t_list,
+		if (attrib_stride != 0 && attrib_offset > attrib_stride) {
+			LLVMValueRef buffer_offset =
+				LLVMConstInt(ctx->ac.i32,
+					     attrib_offset / attrib_stride, false);
+
+			buffer_index = LLVMBuildAdd(ctx->ac.builder,
 						    buffer_index,
-						    ctx->ac.i32_0,
-						    num_channels, false, true);
+						    buffer_offset, "");
+
+			attrib_offset = attrib_offset % attrib_stride;
+		}
+
+		t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
+		t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
+
+		input = ac_build_tbuffer_load(&ctx->ac, t_list, buffer_index,
+					      LLVMConstInt(ctx->ac.i32, attrib_offset, false),
+					      ctx->ac.i32_0, ctx->ac.i32_0,
+					      num_channels,
+					      data_format, num_format,
+					      false, false, true);
+
+		if (ctx->options->key.vs.post_shuffle & (1 << attrib_index)) {
+			if (num_channels > 1) {
+				LLVMValueRef c[4];
+				c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
+				c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
+				c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
+				c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
+
+				input = ac_build_gather_values(&ctx->ac, c, 4);
+			}
+		}
 
 		input = radv_fixup_vertex_input_fetches(ctx, input, num_channels,
 							is_float);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 60510f97e0f..7f2f96c540a 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1244,25 +1244,6 @@ si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
 	}
 }
 
-static unsigned si_map_swizzle(unsigned swizzle)
-{
-	switch (swizzle) {
-	case VK_SWIZZLE_Y:
-		return V_008F0C_SQ_SEL_Y;
-	case VK_SWIZZLE_Z:
-		return V_008F0C_SQ_SEL_Z;
-	case VK_SWIZZLE_W:
-		return V_008F0C_SQ_SEL_W;
-	case VK_SWIZZLE_0:
-		return V_008F0C_SQ_SEL_0;
-	case VK_SWIZZLE_1:
-		return V_008F0C_SQ_SEL_1;
-	default: /* VK_SWIZZLE_X */
-		return V_008F0C_SQ_SEL_X;
-	}
-}
-
-
 static unsigned radv_dynamic_state_mask(VkDynamicState state)
 {
 	switch(state) {
@@ -3557,24 +3538,10 @@ radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
 			&vi_info->pVertexAttributeDescriptions[i];
 		unsigned loc = desc->location;
 		const struct vk_format_description *format_desc;
-		int first_non_void;
-		uint32_t num_format, data_format;
-		format_desc = vk_format_description(desc->format);
-		first_non_void = vk_format_get_first_non_void_channel(desc->format);
 
-		num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
-		data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
+		format_desc = vk_format_description(desc->format);
 
-		velems->rsrc_word3[loc] = S_008F0C_DST_SEL_X(si_map_swizzle(format_desc->swizzle[0])) |
-			S_008F0C_DST_SEL_Y(si_map_swizzle(format_desc->swizzle[1])) |
-			S_008F0C_DST_SEL_Z(si_map_swizzle(format_desc->swizzle[2])) |
-			S_008F0C_DST_SEL_W(si_map_swizzle(format_desc->swizzle[3])) |
-			S_008F0C_NUM_FORMAT(num_format) |
-			S_008F0C_DATA_FORMAT(data_format);
 		velems->format_size[loc] = format_desc->block.bits / 8;
-		velems->offset[loc] = desc->offset;
-		velems->binding[loc] = desc->binding;
-		velems->count = MAX2(velems->count, loc + 1);
 	}
 
 	for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
@@ -3582,6 +3549,8 @@ radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
 			&vi_info->pVertexBindingDescriptions[i];
 
 		pipeline->binding_stride[desc->binding] = desc->stride;
+		pipeline->num_vertex_bindings =
+			MAX2(pipeline->num_vertex_bindings, desc->binding + 1);
 	}
 }
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index c73bdaca0a3..39fa6110fde 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1342,11 +1342,7 @@ struct radv_prim_vertex_count {
 };
 
 struct radv_vertex_elements_info {
-	uint32_t rsrc_word3[MAX_VERTEX_ATTRIBS];
 	uint32_t format_size[MAX_VERTEX_ATTRIBS];
-	uint32_t binding[MAX_VERTEX_ATTRIBS];
-	uint32_t offset[MAX_VERTEX_ATTRIBS];
-	uint32_t count;
 };
 
 struct radv_ia_multi_vgt_param_helpers {
@@ -1378,6 +1374,7 @@ struct radv_pipeline {
 	struct radv_vertex_elements_info             vertex_elements;
 
 	uint32_t                                     binding_stride[MAX_VBS];
+	uint8_t                                      num_vertex_bindings;
 
 	uint32_t user_data_0[MESA_SHADER_STAGES];
 	union {




More information about the mesa-commit mailing list