[Mesa-dev] [PATCH 3/3] radv: use typed buffer loads for vertex input fetches
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Tue Mar 12 19:25:38 UTC 2019
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> for the series.
On Tue, Feb 26, 2019 at 1:39 PM Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
>
> This drastically reduces the number of SGPRs because the driver
> now uses one descriptor per vertex binding, instead of one per
> vertex attribute.
>
> 29077 shaders in 15096 tests
> Totals:
> SGPRS: 1354285 -> 1282109 (-5.33 %)
> VGPRS: 909896 -> 908800 (-0.12 %)
> Spilled SGPRs: 24840 -> 24811 (-0.12 %)
> Code Size: 49221144 -> 48986628 (-0.48 %) bytes
> Max Waves: 243930 -> 244229 (0.12 %)
>
> Totals from affected shaders:
> SGPRS: 390648 -> 318472 (-18.48 %)
> VGPRS: 288432 -> 287336 (-0.38 %)
> Spilled SGPRs: 94 -> 65 (-30.85 %)
> Code Size: 11548412 -> 11313896 (-2.03 %) bytes
> Max Waves: 86460 -> 86759 (0.35 %)
>
> This gives a really tiny boost.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
> src/amd/vulkan/radv_cmd_buffer.c | 21 +++++++++-----
> src/amd/vulkan/radv_nir_to_llvm.c | 47 +++++++++++++++++++++++++------
> src/amd/vulkan/radv_pipeline.c | 37 ++----------------------
> src/amd/vulkan/radv_private.h | 5 +---
> 4 files changed, 57 insertions(+), 53 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index ad0b934ddfc..5ab93d11d68 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1985,13 +1985,13 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
> {
> if ((pipeline_is_dirty ||
> (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
> - cmd_buffer->state.pipeline->vertex_elements.count &&
> + cmd_buffer->state.pipeline->num_vertex_bindings &&
> radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
> struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements;
> unsigned vb_offset;
> void *vb_ptr;
> uint32_t i = 0;
> - uint32_t count = velems->count;
> + uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings;
> uint64_t va;
>
> /* allocate some descriptor state for vertex buffers */
> @@ -2002,13 +2002,15 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
> for (i = 0; i < count; i++) {
> uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
> uint32_t offset;
> - int vb = velems->binding[i];
> - struct radv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
> - uint32_t stride = cmd_buffer->state.pipeline->binding_stride[vb];
> + struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
> + uint32_t stride = cmd_buffer->state.pipeline->binding_stride[i];
> +
> + if (!buffer)
> + continue;
>
> va = radv_buffer_get_va(buffer->bo);
>
> - offset = cmd_buffer->vertex_bindings[vb].offset + velems->offset[i];
> + offset = cmd_buffer->vertex_bindings[i].offset;
> va += offset + buffer->offset;
> desc[0] = va;
> desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
> @@ -2016,7 +2018,12 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
> desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
> else
> desc[2] = buffer->size - offset;
> - desc[3] = velems->rsrc_word3[i];
> + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
> + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
> }
>
> va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
> index 36f499be212..e6c8f3ecb92 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2008,6 +2008,8 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
>
> LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
>
> + alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
> +
> if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
> alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
> else
> @@ -2035,7 +2037,7 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
> alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
> }
>
> - return alpha;
> + return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
> }
>
> static unsigned
> @@ -2096,7 +2098,7 @@ radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
>
> for (unsigned i = num_channels; i < 4; i++) {
> chan[i] = i == 3 ? one : zero;
> - chan[i] = ac_to_float(&ctx->ac, chan[i]);
> + chan[i] = ac_to_integer(&ctx->ac, chan[i]);
> }
>
> return ac_build_gather_values(&ctx->ac, chan, 4);
> @@ -2154,20 +2156,49 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
> } else
> buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
> ctx->abi.base_vertex, "");
> - t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false);
> -
> - t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
>
> /* Adjust the number of channels to load based on the vertex
> * attribute format.
> */
> unsigned num_format_channels = get_num_channels_from_data_format(data_format);
> unsigned num_channels = MIN2(num_input_channels, num_format_channels);
> + unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[attrib_index];
> + unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
> + unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
>
> - input = ac_build_buffer_load_format(&ctx->ac, t_list,
> + if (attrib_stride != 0 && attrib_offset > attrib_stride) {
> + LLVMValueRef buffer_offset =
> + LLVMConstInt(ctx->ac.i32,
> + attrib_offset / attrib_stride, false);
> +
> + buffer_index = LLVMBuildAdd(ctx->ac.builder,
> buffer_index,
> - ctx->ac.i32_0,
> - num_channels, false, true);
> + buffer_offset, "");
> +
> + attrib_offset = attrib_offset % attrib_stride;
> + }
> +
> + t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
> + t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
> +
> + input = ac_build_tbuffer_load(&ctx->ac, t_list, buffer_index,
> + LLVMConstInt(ctx->ac.i32, attrib_offset, false),
> + ctx->ac.i32_0, ctx->ac.i32_0,
> + num_channels,
> + data_format, num_format,
> + false, false, true);
> +
> + if (ctx->options->key.vs.post_shuffle & (1 << attrib_index)) {
> + if (num_channels > 1) {
> + LLVMValueRef c[4];
> + c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
> + c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
> + c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
> + c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
> +
> + input = ac_build_gather_values(&ctx->ac, c, 4);
> + }
> + }
>
> input = radv_fixup_vertex_input_fetches(ctx, input, num_channels,
> is_float);
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 5fd57932102..30c3f60790e 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -1244,25 +1244,6 @@ si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
> }
> }
>
> -static unsigned si_map_swizzle(unsigned swizzle)
> -{
> - switch (swizzle) {
> - case VK_SWIZZLE_Y:
> - return V_008F0C_SQ_SEL_Y;
> - case VK_SWIZZLE_Z:
> - return V_008F0C_SQ_SEL_Z;
> - case VK_SWIZZLE_W:
> - return V_008F0C_SQ_SEL_W;
> - case VK_SWIZZLE_0:
> - return V_008F0C_SQ_SEL_0;
> - case VK_SWIZZLE_1:
> - return V_008F0C_SQ_SEL_1;
> - default: /* VK_SWIZZLE_X */
> - return V_008F0C_SQ_SEL_X;
> - }
> -}
> -
> -
> static unsigned radv_dynamic_state_mask(VkDynamicState state)
> {
> switch(state) {
> @@ -3557,24 +3538,10 @@ radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
> &vi_info->pVertexAttributeDescriptions[i];
> unsigned loc = desc->location;
> const struct vk_format_description *format_desc;
> - int first_non_void;
> - uint32_t num_format, data_format;
> - format_desc = vk_format_description(desc->format);
> - first_non_void = vk_format_get_first_non_void_channel(desc->format);
>
> - num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
> - data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
> + format_desc = vk_format_description(desc->format);
>
> - velems->rsrc_word3[loc] = S_008F0C_DST_SEL_X(si_map_swizzle(format_desc->swizzle[0])) |
> - S_008F0C_DST_SEL_Y(si_map_swizzle(format_desc->swizzle[1])) |
> - S_008F0C_DST_SEL_Z(si_map_swizzle(format_desc->swizzle[2])) |
> - S_008F0C_DST_SEL_W(si_map_swizzle(format_desc->swizzle[3])) |
> - S_008F0C_NUM_FORMAT(num_format) |
> - S_008F0C_DATA_FORMAT(data_format);
> velems->format_size[loc] = format_desc->block.bits / 8;
> - velems->offset[loc] = desc->offset;
> - velems->binding[loc] = desc->binding;
> - velems->count = MAX2(velems->count, loc + 1);
> }
>
> for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
> @@ -3582,6 +3549,8 @@ radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
> &vi_info->pVertexBindingDescriptions[i];
>
> pipeline->binding_stride[desc->binding] = desc->stride;
> + pipeline->num_vertex_bindings =
> + MAX2(pipeline->num_vertex_bindings, desc->binding + 1);
> }
> }
>
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index c73bdaca0a3..39fa6110fde 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1342,11 +1342,7 @@ struct radv_prim_vertex_count {
> };
>
> struct radv_vertex_elements_info {
> - uint32_t rsrc_word3[MAX_VERTEX_ATTRIBS];
> uint32_t format_size[MAX_VERTEX_ATTRIBS];
> - uint32_t binding[MAX_VERTEX_ATTRIBS];
> - uint32_t offset[MAX_VERTEX_ATTRIBS];
> - uint32_t count;
> };
>
> struct radv_ia_multi_vgt_param_helpers {
> @@ -1378,6 +1374,7 @@ struct radv_pipeline {
> struct radv_vertex_elements_info vertex_elements;
>
> uint32_t binding_stride[MAX_VBS];
> + uint8_t num_vertex_bindings;
>
> uint32_t user_data_0[MESA_SHADER_STAGES];
> union {
> --
> 2.21.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list