[Mesa-dev] [PATCH 3/3] radv: use typed buffer loads for vertex input fetches

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Tue Mar 12 19:25:38 UTC 2019


Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> for the series.

On Tue, Feb 26, 2019 at 1:39 PM Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
>
> This drastically reduces the number of SGPRs because the driver
> now uses one descriptor per vertex binding, instead of one per
> vertex attribute (each carrying its own format).
>
> 29077 shaders in 15096 tests
> Totals:
> SGPRS: 1354285 -> 1282109 (-5.33 %)
> VGPRS: 909896 -> 908800 (-0.12 %)
> Spilled SGPRs: 24840 -> 24811 (-0.12 %)
> Code Size: 49221144 -> 48986628 (-0.48 %) bytes
> Max Waves: 243930 -> 244229 (0.12 %)
>
> Totals from affected shaders:
> SGPRS: 390648 -> 318472 (-18.48 %)
> VGPRS: 288432 -> 287336 (-0.38 %)
> Spilled SGPRs: 94 -> 65 (-30.85 %)
> Code Size: 11548412 -> 11313896 (-2.03 %) bytes
> Max Waves: 86460 -> 86759 (0.35 %)
>
> This gives a really tiny boost.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 21 +++++++++-----
>  src/amd/vulkan/radv_nir_to_llvm.c | 47 +++++++++++++++++++++++++------
>  src/amd/vulkan/radv_pipeline.c    | 37 ++----------------------
>  src/amd/vulkan/radv_private.h     |  5 +---
>  4 files changed, 57 insertions(+), 53 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index ad0b934ddfc..5ab93d11d68 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1985,13 +1985,13 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
>  {
>         if ((pipeline_is_dirty ||
>             (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
> -           cmd_buffer->state.pipeline->vertex_elements.count &&
> +           cmd_buffer->state.pipeline->num_vertex_bindings &&
>             radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
>                 struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements;
>                 unsigned vb_offset;
>                 void *vb_ptr;
>                 uint32_t i = 0;
> -               uint32_t count = velems->count;
> +               uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings;
>                 uint64_t va;
>
>                 /* allocate some descriptor state for vertex buffers */
> @@ -2002,13 +2002,15 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
>                 for (i = 0; i < count; i++) {
>                         uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
>                         uint32_t offset;
> -                       int vb = velems->binding[i];
> -                       struct radv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
> -                       uint32_t stride = cmd_buffer->state.pipeline->binding_stride[vb];
> +                       struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
> +                       uint32_t stride = cmd_buffer->state.pipeline->binding_stride[i];
> +
> +                       if (!buffer)
> +                               continue;
>
>                         va = radv_buffer_get_va(buffer->bo);
>
> -                       offset = cmd_buffer->vertex_bindings[vb].offset + velems->offset[i];
> +                       offset = cmd_buffer->vertex_bindings[i].offset;
>                         va += offset + buffer->offset;
>                         desc[0] = va;
>                         desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
> @@ -2016,7 +2018,12 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
>                                 desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
>                         else
>                                 desc[2] = buffer->size - offset;
> -                       desc[3] = velems->rsrc_word3[i];
> +                       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> +                                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> +                                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> +                                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> +                                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
> +                                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
>                 }
>
>                 va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
> index 36f499be212..e6c8f3ecb92 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2008,6 +2008,8 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
>
>         LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
>
> +       alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
> +
>         if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
>                 alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
>         else
> @@ -2035,7 +2037,7 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
>                 alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
>         }
>
> -       return alpha;
> +       return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
>  }
>
>  static unsigned
> @@ -2096,7 +2098,7 @@ radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
>
>         for (unsigned i = num_channels; i < 4; i++) {
>                 chan[i] = i == 3 ? one : zero;
> -               chan[i] = ac_to_float(&ctx->ac, chan[i]);
> +               chan[i] = ac_to_integer(&ctx->ac, chan[i]);
>         }
>
>         return ac_build_gather_values(&ctx->ac, chan, 4);
> @@ -2154,20 +2156,49 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
>                 } else
>                         buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
>                                                     ctx->abi.base_vertex, "");
> -               t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false);
> -
> -               t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
>
>                 /* Adjust the number of channels to load based on the vertex
>                  * attribute format.
>                  */
>                 unsigned num_format_channels = get_num_channels_from_data_format(data_format);
>                 unsigned num_channels = MIN2(num_input_channels, num_format_channels);
> +               unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[attrib_index];
> +               unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
> +               unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
>
> -               input = ac_build_buffer_load_format(&ctx->ac, t_list,
> +               if (attrib_stride != 0 && attrib_offset > attrib_stride) {
> +                       LLVMValueRef buffer_offset =
> +                               LLVMConstInt(ctx->ac.i32,
> +                                            attrib_offset / attrib_stride, false);
> +
> +                       buffer_index = LLVMBuildAdd(ctx->ac.builder,
>                                                     buffer_index,
> -                                                   ctx->ac.i32_0,
> -                                                   num_channels, false, true);
> +                                                   buffer_offset, "");
> +
> +                       attrib_offset = attrib_offset % attrib_stride;
> +               }
> +
> +               t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
> +               t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
> +
> +               input = ac_build_tbuffer_load(&ctx->ac, t_list, buffer_index,
> +                                             LLVMConstInt(ctx->ac.i32, attrib_offset, false),
> +                                             ctx->ac.i32_0, ctx->ac.i32_0,
> +                                             num_channels,
> +                                             data_format, num_format,
> +                                             false, false, true);
> +
> +               if (ctx->options->key.vs.post_shuffle & (1 << attrib_index)) {
> +                       if (num_channels > 1) {
> +                               LLVMValueRef c[4];
> +                               c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
> +                               c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
> +                               c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
> +                               c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
> +
> +                               input = ac_build_gather_values(&ctx->ac, c, 4);
> +                       }
> +               }
>
>                 input = radv_fixup_vertex_input_fetches(ctx, input, num_channels,
>                                                         is_float);
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 5fd57932102..30c3f60790e 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -1244,25 +1244,6 @@ si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
>         }
>  }
>
> -static unsigned si_map_swizzle(unsigned swizzle)
> -{
> -       switch (swizzle) {
> -       case VK_SWIZZLE_Y:
> -               return V_008F0C_SQ_SEL_Y;
> -       case VK_SWIZZLE_Z:
> -               return V_008F0C_SQ_SEL_Z;
> -       case VK_SWIZZLE_W:
> -               return V_008F0C_SQ_SEL_W;
> -       case VK_SWIZZLE_0:
> -               return V_008F0C_SQ_SEL_0;
> -       case VK_SWIZZLE_1:
> -               return V_008F0C_SQ_SEL_1;
> -       default: /* VK_SWIZZLE_X */
> -               return V_008F0C_SQ_SEL_X;
> -       }
> -}
> -
> -
>  static unsigned radv_dynamic_state_mask(VkDynamicState state)
>  {
>         switch(state) {
> @@ -3557,24 +3538,10 @@ radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
>                         &vi_info->pVertexAttributeDescriptions[i];
>                 unsigned loc = desc->location;
>                 const struct vk_format_description *format_desc;
> -               int first_non_void;
> -               uint32_t num_format, data_format;
> -               format_desc = vk_format_description(desc->format);
> -               first_non_void = vk_format_get_first_non_void_channel(desc->format);
>
> -               num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
> -               data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
> +               format_desc = vk_format_description(desc->format);
>
> -               velems->rsrc_word3[loc] = S_008F0C_DST_SEL_X(si_map_swizzle(format_desc->swizzle[0])) |
> -                       S_008F0C_DST_SEL_Y(si_map_swizzle(format_desc->swizzle[1])) |
> -                       S_008F0C_DST_SEL_Z(si_map_swizzle(format_desc->swizzle[2])) |
> -                       S_008F0C_DST_SEL_W(si_map_swizzle(format_desc->swizzle[3])) |
> -                       S_008F0C_NUM_FORMAT(num_format) |
> -                       S_008F0C_DATA_FORMAT(data_format);
>                 velems->format_size[loc] = format_desc->block.bits / 8;
> -               velems->offset[loc] = desc->offset;
> -               velems->binding[loc] = desc->binding;
> -               velems->count = MAX2(velems->count, loc + 1);
>         }
>
>         for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
> @@ -3582,6 +3549,8 @@ radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
>                         &vi_info->pVertexBindingDescriptions[i];
>
>                 pipeline->binding_stride[desc->binding] = desc->stride;
> +               pipeline->num_vertex_bindings =
> +                       MAX2(pipeline->num_vertex_bindings, desc->binding + 1);
>         }
>  }
>
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index c73bdaca0a3..39fa6110fde 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1342,11 +1342,7 @@ struct radv_prim_vertex_count {
>  };
>
>  struct radv_vertex_elements_info {
> -       uint32_t rsrc_word3[MAX_VERTEX_ATTRIBS];
>         uint32_t format_size[MAX_VERTEX_ATTRIBS];
> -       uint32_t binding[MAX_VERTEX_ATTRIBS];
> -       uint32_t offset[MAX_VERTEX_ATTRIBS];
> -       uint32_t count;
>  };
>
>  struct radv_ia_multi_vgt_param_helpers {
> @@ -1378,6 +1374,7 @@ struct radv_pipeline {
>         struct radv_vertex_elements_info             vertex_elements;
>
>         uint32_t                                     binding_stride[MAX_VBS];
> +       uint8_t                                      num_vertex_bindings;
>
>         uint32_t user_data_0[MESA_SHADER_STAGES];
>         union {
> --
> 2.21.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list