[Mesa-dev] [PATCH 8/9] radv/ac: add support for indirect access of descriptor sets.

Connor Abbott cwabbott0 at gmail.com
Wed Apr 19 00:09:18 UTC 2017


On Mon, Apr 17, 2017 at 11:57 PM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> We want to expose more descriptor sets to the applications,
> but currently we have a 1:1 mapping between shader descriptor
> sets and 2 user sgprs, limiting us to 4 per stage. This commit
> checks whether we have enough user sgprs for the number of
> bound sets for this shader; if not, we can ask for them to be indirected.
>
> Two sgprs are then used to point to a buffer of 64-bit pointers
> to the number of allocated descriptor sets. All shaders point
> to the same buffer.
>
> We can use some user sgprs to inline one or two descriptor sets
> in future, but until we have a workload that needs this I don't
> think we should spend too much time on it.

FWIW, from what I understand this is what Vulkan expects the driver to
do. The idea is that games often have draw loops that look like:

for each large thing A {
   set parameters for A; // cold
   for each smaller thing B in A {
      set parameters for B; // medium
      for each even smaller thing C in B {
         set parameters for C; // hot, executed a lot
         draw();
      }
   }
}

Vulkan expects the developer to put the frequently-changing parameters
for C into the last descriptor set so that changing it out is always
fast, and then the driver will inline as many descriptor set pointers
as it can, starting from the end. That's the intent behind the spec's
advice for apps to "Place the least frequently changing descriptor
sets near the start of the pipeline layout, and place the descriptor
sets representing the most frequently changing resources near the
end." So, if application developers have done their homework, then
this optimization can be quite effective.

>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/amd/common/ac_nir_to_llvm.c  | 52 ++++++++++++++++++++++++++----------
>  src/amd/common/ac_nir_to_llvm.h  |  5 ++--
>  src/amd/vulkan/radv_cmd_buffer.c | 57 +++++++++++++++++++++++++++++++++++++++-
>  src/amd/vulkan/radv_pipeline.c   |  7 +++++
>  src/amd/vulkan/radv_private.h    |  2 +-
>  5 files changed, 105 insertions(+), 18 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 8ae69d5..493e521 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -537,7 +537,7 @@ static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
>         set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
>  }
>
> -#if 0
> +
>  static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
>                                            uint32_t indirect_offset)
>  {
> @@ -546,7 +546,6 @@ static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uin
>         ud_info->indirect = true;
>         ud_info->indirect_offset = indirect_offset;
>  }
> -#endif
>
>  static void declare_tess_lds(struct nir_to_llvm_context *ctx)
>  {
> @@ -559,6 +558,7 @@ static void declare_tess_lds(struct nir_to_llvm_context *ctx)
>  struct user_sgpr_info {
>         bool need_ring_offsets;
>         uint8_t sgpr_count;
> +       bool indirect_all_descriptor_sets;
>  };
>
>  static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
> @@ -623,6 +623,8 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
>                 fprintf(stderr, "radv: TODO: add support for indirect sgprs\n");
>                 /* need to add support for indirect descriptor sets */
>                 assert(0);
> +               user_sgpr_info->sgpr_count += 2;
> +               user_sgpr_info->indirect_all_descriptor_sets = true;
>         } else {
>                 user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
>         }
> @@ -645,11 +647,16 @@ static void create_function(struct nir_to_llvm_context *ctx)
>         }
>
>         /* 1 for each descriptor set */
> -       for (unsigned i = 0; i < num_sets; ++i) {
> -               if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
> -                       array_params_mask |= (1 << arg_idx);
> -                       arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
> +       if (!user_sgpr_info.indirect_all_descriptor_sets) {
> +               for (unsigned i = 0; i < num_sets; ++i) {
> +                       if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
> +                               array_params_mask |= (1 << arg_idx);
> +                               arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
> +                       }
>                 }
> +       } else {
> +               array_params_mask |= (1 << arg_idx);
> +               arg_types[arg_idx++] = const_array(const_array(ctx->i8, 1024 * 1024), 32);
>         }
>
>         if (ctx->shader_info->info.needs_push_constants) {
> @@ -804,14 +811,31 @@ static void create_function(struct nir_to_llvm_context *ctx)
>                         ctx->ring_offsets = LLVMGetParam(ctx->main_function, arg_idx++);
>         }
>
> -       for (unsigned i = 0; i < num_sets; ++i) {
> -               if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
> -                       set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
> -                       user_sgpr_idx += 2;
> -                       ctx->descriptor_sets[i] =
> -                               LLVMGetParam(ctx->main_function, arg_idx++);
> -               } else
> -                       ctx->descriptor_sets[i] = NULL;
> +       if (!user_sgpr_info.indirect_all_descriptor_sets) {
> +               for (unsigned i = 0; i < num_sets; ++i) {
> +                       if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
> +                               set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
> +                               user_sgpr_idx += 2;
> +                               ctx->descriptor_sets[i] =
> +                                       LLVMGetParam(ctx->main_function, arg_idx++);
> +                       } else
> +                               ctx->descriptor_sets[i] = NULL;
> +               }
> +       } else {
> +               uint32_t desc_sgpr_idx = user_sgpr_idx;
> +               LLVMValueRef desc_sets = LLVMGetParam(ctx->main_function, arg_idx++);
> +               set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx, 2);
> +               user_sgpr_idx += 2;
> +
> +               for (unsigned i = 0; i < num_sets; ++i) {
> +                       if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
> +                               set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
> +                               ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
> +
> +                       } else
> +                               ctx->descriptor_sets[i] = NULL;
> +               }
> +               ctx->shader_info->need_indirect_descriptor_sets = true;
>         }
>
>         if (ctx->shader_info->info.needs_push_constants) {
> diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h
> index 7a4065a..401d284 100644
> --- a/src/amd/common/ac_nir_to_llvm.h
> +++ b/src/amd/common/ac_nir_to_llvm.h
> @@ -83,7 +83,8 @@ struct ac_userdata_info {
>  enum ac_ud_index {
>         AC_UD_SCRATCH_RING_OFFSETS = 0,
>         AC_UD_PUSH_CONSTANTS = 1,
> -       AC_UD_SHADER_START = 2,
> +       AC_UD_INDIRECT_DESCRIPTOR_SETS = 2,
> +       AC_UD_SHADER_START = 3,
>         AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
>         AC_UD_VS_BASE_VERTEX_START_INSTANCE,
>         AC_UD_VS_LS_TCS_IN_LAYOUT,
> @@ -142,7 +143,7 @@ struct ac_shader_variant_info {
>         unsigned num_user_sgprs;
>         unsigned num_input_sgprs;
>         unsigned num_input_vgprs;
> -
> +       bool need_indirect_descriptor_sets;
>         union {
>                 struct {
>                         struct ac_vs_output_info outinfo;
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index 343e043..31543ef 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1246,7 +1246,7 @@ emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
>         struct ac_userdata_info *desc_set_loc = &pipeline->shaders[stage]->info.user_sgprs_locs.descriptor_sets[idx];
>         uint32_t base_reg = shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
>
> -       if (desc_set_loc->sgpr_idx == -1)
> +       if (desc_set_loc->sgpr_idx == -1 || desc_set_loc->indirect)
>                 return;
>
>         assert(!desc_set_loc->indirect);
> @@ -1314,17 +1314,72 @@ radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer)
>  }
>
>  static void
> +radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
> +                                   struct radv_pipeline *pipeline)
> +{
> +       uint32_t size = MAX_SETS * 2 * 4;
> +       uint32_t offset;
> +       void *ptr;
> +
> +       if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size,
> +                                         256, &offset, &ptr))
> +               return;
> +
> +       for (unsigned i = 0; i < MAX_SETS; i++) {
> +               uint32_t *uptr = ((uint32_t *)ptr) + i * 2;
> +               uint64_t set_va = 0;
> +               struct radv_descriptor_set *set = cmd_buffer->state.descriptors[i];
> +               if (set)
> +                       set_va = set->va;
> +               uptr[0] = set_va & 0xffffffff;
> +               uptr[1] = set_va >> 32;
> +       }
> +
> +       uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
> +       va += offset;
> +
> +       if (pipeline->shaders[MESA_SHADER_VERTEX])
> +               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX,
> +                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
> +
> +       if (pipeline->shaders[MESA_SHADER_FRAGMENT])
> +               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT,
> +                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
> +
> +       if (radv_pipeline_has_gs(pipeline))
> +               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_GEOMETRY,
> +                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
> +
> +       if (radv_pipeline_has_tess(pipeline))
> +               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_TESS_CTRL,
> +                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
> +
> +       if (radv_pipeline_has_tess(pipeline))
> +               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_TESS_EVAL,
> +                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
> +
> +       if (pipeline->shaders[MESA_SHADER_COMPUTE])
> +               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_COMPUTE,
> +                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
> +}
> +
> +static void
>  radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
>                        struct radv_pipeline *pipeline,
>                        VkShaderStageFlags stages)
>  {
>         unsigned i;
> +
>         if (!cmd_buffer->state.descriptors_dirty)
>                 return;
>
>         if (cmd_buffer->state.push_descriptors_dirty)
>                 radv_flush_push_descriptors(cmd_buffer);
>
> +       if (pipeline->need_indirect_descriptor_sets) {
> +               radv_flush_indirect_descriptor_sets(cmd_buffer, pipeline);
> +       }
> +
>         for (i = 0; i < MAX_SETS; i++) {
>                 if (!(cmd_buffer->state.descriptors_dirty & (1 << i)))
>                         continue;
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index cf11362..8e71d59 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -2112,6 +2112,12 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
>         calculate_pa_cl_vs_out_cntl(pipeline);
>         calculate_ps_inputs(pipeline);
>
> +       for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
> +               if (pipeline->shaders[i]) {
> +                       pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
> +               }
> +       }
> +
>         uint32_t stages = 0;
>         if (radv_pipeline_has_tess(pipeline)) {
>                 stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
> @@ -2270,6 +2276,7 @@ static VkResult radv_compute_pipeline_create(
>                                        pipeline->layout, NULL);
>
>
> +       pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
>         result = radv_pipeline_scratch_init(device, pipeline);
>         if (result != VK_SUCCESS) {
>                 radv_pipeline_destroy(device, pipeline, pAllocator);
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 4ace068..0d60aac 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1007,7 +1007,7 @@ struct radv_pipeline {
>         struct radv_pipeline_layout *                 layout;
>
>         bool                                         needs_data_cache;
> -
> +       bool                                         need_indirect_descriptor_sets;
>         struct radv_shader_variant *                 shaders[MESA_SHADER_STAGES];
>         struct radv_shader_variant *gs_copy_shader;
>         VkShaderStageFlags                           active_stages;
> --
> 2.9.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list