[Mesa-dev] [PATCH 2/2] [rfc] radv: inline push constants where possible. (v2)

Alex Smith asmith at feralinteractive.com
Fri Jan 12 09:49:56 UTC 2018


Looks like it's working fine here now. One comment inline below.

On 12 January 2018 at 02:43, Dave Airlie <airlied at gmail.com> wrote:

> From: Dave Airlie <airlied at redhat.com>
>
> Instead of putting the push constants into the upload buffer,
> if we have space in the sgprs we can upload the per-stage
> constants into the shaders directly.
>
> This saves a few reads from memory in the meta shaders,
> we should also be able to inline other objects like
> descriptors.
>
> v2: fixup 16->available_sgprs (Samuel)
> fixup dynamic offsets. (Alex)
> bump to 12.
> handle push consts > 32 better, avoid F1 2017 crash
>
> TODO: proper vega support (Samuel)
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/amd/common/ac_nir_to_llvm.c  | 102 ++++++++++++++++++++++++++++++
> +++++----
>  src/amd/common/ac_nir_to_llvm.h  |   5 ++
>  src/amd/common/ac_shader_info.c  |   5 +-
>  src/amd/common/ac_shader_info.h  |   1 +
>  src/amd/vulkan/radv_cmd_buffer.c |  75 +++++++++++++++++++++-------
>  5 files changed, 159 insertions(+), 29 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_
> llvm.c
> index 6c578de3aca..00ad76a82f7 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -92,6 +92,7 @@ struct nir_to_llvm_context {
>         LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
>         LLVMValueRef ring_offsets;
>         LLVMValueRef push_constants;
> +       LLVMValueRef inline_push_consts[AC_UD_MAX_INLINE_PUSH_CONST];
>         LLVMValueRef view_index;
>         LLVMValueRef num_work_groups;
>         LLVMValueRef workgroup_ids[3];
> @@ -243,7 +244,7 @@ static void set_llvm_calling_convention(LLVMValueRef
> func,
>         LLVMSetFunctionCallConv(func, calling_conv);
>  }
>
> -#define MAX_ARGS 23
> +#define MAX_ARGS 32
>  struct arg_info {
>         LLVMTypeRef types[MAX_ARGS];
>         LLVMValueRef *assign[MAX_ARGS];
> @@ -538,6 +539,8 @@ struct user_sgpr_info {
>         bool need_ring_offsets;
>         uint8_t sgpr_count;
>         bool indirect_all_descriptor_sets;
> +       uint8_t base_inline_push_consts;
> +       uint8_t num_inline_push_consts;
>  };
>
>  static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
> @@ -609,8 +612,49 @@ static void allocate_user_sgprs(struct
> nir_to_llvm_context *ctx,
>         } else {
>                 user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask)
> * 2;
>         }
> +
> +       if (ctx->shader_info->info.loads_push_constants) {
> +               uint32_t remaining_sgprs = available_sgprs -
> user_sgpr_info->sgpr_count;
> +               if (!ctx->shader_info->info.has_indirect_push_constants &&
> +                   !ctx->shader_info->info.loads_dynamic_offsets)
> +                       remaining_sgprs += 2;
> +
> +               if (ctx->options->layout->push_constant_size) {
> +                       uint8_t num_32bit_push_consts =
> (ctx->shader_info->info.max_push_constant_used -
> +
> ctx->shader_info->info.min_push_constant_used) / 4;
> +
> +                       if ((ctx->shader_info->info.min_push_constant_used
> / 4) <= 63 &&
> +                           (ctx->shader_info->info.max_push_constant_used
> / 4) <= 63) {
> +                               user_sgpr_info->base_inline_push_consts =
> ctx->shader_info->info.min_push_constant_used / 4;
> +
> +                               if (num_32bit_push_consts <
> remaining_sgprs) {
> +                                       user_sgpr_info->num_inline_push_consts
> = num_32bit_push_consts;
> +                                       if (!ctx->shader_info->info.has_
> indirect_push_constants)
> +
>  ctx->shader_info->info.loads_push_constants = false;
> +                               } else {
> +                                       user_sgpr_info->num_inline_push_consts
> = remaining_sgprs;
> +                               }
> +
> +                               if (user_sgpr_info->num_inline_push_consts
> > AC_UD_MAX_INLINE_PUSH_CONST)
> +                                       user_sgpr_info->num_inline_push_consts
> = AC_UD_MAX_INLINE_PUSH_CONST;
>

This is done after possibly setting loads_push_constants to false, if this
happens shouldn't that still be true?

With that fixed, for the series:

Reviewed-by: Alex Smith <asmith at feralinteractive.com>

Thanks,
Alex


> +                       }
> +               }
> +       }
>  }
>
> +static void
> +declare_inline_push_consts(struct nir_to_llvm_context *ctx,
> +                          gl_shader_stage stage,
> +                          const struct user_sgpr_info *user_sgpr_info,
> +                          struct arg_info *args)
> +{
> +       ctx->shader_info->inline_push_const_mask = (1ULL <<
> user_sgpr_info->num_inline_push_consts) - 1;
> +       ctx->shader_info->inline_push_const_base =
> user_sgpr_info->base_inline_push_consts;
> +
> +       for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts;
> i++)
> +               add_arg(args, ARG_SGPR, ctx->ac.i32,
> &ctx->inline_push_consts[i]);
> +
> +}
>  static void
>  declare_global_input_sgprs(struct nir_to_llvm_context *ctx,
>                            gl_shader_stage stage,
> @@ -640,10 +684,14 @@ declare_global_input_sgprs(struct
> nir_to_llvm_context *ctx,
>                 add_array_arg(args, const_array(type, 32), desc_sets);
>         }
>
> -       if (ctx->shader_info->info.loads_push_constants) {
> +       if (ctx->shader_info->info.loads_push_constants ||
> +           ctx->shader_info->info.loads_dynamic_offsets) {
>                 /* 1 for push constants and dynamic descriptors */
>                 add_array_arg(args, type, &ctx->push_constants);
>         }
> +
> +       if (!((stage == MESA_SHADER_VERTEX) || (has_previous_stage &&
> previous_stage == MESA_SHADER_VERTEX)))
> +               declare_inline_push_consts(ctx, stage, user_sgpr_info,
> args);
>  }
>
>  static void
> @@ -651,6 +699,7 @@ declare_vs_specific_input_sgprs(struct
> nir_to_llvm_context *ctx,
>                                 gl_shader_stage stage,
>                                 bool has_previous_stage,
>                                 gl_shader_stage previous_stage,
> +                               const struct user_sgpr_info
> *user_sgpr_info,
>                                 struct arg_info *args)
>  {
>         if (!ctx->is_gs_copy_shader &&
> @@ -660,6 +709,7 @@ declare_vs_specific_input_sgprs(struct
> nir_to_llvm_context *ctx,
>                         add_arg(args, ARG_SGPR, const_array(ctx->ac.v4i32,
> 16),
>                                 &ctx->vertex_buffers);
>                 }
> +               declare_inline_push_consts(ctx, stage, user_sgpr_info,
> args);
>                 add_arg(args, ARG_SGPR, ctx->ac.i32,
> &ctx->abi.base_vertex);
>                 add_arg(args, ARG_SGPR, ctx->ac.i32,
> &ctx->abi.start_instance);
>                 if (ctx->shader_info->info.vs.needs_draw_id) {
> @@ -693,6 +743,16 @@ declare_tes_input_vgprs(struct nir_to_llvm_context
> *ctx, struct arg_info *args)
>         add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id);
>  }
>
> +static void
> +set_inline_pushconst_locs(struct nir_to_llvm_context *ctx,
> +                         const struct user_sgpr_info *user_sgpr_info,
> +                         uint8_t *user_sgpr_idx)
> +{
> +       ctx->shader_info->user_sgprs_locs.push_const_base =
> user_sgpr_info->base_inline_push_consts;
> +       for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts;
> i++)
> +               set_loc(&ctx->shader_info->user_sgprs_locs.inline_push_consts[i],
> user_sgpr_idx, 1, 0);
> +}
> +
>  static void
>  set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage
> stage,
>                       bool has_previous_stage, gl_shader_stage
> previous_stage,
> @@ -731,15 +791,21 @@ set_global_input_locs(struct nir_to_llvm_context
> *ctx, gl_shader_stage stage,
>                 ctx->shader_info->need_indirect_descriptor_sets = true;
>         }
>
> -       if (ctx->shader_info->info.loads_push_constants) {
> +       if (ctx->shader_info->info.loads_push_constants ||
> +           ctx->shader_info->info.loads_dynamic_offsets) {
>                 set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx,
> 2);
>         }
> +
> +
> +       if (!((stage == MESA_SHADER_VERTEX) || (has_previous_stage &&
> previous_stage == MESA_SHADER_VERTEX)))
> +               set_inline_pushconst_locs(ctx, user_sgpr_info,
> user_sgpr_idx);
>  }
>
>  static void
>  set_vs_specific_input_locs(struct nir_to_llvm_context *ctx,
>                            gl_shader_stage stage, bool has_previous_stage,
>                            gl_shader_stage previous_stage,
> +                          const struct user_sgpr_info *user_sgpr_info,
>                            uint8_t *user_sgpr_idx)
>  {
>         if (!ctx->is_gs_copy_shader &&
> @@ -750,6 +816,7 @@ set_vs_specific_input_locs(struct nir_to_llvm_context
> *ctx,
>                                        user_sgpr_idx, 2);
>                 }
>
> +               set_inline_pushconst_locs(ctx, user_sgpr_info,
> user_sgpr_idx);
>                 unsigned vs_num = 2;
>                 if (ctx->shader_info->info.vs.needs_draw_id)
>                         vs_num++;
> @@ -805,7 +872,7 @@ static void create_function(struct nir_to_llvm_context
> *ctx,
>                                            previous_stage, &user_sgpr_info,
>                                            &args, &desc_sets);
>                 declare_vs_specific_input_sgprs(ctx, stage,
> has_previous_stage,
> -                                               previous_stage, &args);
> +                                               previous_stage,
> &user_sgpr_info, &args);
>
>                 if (ctx->shader_info->info.needs_multiview_view_index ||
> (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls &&
> ctx->options->key.has_multiview_view_index))
>                         add_arg(&args, ARG_SGPR, ctx->ac.i32,
> &ctx->view_index);
> @@ -838,7 +905,7 @@ static void create_function(struct nir_to_llvm_context
> *ctx,
>                                                    &desc_sets);
>                         declare_vs_specific_input_sgprs(ctx, stage,
>                                                         has_previous_stage,
> -                                                       previous_stage,
> &args);
> +                                                       previous_stage,
> &user_sgpr_info, &args);
>
>                         add_arg(&args, ARG_SGPR, ctx->ac.i32,
>                                 &ctx->ls_out_layout);
> @@ -934,7 +1001,7 @@ static void create_function(struct
> nir_to_llvm_context *ctx,
>                         } else {
>                                 declare_vs_specific_input_sgprs(ctx,
> stage,
>
> has_previous_stage,
> -
>  previous_stage,
> +
>  previous_stage, &user_sgpr_info,
>                                                                 &args);
>                         }
>
> @@ -1076,7 +1143,7 @@ static void create_function(struct
> nir_to_llvm_context *ctx,
>                 break;
>         case MESA_SHADER_VERTEX:
>                 set_vs_specific_input_locs(ctx, stage, has_previous_stage,
> -                                          previous_stage, &user_sgpr_idx);
> +                                          previous_stage,
> &user_sgpr_info, &user_sgpr_idx);
>                 if (ctx->view_index)
>                         set_loc_shader(ctx, AC_UD_VIEW_INDEX,
> &user_sgpr_idx, 1);
>                 if (ctx->options->key.vs.as_ls) {
> @@ -1088,7 +1155,7 @@ static void create_function(struct
> nir_to_llvm_context *ctx,
>                 break;
>         case MESA_SHADER_TESS_CTRL:
>                 set_vs_specific_input_locs(ctx, stage, has_previous_stage,
> -                                          previous_stage, &user_sgpr_idx);
> +                                          previous_stage,
> &user_sgpr_info, &user_sgpr_idx);
>                 if (has_previous_stage)
>                         set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
>                                        &user_sgpr_idx, 1);
> @@ -1108,6 +1175,7 @@ static void create_function(struct
> nir_to_llvm_context *ctx,
>                                 set_vs_specific_input_locs(ctx, stage,
>
>  has_previous_stage,
>                                                            previous_stage,
> +                                                          &user_sgpr_info,
>                                                            &user_sgpr_idx);
>                         else
>                                 set_loc_shader(ctx,
> AC_UD_TES_OFFCHIP_LAYOUT,
> @@ -2371,9 +2439,23 @@ static LLVMValueRef visit_load_push_constant(struct
> nir_to_llvm_context *ctx,
>                                               nir_intrinsic_instr *instr)
>  {
>         LLVMValueRef ptr, addr;
> +       LLVMValueRef src0 = get_src(ctx->nir, instr->src[0]);
> +       unsigned index = nir_intrinsic_base(instr);
> +
> +       if (LLVMIsConstant(src0)) {
> +               unsigned array_index = index;
> +               array_index += LLVMConstIntGetZExtValue(src0);
> +               array_index /= 4;
> +
> +               array_index -= ctx->shader_info->inline_push_const_base;
> +               uint64_t bits = (((1ULL << instr->num_components) - 1) <<
> array_index);
> +               if ((bits & ctx->shader_info->inline_push_const_mask) ==
> bits) {
> +                       return ac_build_gather_values(&ctx->ac,
> &ctx->inline_push_consts[array_index], instr->num_components);
> +               }
> +       }
>
> -       addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
> -       addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir,
> instr->src[0]), "");
> +       addr = LLVMConstInt(ctx->ac.i32, index, 0);
> +       addr = LLVMBuildAdd(ctx->builder, addr, src0, "");
>
>         ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr);
>         ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa));
> diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_
> llvm.h
> index b3ad0a09857..22330fdfbc4 100644
> --- a/src/amd/common/ac_nir_to_llvm.h
> +++ b/src/amd/common/ac_nir_to_llvm.h
> @@ -127,10 +127,13 @@ enum ac_ud_index {
>
>  // Match MAX_SETS from radv_descriptor_set.h
>  #define AC_UD_MAX_SETS MAX_SETS
> +#define AC_UD_MAX_INLINE_PUSH_CONST 12
>
>  struct ac_userdata_locations {
>         struct ac_userdata_info descriptor_sets[AC_UD_MAX_SETS];
>         struct ac_userdata_info shader_data[AC_UD_MAX_UD];
> +       struct ac_userdata_info inline_push_consts[AC_UD_MAX_
> INLINE_PUSH_CONST];
> +       uint8_t push_const_base;
>  };
>
>  struct ac_vs_output_info {
> @@ -156,6 +159,8 @@ struct ac_shader_variant_info {
>         unsigned num_user_sgprs;
>         unsigned num_input_sgprs;
>         unsigned num_input_vgprs;
> +       uint64_t inline_push_const_mask;
> +       uint32_t inline_push_const_base;
>         bool need_indirect_descriptor_sets;
>         struct {
>                 struct {
> diff --git a/src/amd/common/ac_shader_info.c b/src/amd/common/ac_shader_
> info.c
> index 18fa9e1c94c..fbb46684ae3 100644
> --- a/src/amd/common/ac_shader_info.c
> +++ b/src/amd/common/ac_shader_info.c
> @@ -179,9 +179,10 @@ ac_nir_shader_info_pass(struct nir_shader *nir,
>  {
>         struct nir_function *func = (struct nir_function
> *)exec_list_get_head(&nir->functions);
>
> -
> -       if (options->layout->dynamic_offset_count)
> +       if (options->layout->dynamic_offset_count) {
>                 info->loads_push_constants = true;
> +               info->loads_dynamic_offsets = true;
> +       }
>
>         nir_foreach_variable(variable, &nir->inputs)
>                 gather_info_input_decl(nir, options, variable, info);
> diff --git a/src/amd/common/ac_shader_info.h b/src/amd/common/ac_shader_
> info.h
> index e35cde0ca97..e8ea33f2e3a 100644
> --- a/src/amd/common/ac_shader_info.h
> +++ b/src/amd/common/ac_shader_info.h
> @@ -32,6 +32,7 @@ struct ac_shader_info {
>         uint8_t min_push_constant_used;
>         uint8_t max_push_constant_used;
>         bool has_indirect_push_constants;
> +       bool loads_dynamic_offsets;
>         bool loads_push_constants;
>         bool needs_multiview_view_index;
>         bool uses_invocation_id;
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_
> buffer.c
> index d870a9bedb3..e1953a3a095 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1807,6 +1807,27 @@ radv_flush_descriptors(struct radv_cmd_buffer
> *cmd_buffer,
>         assert(cmd_buffer->cs->cdw <= cdw_max);
>  }
>
> +static struct ac_userdata_info *
> +radv_lookup_push_const_sgpr(struct radv_shader_variant *shader,
> +                           int idx)
> +{
> +       idx -= shader->info.user_sgprs_locs.push_const_base;
> +       return &shader->info.user_sgprs_locs.inline_push_consts[idx];
> +}
> +
> +static void
> +radv_emit_inline_pushconsts(struct radv_cmd_buffer *cmd_buffer,
> +                           struct radv_shader_variant *shader,
> +                           unsigned base_reg,
> +                           int idx, int count, uint32_t *values)
> +{
> +       struct ac_userdata_info *loc = radv_lookup_push_const_sgpr(shader,
> idx);
> +       assert (loc->sgpr_idx == -1);
> +       assert (!loc->indirect);
> +       radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx *
> 4, count);
> +       radeon_emit_array(cmd_buffer->cs, values, count);
> +}
> +
>  static void
>  radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
>                      struct radv_pipeline *pipeline,
> @@ -1816,36 +1837,56 @@ radv_flush_constants(struct radv_cmd_buffer
> *cmd_buffer,
>         unsigned offset;
>         void *ptr;
>         uint64_t va;
> +       bool need_push_constants = false;
>
>         stages &= cmd_buffer->push_constant_stages;
>         if (!stages ||
>             (!layout->push_constant_size && !layout->dynamic_offset_count)
> )
>                 return;
>
> -       if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
> layout->push_constant_size +
> -                                         16 *
> layout->dynamic_offset_count,
> -                                         256, &offset, &ptr))
> -               return;
> +       radv_foreach_stage(stage, stages) {
> +               if (!pipeline->shaders[stage])
> +                       continue;
> +
> +               need_push_constants |= pipeline->shaders[stage]->
> info.info.loads_push_constants;
> +               need_push_constants |= pipeline->shaders[stage]->
> info.info.loads_dynamic_offsets;
>
> -       memcpy(ptr, cmd_buffer->push_constants,
> layout->push_constant_size);
> -       if (layout->dynamic_offset_count) {
> -               memcpy((char*)ptr + layout->push_constant_size,
> cmd_buffer->dynamic_buffers,
> -                      16 * layout->dynamic_offset_count);
> +               uint32_t mask = pipeline->shaders[stage]->
> info.inline_push_const_mask;
> +               uint32_t base_reg = pipeline->user_data_0[stage];
> +               while (mask) {
> +                       int start, count;
> +                       u_bit_scan_consecutive_range(&mask, &start,
> &count);
> +                       start += pipeline->shaders[stage]->
> info.inline_push_const_base;
> +                       radv_emit_inline_pushconsts(cmd_buffer,
> pipeline->shaders[stage], base_reg,
> +                                                   start, count,
> (uint32_t *)&cmd_buffer->push_constants[start * 4]);
> +               }
>         }
>
> -       va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
> -       va += offset;
> +       if (need_push_constants) {
> +               if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
> layout->push_constant_size +
> +                                                 16 *
> layout->dynamic_offset_count,
> +                                                 256, &offset, &ptr))
> +                       return;
>
> -       MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer-
> >device->ws,
> -                                                          cmd_buffer->cs,
> MESA_SHADER_STAGES * 4);
> +               memcpy(ptr, cmd_buffer->push_constants,
> layout->push_constant_size);
> +               if (layout->dynamic_offset_count) {
> +                       memcpy((char*)ptr + layout->push_constant_size,
> cmd_buffer->dynamic_buffers,
> +                              16 * layout->dynamic_offset_count);
> +               }
>
> -       radv_foreach_stage(stage, stages) {
> -               if (pipeline->shaders[stage]) {
> -                       radv_emit_userdata_address(cmd_buffer, pipeline,
> stage,
> -                                                  AC_UD_PUSH_CONSTANTS,
> va);
> +               va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
> +               va += offset;
> +
> +               MAYBE_UNUSED unsigned cdw_max =
> radeon_check_space(cmd_buffer->device->ws,
> +
> cmd_buffer->cs, MESA_SHADER_STAGES * 4);
> +
> +               radv_foreach_stage(stage, stages) {
> +                       if (pipeline->shaders[stage]) {
> +                               radv_emit_userdata_address(cmd_buffer,
> pipeline, stage,
> +
> AC_UD_PUSH_CONSTANTS, va);
> +                       }
>                 }
>         }
> -
>         cmd_buffer->push_constant_stages &= ~stages;
>         assert(cmd_buffer->cs->cdw <= cdw_max);
>  }
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180112/5ffb81a1/attachment-0001.html>


More information about the mesa-dev mailing list