[Mesa-dev] [PATCH 4/4] radeonsi/nir: gather buffers declared more accurately and use const fast path

Marek Olšák maraeo at gmail.com
Fri Mar 30 02:52:25 UTC 2018


On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri <tarceri at itsqueeze.com>
wrote:

> For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip
> setting the more accurate masks for some builtin uniforms for now as
> it causes some piglit regressions.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c     |  8 +++
>  src/gallium/drivers/radeonsi/si_shader_nir.c | 82
> ++++++++++++++++++++++++++--
>  2 files changed, 84 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 62cb7ea7eb5..9a12f9ee8f2 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2377,8 +2377,16 @@ static LLVMValueRef load_const_buffer_desc(struct
> si_shader_context *ctx, int i)
>  static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef
> index)
>  {
>         struct si_shader_context *ctx = si_shader_context_from_abi(abi);
> +       struct si_shader_selector *sel = ctx->shader->selector;
> +
>         LLVMValueRef ptr = LLVMGetParam(ctx->main_fn,
> ctx->param_const_and_shader_buffers);
>
> +       if (sel->info.const_buffers_declared == 1 &&
> +           sel->info.shader_buffers_declared == 0 &&
> +           !(ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600)) {
>

You don't have to check SI and LLVM here. (because const_buffers_declared >
1)


> +               return load_const_buffer_desc_fast_path(ctx);
> +       }
> +
>         index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
>         index = LLVMBuildAdd(ctx->ac.builder, index,
>                              LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS,
> 0), "");
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c
> b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 52950668714..595f376f6a2 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -611,23 +611,91 @@ void si_nir_scan_shader(const struct nir_shader *nir,
>
>         info->num_outputs = num_outputs;
>
> +       struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
> +                                              _mesa_key_pointer_equal);
> +
> +       unsigned ubo_idx = 1;
>         nir_foreach_variable(variable, &nir->uniforms) {
>                 const struct glsl_type *type = variable->type;
>                 enum glsl_base_type base_type =
>                         glsl_get_base_type(glsl_without_array(type));
>                 unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
>
> +               /* Gather buffers declared bitmasks. Note: radeonsi doesn't
> +                * really use the mask (other than ubo_idx == 1 for regular
> +                * uniforms) its really only used for getting the buffer
> count
> +                * so we don't need to worry about the ordering.
> +                */
> +               if (variable->interface_type != NULL) {
> +                       if (variable->data.mode == nir_var_uniform) {
> +
> +                               unsigned block_size;
> +                               if (base_type != GLSL_TYPE_INTERFACE) {
> +                                       struct set_entry *entry =
> +                                               _mesa_set_search(ubo_set,
> variable->interface_type);
> +
> +                                       /* Check if we have already
> processed
> +                                        * a member from this ubo.
> +                                        */
> +                                       if (entry)
> +                                               continue;
> +
> +                                       block_size = 1;
> +                               } else {
> +                                       block_size = aoa_size;
> +                               }
> +
> +                               info->const_buffers_declared |=
> u_bit_consecutive(ubo_idx, block_size);
> +                               ubo_idx += block_size;
>

Can you explain what this does?


> +
> +                               _mesa_set_add(ubo_set,
> variable->interface_type);
> +                       }
> +
> +                       if (variable->data.mode == nir_var_shader_storage)
> {
> +                               /* TODO: make this more accurate */
> +                               info->shader_buffers_declared =
> +                                       u_bit_consecutive(0,
> SI_NUM_SHADER_BUFFERS);
>
+                       }
> +
> +                       continue;
> +               }
> +
>                 /* We rely on the fact that nir_lower_samplers_as_deref has
>                  * eliminated struct dereferences.
>                  */
> -               if (base_type == GLSL_TYPE_SAMPLER)
> +               if (base_type == GLSL_TYPE_SAMPLER) {
>                         info->samplers_declared |=
>                                 u_bit_consecutive(variable->data.binding,
> aoa_size);
> -               else if (base_type == GLSL_TYPE_IMAGE)
> +
> +                       if (variable->data.bindless)
> +                               info->const_buffers_declared |= 1;
> +               } else if (base_type == GLSL_TYPE_IMAGE) {
>                         info->images_declared |=
>                                 u_bit_consecutive(variable->data.binding,
> aoa_size);
> +
> +                       if (variable->data.bindless)
> +                               info->const_buffers_declared |= 1;
> +               } else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
> +                       if (strncmp(variable->name, "state.", 6) == 0) {
> +                               /* FIXME: figure out why piglit tests with
> builtin
> +                                * uniforms are failing without this.
> +                                */
> +                               info->const_buffers_declared =
> +                                       u_bit_consecutive(0,
> SI_NUM_CONST_BUFFERS);
>
+                       } else {
> +                               info->const_buffers_declared |= 1;
> +                               info->const_file_max[0] +=
> +                                       glsl_count_attribute_slots(type,
> false);
> +                       }
> +               }
>         }
>
> +       _mesa_set_destroy(ubo_set, NULL);
> +
> +       /* This is the max index not max count so we adjust it here */
> +       if (info->const_file_max[0] != 0)
> +               info->const_file_max[0] -= 1;
> +
>         info->num_written_clipdistance = nir->info.clip_distance_array_
> size;
>         info->num_written_culldistance = nir->info.cull_distance_array_
> size;
>         info->clipdist_writemask = u_bit_consecutive(0, info->num_written_
> clipdistance);
> @@ -636,10 +704,6 @@ void si_nir_scan_shader(const struct nir_shader *nir,
>         if (info->processor == PIPE_SHADER_FRAGMENT)
>                 info->uses_kill = nir->info.fs.uses_discard;
>
> -       /* TODO make this more accurate */
> -       info->const_buffers_declared = u_bit_consecutive(0,
> SI_NUM_CONST_BUFFERS);
> -       info->shader_buffers_declared = u_bit_consecutive(0,
> SI_NUM_SHADER_BUFFERS);
> -
>         func = (struct nir_function *)exec_list_get_head_const(&
> nir->functions);
>         nir_foreach_block(block, func->impl) {
>                 nir_foreach_instr(instr, block)
> @@ -654,6 +718,12 @@ void si_nir_scan_shader(const struct nir_shader *nir,
>  void
>  si_lower_nir(struct si_shader_selector* sel)
>  {
> +       /* Disable const buffer fast path for old LLVM versions */
> +       if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600) {
> +               sel->info.const_buffers_declared = u_bit_consecutive(0,
> SI_NUM_CONST_BUFFERS);
> +               sel->info.shader_buffers_declared = u_bit_consecutive(0,
> SI_NUM_SHADER_BUFFERS);
> +       }
>

You can ignore what I said in patch 3.

You can do this instead:
if (.... && const_buffers_declared == 1 && shader_buffers_declared = 0)
   const_buffers_declared |= 0x2;

This disables the optimization while still significantly decreases overhead
in the descriptor upload path.

Marek
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180329/a3b458a7/attachment-0001.html>


More information about the mesa-dev mailing list