[Mesa-dev] [PATCH v2] radeonsi/nir: gather buffers declared more accurately and use const fast path

Marek Olšák maraeo at gmail.com
Mon Apr 2 01:06:03 UTC 2018


On Fri, Mar 30, 2018 at 4:18 AM, Timothy Arceri <tarceri at itsqueeze.com>
wrote:

> For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip
> setting the more accurate masks for some builtin uniforms for now as
> it causes some piglit regressions.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c     |  7 +++
>  src/gallium/drivers/radeonsi/si_shader_nir.c | 90
> ++++++++++++++++++++++++++--
>  2 files changed, 91 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 714415edba7..13b588eb68f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2374,8 +2374,15 @@ static LLVMValueRef load_const_buffer_desc(struct
> si_shader_context *ctx, int i)
>  static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef
> index)
>  {
>         struct si_shader_context *ctx = si_shader_context_from_abi(abi);
> +       struct si_shader_selector *sel = ctx->shader->selector;
> +
>         LLVMValueRef ptr = LLVMGetParam(ctx->main_fn,
> ctx->param_const_and_shader_buffers);
>
> +       if (sel->info.const_buffers_declared == 1 &&
> +           sel->info.shader_buffers_declared == 0) {
> +               return load_const_buffer_desc_fast_path(ctx);
> +       }
> +
>         index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
>         index = LLVMBuildAdd(ctx->ac.builder, index,
>                              LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS,
> 0), "");
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c
> b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 437eefc54d0..e116c23815b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -600,23 +600,98 @@ void si_nir_scan_shader(const struct nir_shader *nir,
>
>         info->num_outputs = num_outputs;
>
> +       struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
> +                                              _mesa_key_pointer_equal);
> +
> +       unsigned ubo_idx = 1;
>         nir_foreach_variable(variable, &nir->uniforms) {
>                 const struct glsl_type *type = variable->type;
>                 enum glsl_base_type base_type =
>                         glsl_get_base_type(glsl_without_array(type));
>                 unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
>
> +               /* Gather buffers declared bitmasks. Note: radeonsi doesn't
> +                * really use the mask (other than ubo_idx == 1 for regular
> +                * uniforms); it's really only used for getting the buffer
> +                * count, so we don't need to worry about the ordering.
> +                */
> +               if (variable->interface_type != NULL) {
> +                       if (variable->data.mode == nir_var_uniform) {
> +
> +                               unsigned block_count;
> +                               if (base_type != GLSL_TYPE_INTERFACE) {
> +                                       struct set_entry *entry =
> +                                               _mesa_set_search(ubo_set,
> variable->interface_type);
> +
> +                                       /* Check if we have already processed
> +                                        * a member from this ubo.
> +                                        */
> +                                       if (entry)
> +                                               continue;
> +
> +                                       block_count = 1;
> +                               } else {
> +                                       block_count = aoa_size;
> +                               }
> +
> +                               info->const_buffers_declared |=
> u_bit_consecutive(ubo_idx, block_count);
> +                               ubo_idx += block_count;
> +
> +                               _mesa_set_add(ubo_set,
> variable->interface_type);
> +                       }
> +
> +                       if (variable->data.mode == nir_var_shader_storage) {
> +                               /* TODO: make this more accurate */
> +                               info->shader_buffers_declared =
> +                                       u_bit_consecutive(0,
> SI_NUM_SHADER_BUFFERS);
> +                       }
> +
> +                       continue;
> +               }
> +
>                 /* We rely on the fact that nir_lower_samplers_as_deref has
>                  * eliminated struct dereferences.
>                  */
> -               if (base_type == GLSL_TYPE_SAMPLER)
> +               if (base_type == GLSL_TYPE_SAMPLER) {
>                         info->samplers_declared |=
>                                 u_bit_consecutive(variable->data.binding,
> aoa_size);
> -               else if (base_type == GLSL_TYPE_IMAGE)
> +
> +                       if (variable->data.bindless) {
> +                               info->const_buffers_declared |= 1;
> +                               info->const_file_max[0] +=
> +                                       glsl_count_attribute_slots(type,
> false);
> +                       }
> +               } else if (base_type == GLSL_TYPE_IMAGE) {
>                         info->images_declared |=
>                                 u_bit_consecutive(variable->data.binding,
> aoa_size);
> +
> +                       if (variable->data.bindless) {
> +                               info->const_buffers_declared |= 1;
> +                               info->const_file_max[0] +=
> +                                       glsl_count_attribute_slots(type,
> false);
> +                       }
> +               } else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
> +                       if (strncmp(variable->name, "state.", 6) == 0 ||
> +                           strncmp(variable->name, "gl_", 3) == 0) {
> +                               /* FIXME: figure out why piglit tests with builtin
> +                                * uniforms are failing without this.
> +                                */
> +                               info->const_buffers_declared =
> +                                       u_bit_consecutive(0,
> SI_NUM_CONST_BUFFERS);
> +                       } else {
> +                               info->const_buffers_declared |= 1;
> +                               info->const_file_max[0] +=
> +                                       glsl_count_attribute_slots(type,
> false);
> +                       }
> +               }
>         }
>
> +       _mesa_set_destroy(ubo_set, NULL);
> +
> +       /* This is the max index not max count so we adjust it here */
> +       if (info->const_file_max[0] != 0)
> +               info->const_file_max[0] -= 1;
>

const_file_max should be -1 by default, so perhaps this decrement can be
done unconditionally.

Other than that:

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180401/cdf75099/attachment-0001.html>


More information about the mesa-dev mailing list