[Mesa-dev] [PATCH 4/4] radeonsi/nir: gather buffers declared more accurately and use const fast path
Timothy Arceri
tarceri at itsqueeze.com
Fri Mar 30 05:50:57 UTC 2018
On 30/03/18 13:52, Marek Olšák wrote:
> On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri <tarceri at itsqueeze.com
> <mailto:tarceri at itsqueeze.com>> wrote:
>
> For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip
> setting the more accurate masks for some builtin uniforms for now as
> it causes some piglit regressions.
> ---
> src/gallium/drivers/radeonsi/si_shader.c | 8 +++
> src/gallium/drivers/radeonsi/si_shader_nir.c | 82
> ++++++++++++++++++++++++++--
> 2 files changed, 84 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 62cb7ea7eb5..9a12f9ee8f2 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2377,8 +2377,16 @@ static LLVMValueRef
> load_const_buffer_desc(struct si_shader_context *ctx, int i)
> static LLVMValueRef load_ubo(struct ac_shader_abi *abi,
> LLVMValueRef index)
> {
> struct si_shader_context *ctx =
> si_shader_context_from_abi(abi);
> + struct si_shader_selector *sel = ctx->shader->selector;
> +
> LLVMValueRef ptr = LLVMGetParam(ctx->main_fn,
> ctx->param_const_and_shader_buffers);
>
> + if (sel->info.const_buffers_declared == 1 &&
> + sel->info.shader_buffers_declared == 0 &&
> + !(ctx->screen->info.chip_class == SI && HAVE_LLVM <
> 0x0600)) {
>
>
> You don't have to check SI and LLVM here. (because
> const_buffers_declared > 1)
Right. Will fix.
>
> + return load_const_buffer_desc_fast_path(ctx);
> + }
> +
> index = si_llvm_bound_index(ctx, index,
> ctx->num_const_buffers);
> index = LLVMBuildAdd(ctx->ac.builder, index,
> LLVMConstInt(ctx->i32,
> SI_NUM_SHADER_BUFFERS, 0), "");
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c
> b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 52950668714..595f376f6a2 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -611,23 +611,91 @@ void si_nir_scan_shader(const struct
> nir_shader *nir,
>
> info->num_outputs = num_outputs;
>
> + struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
> + _mesa_key_pointer_equal);
> +
> + unsigned ubo_idx = 1;
> nir_foreach_variable(variable, &nir->uniforms) {
> const struct glsl_type *type = variable->type;
> enum glsl_base_type base_type =
> glsl_get_base_type(glsl_without_array(type));
> unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
>
> + /* Gather buffers declared bitmasks. Note: radeonsi
> doesn't
> + * really use the mask (other than ubo_idx == 1 for
> regular
> + * uniforms) its really only used for getting the
> buffer count
> + * so we don't need to worry about the ordering.
> + */
> + if (variable->interface_type != NULL) {
> + if (variable->data.mode == nir_var_uniform) {
> +
> + unsigned block_size;
> + if (base_type != GLSL_TYPE_INTERFACE) {
> + struct set_entry *entry =
> +
> _mesa_set_search(ubo_set, variable->interface_type);
> +
> + /* Check if we have already
> processed
> + * a member from this ubo.
> + */
> + if (entry)
> + continue;
> +
> + block_size = 1;
> + } else {
> + block_size = aoa_size;
> + }
> +
> + info->const_buffers_declared |=
> u_bit_consecutive(ubo_idx, block_size);
> + ubo_idx += block_size;
>
>
> Can you explain what this does?
Sets the mask for an array of blocks e.g.
uniform block {
vec4 a;
vec4 b;
} name[4];
Since each block array element is considered a separate buffer. I should
probably rename block_size -> block_count
>
> +
> + _mesa_set_add(ubo_set,
> variable->interface_type);
> + }
> +
> + if (variable->data.mode ==
> nir_var_shader_storage) {
> + /* TODO: make this more accurate */
> + info->shader_buffers_declared =
> + u_bit_consecutive(0,
> SI_NUM_SHADER_BUFFERS);
>
> + }
> +
> + continue;
> + }
> +
> /* We rely on the fact that
> nir_lower_samplers_as_deref has
> * eliminated struct dereferences.
> */
> - if (base_type == GLSL_TYPE_SAMPLER)
> + if (base_type == GLSL_TYPE_SAMPLER) {
> info->samplers_declared |=
>
> u_bit_consecutive(variable->data.binding, aoa_size);
> - else if (base_type == GLSL_TYPE_IMAGE)
> +
> + if (variable->data.bindless)
> + info->const_buffers_declared |= 1;
> + } else if (base_type == GLSL_TYPE_IMAGE) {
> info->images_declared |=
>
> u_bit_consecutive(variable->data.binding, aoa_size);
> +
> + if (variable->data.bindless)
> + info->const_buffers_declared |= 1;
> + } else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
> + if (strncmp(variable->name, "state.", 6) == 0) {
> + /* FIXME: figure out why piglit
> tests with builtin
> + * uniforms are failing without this.
> + */
> + info->const_buffers_declared =
> + u_bit_consecutive(0,
> SI_NUM_CONST_BUFFERS);
>
> + } else {
> + info->const_buffers_declared |= 1;
> + info->const_file_max[0] +=
> +
> glsl_count_attribute_slots(type, false);
> + }
> + }
> }
>
> + _mesa_set_destroy(ubo_set, NULL);
> +
> + /* This is the max index not max count so we adjust it here */
> + if (info->const_file_max[0] != 0)
> + info->const_file_max[0] -= 1;
> +
> info->num_written_clipdistance =
> nir->info.clip_distance_array_size;
> info->num_written_culldistance =
> nir->info.cull_distance_array_size;
> info->clipdist_writemask = u_bit_consecutive(0,
> info->num_written_clipdistance);
> @@ -636,10 +704,6 @@ void si_nir_scan_shader(const struct nir_shader
> *nir,
> if (info->processor == PIPE_SHADER_FRAGMENT)
> info->uses_kill = nir->info.fs.uses_discard;
>
> - /* TODO make this more accurate */
> - info->const_buffers_declared = u_bit_consecutive(0,
> SI_NUM_CONST_BUFFERS);
> - info->shader_buffers_declared = u_bit_consecutive(0,
> SI_NUM_SHADER_BUFFERS);
> -
> func = (struct nir_function
> *)exec_list_get_head_const(&nir->functions);
> nir_foreach_block(block, func->impl) {
> nir_foreach_instr(instr, block)
> @@ -654,6 +718,12 @@ void si_nir_scan_shader(const struct nir_shader
> *nir,
> void
> si_lower_nir(struct si_shader_selector* sel)
> {
> + /* Disable const buffer fast path for old LLVM versions */
> + if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600) {
> + sel->info.const_buffers_declared =
> u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
> + sel->info.shader_buffers_declared =
> u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
> + }
>
>
> You can ignore what I said in patch 3.
>
> You can do this instead:
> if (.... && const_buffers_declared == 1 && shader_buffers_declared = 0)
> const_buffers_declared |= 0x2;
>
> This disables the optimization while still significantly decreases
> overhead in the descriptor upload path.
Makes sense will change. Thanks!.
>
> Marek
More information about the mesa-dev
mailing list