<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri <span dir="ltr"><<a href="mailto:tarceri@itsqueeze.com" target="_blank">tarceri@itsqueeze.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip<br>
setting the more accurate masks for some builtin uniforms for now as<br>
it causes some piglit regressions.<br>
---<br>
 src/gallium/drivers/radeonsi/<wbr>si_shader.c     |  8 +++<br>
 src/gallium/drivers/radeonsi/<wbr>si_shader_nir.c | 82 ++++++++++++++++++++++++++--<br>
 2 files changed, 84 insertions(+), 6 deletions(-)<br>
<br>
diff --git a/src/gallium/drivers/<wbr>radeonsi/si_shader.c b/src/gallium/drivers/<wbr>radeonsi/si_shader.c<br>
index 62cb7ea7eb5..9a12f9ee8f2 100644<br>
--- a/src/gallium/drivers/<wbr>radeonsi/si_shader.c<br>
+++ b/src/gallium/drivers/<wbr>radeonsi/si_shader.c<br>
@@ -2377,8 +2377,16 @@ static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)<br>
 static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)<br>
 {<br>
        struct si_shader_context *ctx = si_shader_context_from_abi(<wbr>abi);<br>
+       struct si_shader_selector *sel = ctx->shader->selector;<br>
+<br>
        LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_<wbr>buffers);<br>
<br>
+       if (sel->info.const_buffers_<wbr>declared == 1 &&<br>
+           sel->info.shader_buffers_<wbr>declared == 0 &&<br>
+           !(ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600)) {<br></blockquote><div><br></div><div>You don't have to check SI and LLVM here. (because const_buffers_declared > 1)<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+               return load_const_buffer_desc_fast_<wbr>path(ctx);<br>
+       }<br>
+<br>
        index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);<br>
        index = LLVMBuildAdd(ctx->ac.builder, index,<br>
                             LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");<br>
diff --git a/src/gallium/drivers/<wbr>radeonsi/si_shader_nir.c b/src/gallium/drivers/<wbr>radeonsi/si_shader_nir.c<br>
index 52950668714..595f376f6a2 100644<br>
--- a/src/gallium/drivers/<wbr>radeonsi/si_shader_nir.c<br>
+++ b/src/gallium/drivers/<wbr>radeonsi/si_shader_nir.c<br>
@@ -611,23 +611,91 @@ void si_nir_scan_shader(const struct nir_shader *nir,<br>
<br>
        info->num_outputs = num_outputs;<br>
<br>
+       struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,<br>
+                                              _mesa_key_pointer_equal);<br>
+<br>
+       unsigned ubo_idx = 1;<br>
        nir_foreach_variable(variable, &nir->uniforms) {<br>
                const struct glsl_type *type = variable->type;<br>
                enum glsl_base_type base_type =<br>
                        glsl_get_base_type(glsl_<wbr>without_array(type));<br>
                unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));<br>
<br>
+               /* Gather buffers declared bitmasks. Note: radeonsi doesn't<br>
+                * really use the mask (other than ubo_idx == 1 for regular<br>
+                * uniforms) its really only used for getting the buffer count<br>
+                * so we don't need to worry about the ordering.<br>
+                */<br>
+               if (variable->interface_type != NULL) {<br>
+                       if (variable->data.mode == nir_var_uniform) {<br>
+<br>
+                               unsigned block_size;<br>
+                               if (base_type != GLSL_TYPE_INTERFACE) {<br>
+                                       struct set_entry *entry =<br>
+                                               _mesa_set_search(ubo_set, variable->interface_type);<br>
+<br>
+                                       /* Check if we have already processed<br>
+                                        * a member from this ubo.<br>
+                                        */<br>
+                                       if (entry)<br>
+                                               continue;<br>
+<br>
+                                       block_size = 1;<br>
+                               } else {<br>
+                                       block_size = aoa_size;<br>
+                               }<br>
+<br>
+                               info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_size);<br>
+                               ubo_idx += block_size;<br></blockquote><div><br></div><div>Can you explain what this does?<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+                               _mesa_set_add(ubo_set, variable->interface_type);<br>
+                       }<br>
+<br>
+                       if (variable->data.mode == nir_var_shader_storage) {<br>
+                               /* TODO: make this more accurate */<br>
+                               info->shader_buffers_declared =<br>
+                                       u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);<br></blockquote><div></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+                       }<br>
+<br>
+                       continue;<br>
+               }<br>
+<br>
                /* We rely on the fact that nir_lower_samplers_as_deref has<br>
                 * eliminated struct dereferences.<br>
                 */<br>
-               if (base_type == GLSL_TYPE_SAMPLER)<br>
+               if (base_type == GLSL_TYPE_SAMPLER) {<br>
                        info->samplers_declared |=<br>
                                u_bit_consecutive(variable-><wbr>data.binding, aoa_size);<br>
-               else if (base_type == GLSL_TYPE_IMAGE)<br>
+<br>
+                       if (variable->data.bindless)<br>
+                               info->const_buffers_declared |= 1;<br>
+               } else if (base_type == GLSL_TYPE_IMAGE) {<br>
                        info->images_declared |=<br>
                                u_bit_consecutive(variable-><wbr>data.binding, aoa_size);<br>
+<br>
+                       if (variable->data.bindless)<br>
+                               info->const_buffers_declared |= 1;<br>
+               } else if (base_type != GLSL_TYPE_ATOMIC_UINT) {<br>
+                       if (strncmp(variable->name, "state.", 6) == 0) {<br>
+                               /* FIXME: figure out why piglit tests with builtin<br>
+                                * uniforms are failing without this.<br>
+                                */<br>
+                               info->const_buffers_declared =<br>
+                                       u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);<br></blockquote><div></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+                       } else {<br>
+                               info->const_buffers_declared |= 1;<br>
+                               info->const_file_max[0] +=<br>
+                                       glsl_count_attribute_slots(<wbr>type, false);<br>
+                       }<br>
+               }<br>
        }<br>
<br>
+       _mesa_set_destroy(ubo_set, NULL);<br>
+<br>
+       /* This is the max index not max count so we adjust it here */<br>
+       if (info->const_file_max[0] != 0)<br>
+               info->const_file_max[0] -= 1;<br>
+<br>
        info->num_written_clipdistance = nir->info.clip_distance_array_<wbr>size;<br>
        info->num_written_culldistance = nir->info.cull_distance_array_<wbr>size;<br>
        info->clipdist_writemask = u_bit_consecutive(0, info->num_written_<wbr>clipdistance);<br>
@@ -636,10 +704,6 @@ void si_nir_scan_shader(const struct nir_shader *nir,<br>
        if (info->processor == PIPE_SHADER_FRAGMENT)<br>
                info->uses_kill = nir->info.fs.uses_discard;<br>
<br>
-       /* TODO make this more accurate */<br>
-       info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);<br>
-       info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);<br>
-<br>
        func = (struct nir_function *)exec_list_get_head_const(&<wbr>nir->functions);<br>
        nir_foreach_block(block, func->impl) {<br>
                nir_foreach_instr(instr, block)<br>
@@ -654,6 +718,12 @@ void si_nir_scan_shader(const struct nir_shader *nir,<br>
 void<br>
 si_lower_nir(struct si_shader_selector* sel)<br>
 {<br>
+       /* Disable const buffer fast path for old LLVM versions */<br>
+       if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600) {<br>
+               sel->info.const_buffers_<wbr>declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);<br>
+               sel->info.shader_buffers_<wbr>declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);<br>
+       }<br></blockquote><div><br>You can ignore what I said in patch 3.<br><br></div><div>You can do this instead:<br></div><div>if (.... && const_buffers_declared == 1 && shader_buffers_declared = 0)<br></div><div>   const_buffers_declared |= 0x2;<br><br></div></div>This disables the optimization while still significantly decreases overhead in the descriptor upload path. <br><br></div><div class="gmail_extra">Marek<br></div></div>