<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri <<a href="mailto:tarceri@itsqueeze.com" target="_blank">tarceri@itsqueeze.com</a>> wrote: <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip setting the more accurate masks for some builtin uniforms for now as it causes some piglit regressions. --- src/gallium/drivers/radeonsi/si_shader.c | 8 +++ src/gallium/drivers/radeonsi/si_shader_nir.c | 82 ++++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 62cb7ea7eb5..9a12f9ee8f2 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2377,8 +2377,16 @@ static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i) static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); + struct si_shader_selector *sel = ctx->shader->selector; + LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers); + if (sel->info.const_buffers_declared == 1 && + sel->info.shader_buffers_declared == 0 && + !(ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600)) { </blockquote><div> </div><div>You don't have to check SI and LLVM here. (because const_buffers_declared > 1) </div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + return load_const_buffer_desc_fast_path(ctx); + } + index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers); index = LLVMBuildAdd(ctx->ac.builder, index, LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), ""); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 52950668714..595f376f6a2 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -611,23 +611,91 @@ void si_nir_scan_shader(const struct nir_shader *nir, info->num_outputs = num_outputs; + struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + unsigned ubo_idx = 1; nir_foreach_variable(variable, &nir->uniforms) { const struct glsl_type *type = variable->type; enum glsl_base_type base_type = glsl_get_base_type(glsl_without_array(type)); unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type)); + /* Gather buffers declared bitmasks. Note: radeonsi doesn't + * really use the mask (other than ubo_idx == 1 for regular + * uniforms) its really only used for getting the buffer count + * so we don't need to worry about the ordering. + */ + if (variable->interface_type != NULL) { + if (variable->data.mode == nir_var_uniform) { + + unsigned block_size; + if (base_type != GLSL_TYPE_INTERFACE) { + struct set_entry *entry = + _mesa_set_search(ubo_set, variable->interface_type); + + /* Check if we have already processed + * a member from this ubo. + */ + if (entry) + continue; + + block_size = 1; + } else { + block_size = aoa_size; + } + + info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_size); + ubo_idx += block_size; </blockquote><div> </div><div>Can you explain what this does? </div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + + _mesa_set_add(ubo_set, variable->interface_type); + } + + if (variable->data.mode == nir_var_shader_storage) { + /* TODO: make this more accurate */ + info->shader_buffers_declared = + u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); </blockquote><div></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + } + + continue; + } + /* We rely on the fact that nir_lower_samplers_as_deref has * eliminated struct dereferences. */ - if (base_type == GLSL_TYPE_SAMPLER) + if (base_type == GLSL_TYPE_SAMPLER) { info->samplers_declared |= u_bit_consecutive(variable->data.binding, aoa_size); - else if (base_type == GLSL_TYPE_IMAGE) + + if (variable->data.bindless) + info->const_buffers_declared |= 1; + } else if (base_type == GLSL_TYPE_IMAGE) { info->images_declared |= u_bit_consecutive(variable->data.binding, aoa_size); + + if (variable->data.bindless) + info->const_buffers_declared |= 1; + } else if (base_type != GLSL_TYPE_ATOMIC_UINT) { + if (strncmp(variable->name, "state.", 6) == 0) { + /* FIXME: figure out why piglit tests with builtin + * uniforms are failing without this. + */ + info->const_buffers_declared = + u_bit_consecutive(0, SI_NUM_CONST_BUFFERS); </blockquote><div></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + } else { + info->const_buffers_declared |= 1; + info->const_file_max[0] += + glsl_count_attribute_slots(type, false); + } + } } + _mesa_set_destroy(ubo_set, NULL); + + /* This is the max index not max count so we adjust it here */ + if (info->const_file_max[0] != 0) + info->const_file_max[0] -= 1; + info->num_written_clipdistance = nir->info.clip_distance_array_size; info->num_written_culldistance = nir->info.cull_distance_array_size; info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance); @@ -636,10 +704,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, if (info->processor == PIPE_SHADER_FRAGMENT) info->uses_kill = nir->info.fs.uses_discard; - /* TODO make this more accurate */ - info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS); - info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); - func = (struct nir_function *)exec_list_get_head_const(&nir->functions); nir_foreach_block(block, func->impl) { nir_foreach_instr(instr, block) @@ -654,6 +718,12 @@ void si_nir_scan_shader(const struct nir_shader *nir, void si_lower_nir(struct si_shader_selector* sel) { + /* Disable const buffer fast path for old LLVM versions */ + if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600) { + sel->info.const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS); + sel->info.shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); + } </blockquote><div> You can ignore what I said in patch 3. </div><div>You can do this instead: </div><div>if (.... && const_buffers_declared == 1 && shader_buffers_declared = 0) </div><div> const_buffers_declared |= 0x2; </div></div>This disables the optimization while still significantly decreases overhead in the descriptor upload path. </div><div class="gmail_extra">Marek </div></div>