<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Thu, May 19, 2016 at 10:07 AM, Ian Romanick <span dir="ltr"><<a href="mailto:idr@freedesktop.org" target="_blank">idr@freedesktop.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">So... what did we decide for arrays of atomic counters? Do we need an<br>
extra pass for that or ... ?<br>
<br>
Also... how does this handle the possibly unsized (actually<br>
draw-time-sized) array at the end of an SSBO?<br></blockquote><div><br></div><div>This patch only matters for the block index, not the offset into the buffer. For atomics, the "block index" isn't allowed to be indirected (maybe they don't even have one?). For UBOs and SSBOs, we set the correct buffer size in the surface state so the hardware won't let accesses go outside the buffer. This should also work for SSBO unsized arrays.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
For UBOs, I think this patch is definitely sufficient, and I think it<br>
improves things quite a lot for SSBOs. We may need some more, but this<br>
patch is<br>
<br>
Reviewed-by: Ian Romanick <<a href="mailto:ian.d.romanick@intel.com">ian.d.romanick@intel.com</a>><br>
<div class="HOEnZb"><div class="h5"><br>
On 05/19/2016 12:21 AM, Jason Ekstrand wrote:<br>
> This prevents array overflow when the block is actually an array of UBOs or<br>
> SSBOs. On some hardware such as i965, such overflows can cause GPU hangs.<br>
> ---<br>
> src/compiler/glsl/ir_optimization.h | 2 +-<br>
> src/compiler/glsl/linker.cpp | 3 ++-<br>
> src/compiler/glsl/lower_ubo_reference.cpp | 36 +++++++++++++++++++++++++++----<br>
> src/mesa/drivers/dri/i965/brw_compiler.c | 1 +<br>
> src/mesa/main/mtypes.h | 3 +++<br>
> 5 files changed, 39 insertions(+), 6 deletions(-)<br>
><br>
> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h<br>
> index 5fc2740..4afa37e 100644<br>
> --- a/src/compiler/glsl/ir_optimization.h<br>
> +++ b/src/compiler/glsl/ir_optimization.h<br>
> @@ -123,7 +123,7 @@ bool lower_clip_distance(gl_shader *shader);<br>
> void lower_output_reads(unsigned stage, exec_list *instructions);<br>
> bool lower_packing_builtins(exec_list *instructions, int op_mask);<br>
> void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);<br>
> -void lower_ubo_reference(struct gl_shader *shader);<br>
> +void lower_ubo_reference(struct gl_shader *shader, bool clamp_block_indices);<br>
> void lower_packed_varyings(void *mem_ctx,<br>
> unsigned locations_used, ir_variable_mode mode,<br>
> unsigned gs_input_vertices, gl_shader *shader,<br>
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp<br>
> index 71a71df..07c8263 100644<br>
> --- a/src/compiler/glsl/linker.cpp<br>
> +++ b/src/compiler/glsl/linker.cpp<br>
> @@ -4879,7 +4879,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)<br>
> &ctx->Const.ShaderCompilerOptions[i];<br>
><br>
> if (options->LowerBufferInterfaceBlocks)<br>
> - lower_ubo_reference(prog->_LinkedShaders[i]);<br>
> + lower_ubo_reference(prog->_LinkedShaders[i],<br>
> + options->ClampBlockIndicesToArrayBounds);<br>
><br>
> if (options->LowerShaderSharedVariables)<br>
> lower_shared_reference(prog->_LinkedShaders[i],<br>
> diff --git a/src/compiler/glsl/lower_ubo_reference.cpp b/src/compiler/glsl/lower_ubo_reference.cpp<br>
> index 1a0140f..749deed 100644<br>
> --- a/src/compiler/glsl/lower_ubo_reference.cpp<br>
> +++ b/src/compiler/glsl/lower_ubo_reference.cpp<br>
> @@ -44,8 +44,10 @@ namespace {<br>
> class lower_ubo_reference_visitor :<br>
> public lower_buffer_access::lower_buffer_access {<br>
> public:<br>
> - lower_ubo_reference_visitor(struct gl_shader *shader)<br>
> - : shader(shader), struct_field(NULL), variable(NULL)<br>
> + lower_ubo_reference_visitor(struct gl_shader *shader,<br>
> + bool clamp_block_indices)<br>
> + : shader(shader), clamp_block_indices(clamp_block_indices),<br>
> + struct_field(NULL), variable(NULL)<br>
> {<br>
> }<br>
><br>
> @@ -104,6 +106,7 @@ public:<br>
> ir_visitor_status visit_enter(ir_call *ir);<br>
><br>
> struct gl_shader *shader;<br>
> + bool clamp_block_indices;<br>
> struct gl_uniform_buffer_variable *ubo_var;<br>
> const struct glsl_struct_field *struct_field;<br>
> ir_variable *variable;<br>
> @@ -242,6 +245,26 @@ interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,<br>
> return NULL;<br>
> }<br>
><br>
> +static ir_rvalue *<br>
> +clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)<br>
> +{<br>
> + assert(type->is_array());<br>
> +<br>
> + const unsigned array_size = type->arrays_of_arrays_size();<br>
> +<br>
> + ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);<br>
> + max_index->type = index->type;<br>
> +<br>
> + ir_constant *zero = new(mem_ctx) ir_constant(0);<br>
> + zero->type = index->type;<br>
> +<br>
> + if (index->type->base_type == GLSL_TYPE_INT)<br>
> + index = max2(index, zero);<br>
> + index = min2(index, max_index);<br>
> +<br>
> + return index;<br>
> +}<br>
> +<br>
> void<br>
> lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,<br>
> ir_variable *var,<br>
> @@ -258,6 +281,11 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,<br>
> interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,<br>
> deref, &nonconst_block_index);<br>
><br>
> + if (nonconst_block_index && clamp_block_indices) {<br>
> + nonconst_block_index =<br>
> + clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);<br>
> + }<br>
> +<br>
> /* Locate the block by interface name */<br>
> unsigned num_blocks;<br>
> struct gl_uniform_block **blocks;<br>
> @@ -1062,9 +1090,9 @@ lower_ubo_reference_visitor::visit_enter(ir_call *ir)<br>
> } /* unnamed namespace */<br>
><br>
> void<br>
> -lower_ubo_reference(struct gl_shader *shader)<br>
> +lower_ubo_reference(struct gl_shader *shader, bool clamp_block_indices)<br>
> {<br>
> - lower_ubo_reference_visitor v(shader);<br>
> + lower_ubo_reference_visitor v(shader, clamp_block_indices);<br>
><br>
> /* Loop over the instructions lowering references, because we take<br>
> * a deref of a UBO array using a UBO dereference as the index will<br>
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c<br>
> index 82131db..3f17589 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_compiler.c<br>
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.c<br>
> @@ -188,6 +188,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)<br>
> }<br>
><br>
> compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;<br>
> + compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true;<br>
> }<br>
><br>
> compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false;<br>
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h<br>
> index 569e0ac..24f442f 100644<br>
> --- a/src/mesa/main/mtypes.h<br>
> +++ b/src/mesa/main/mtypes.h<br>
> @@ -2949,6 +2949,9 @@ struct gl_shader_compiler_options<br>
><br>
> GLboolean LowerBufferInterfaceBlocks; /**< Lower UBO and SSBO access to intrinsics. */<br>
><br>
> + /** Clamp UBO and SSBO block indices so they don't go out-of-bounds. */<br>
> + GLboolean ClampBlockIndicesToArrayBounds;<br>
> +<br>
> GLboolean LowerShaderSharedVariables; /**< Lower compute shader shared<br>
> * variable access to intrinsics. */<br>
><br>
><br>
<br>
</div></div></blockquote></div><br></div></div>