[Mesa-dev] [PATCH] glsl/linker: add check for compute shared memory size

Sat Oct 7 05:53:51 UTC 2017

Reviewed-by: Timothy Arceri <tarceri at itsqueeze.com>

On 07/10/17 07:17, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
> 
> Unlike uniforms, the limit on shared memory size is not called out
> explicitly in the list of things that cause linker errors, but presumably
> that's just an oversight in the spec.
> 
> Fixes dEQP-GLES31.functional.debug.negative_coverage.{callbacks,get_error,log}.compute.exceed_shared_memory_size_limit
> ---
>   src/compiler/glsl/ir_optimization.h          |  5 +++--
>   src/compiler/glsl/linker.cpp                 |  3 +--
>   src/compiler/glsl/lower_shared_reference.cpp | 21 +++++++++++++++++++--
>   3 files changed, 23 insertions(+), 6 deletions(-)
> 
> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
> index 38fb54990ea..eb3ec3b0c7d 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -136,22 +136,23 @@ bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
>   bool lower_noise(exec_list *instructions);
>   bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
>       exec_list *instructions, bool lower_input, bool lower_output,
>       bool lower_temp, bool lower_uniform);
>   bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
>   bool lower_const_arrays_to_uniforms(exec_list *instructions, unsigned stage);
>   bool lower_clip_cull_distance(struct gl_shader_program *prog,
>                                 gl_linked_shader *shader);
>   void lower_output_reads(unsigned stage, exec_list *instructions);
>   bool lower_packing_builtins(exec_list *instructions, int op_mask);
> -void lower_shared_reference(struct gl_linked_shader *shader,
> -                            unsigned *shared_size);
> +void lower_shared_reference(struct gl_context *ctx,
> +                            struct gl_shader_program *prog,
> +                            struct gl_linked_shader *shader);
>   void lower_ubo_reference(struct gl_linked_shader *shader,
>                            bool clamp_block_indices, bool use_std430_as_default);
>   void lower_packed_varyings(void *mem_ctx,
>                              unsigned locations_used,
>                              const uint8_t *components,
>                              ir_variable_mode mode,
>                              unsigned gs_input_vertices,
>                              gl_linked_shader *shader,
>                              bool disable_varying_packing, bool xfb_enabled);
>   bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index f352c5385ca..03eb05bf637 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -4650,22 +4650,21 @@ link_varyings_and_uniforms(unsigned first, unsigned last,
>   
>         const struct gl_shader_compiler_options *options =
>            &ctx->Const.ShaderCompilerOptions[i];
>   
>         if (options->LowerBufferInterfaceBlocks)
>            lower_ubo_reference(prog->_LinkedShaders[i],
>                                options->ClampBlockIndicesToArrayBounds,
>                                ctx->Const.UseSTD430AsDefaultPacking);
>   
>         if (i == MESA_SHADER_COMPUTE)
> -         lower_shared_reference(prog->_LinkedShaders[i],
> -                                &prog->Comp.SharedSize);
> +         lower_shared_reference(ctx, prog, prog->_LinkedShaders[i]);
>   
>         lower_vector_derefs(prog->_LinkedShaders[i]);
>         do_vec_index_to_swizzle(prog->_LinkedShaders[i]->ir);
>      }
>   
>      return true;
>   }
>   
>   static void
>   linker_optimisation_loop(struct gl_context *ctx, exec_list *ir,
> diff --git a/src/compiler/glsl/lower_shared_reference.cpp b/src/compiler/glsl/lower_shared_reference.cpp
> index b9098913af8..a1b3f7df47e 100644
> --- a/src/compiler/glsl/lower_shared_reference.cpp
> +++ b/src/compiler/glsl/lower_shared_reference.cpp
> @@ -26,20 +26,21 @@
>    *
>    * IR lower pass to replace dereferences of compute shader shared variables
>    * with intrinsic function calls.
>    *
>    * This relieves drivers of the responsibility of allocating space for the
>    * shared variables in the shared memory region.
>    */
>   
>   #include "lower_buffer_access.h"
>   #include "ir_builder.h"
> +#include "linker.h"
>   #include "main/macros.h"
>   #include "util/list.h"
>   #include "glsl_parser_extras.h"
>   
>   using namespace ir_builder;
>   
>   namespace {
>   
>   struct var_offset {
>      struct list_head node;
> @@ -471,29 +472,45 @@ lower_shared_reference_visitor::visit_enter(ir_call *ir)
>         base_ir->replace_with(new_ir);
>         return visit_continue_with_parent;
>      }
>   
>      return rvalue_visit(ir);
>   }
>   
>   } /* unnamed namespace */
>   
>   void
> -lower_shared_reference(struct gl_linked_shader *shader, unsigned *shared_size)
> +lower_shared_reference(struct gl_context *ctx,
> +                       struct gl_shader_program *prog,
> +                       struct gl_linked_shader *shader)
>   {
>      if (shader->Stage != MESA_SHADER_COMPUTE)
>         return;
>   
>      lower_shared_reference_visitor v(shader);
>   
>      /* Loop over the instructions lowering references, because we take a deref
>       * of an shared variable array using a shared variable dereference as the
>       * index will produce a collection of instructions all of which have cloned
>       * shared variable dereferences for that array index.
>       */
>      do {
>         v.progress = false;
>         visit_list_elements(&v, shader->ir);
>      } while (v.progress);
>   
> -   *shared_size = v.shared_size;
> +   prog->Comp.SharedSize = v.shared_size;
> +
> +   /* Section 19.1 (Compute Shader Variables) of the OpenGL 4.5 (Core Profile)
> +    * specification says:
> +    *
> +    *   "There is a limit to the total size of all variables declared as
> +    *    shared in a single program object. This limit, expressed in units of
> +    *    basic machine units, may be queried as the value of
> +    *    MAX_COMPUTE_SHARED_MEMORY_SIZE."
> +    */
> +   if (prog->Comp.SharedSize > ctx->Const.MaxComputeSharedMemorySize) {
> +      linker_error(prog, "Too much shared memory used (%u/%u)\n",
> +                   prog->Comp.SharedSize,
> +                   ctx->Const.MaxComputeSharedMemorySize);
> +   }
>   }
>