[Mesa-dev] [PATCH v4 (part2) 20/59] glsl: Add parser/compiler support for std430 interface packing qualifier

Wed Sep 9 03:32:05 PDT 2015

On 31/08/15 08:38, Samuel Iglesias Gonsálvez wrote:
> 
> 
> On 29/08/15 02:27, Jordan Justen wrote:
>> On 2015-08-05 01:30:17, Iago Toral Quiroga wrote:
>>> From: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
>>>
>>> This commit also adds functions to calculate std430 base alignment and sizes
>>>
>>> Signed-off-by: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
>>> ---
>>>  src/glsl/ast.h                   |   1 +
>>>  src/glsl/ast_to_hir.cpp          |  20 +++++--
>>>  src/glsl/ast_type.cpp            |   1 +
>>>  src/glsl/glsl_parser.yy          |   2 +
>>>  src/glsl/glsl_types.cpp          | 117 +++++++++++++++++++++++++++++++++++++++
>>>  src/glsl/glsl_types.h            |  15 ++++-
>>>  src/glsl/link_uniform_blocks.cpp |  15 ++++-
>>>  src/mesa/main/mtypes.h           |   3 +-
>>>  8 files changed, 165 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/src/glsl/ast.h b/src/glsl/ast.h
>>> index d8c6cea..ac3f80f 100644
>>> --- a/src/glsl/ast.h
>>> +++ b/src/glsl/ast.h
>>> @@ -491,6 +491,7 @@ struct ast_type_qualifier {
>>>          /** \name Layout qualifiers for GL_ARB_uniform_buffer_object */
>>>          /** \{ */
>>>           unsigned std140:1;
>>> +         unsigned std430:1;
>>>           unsigned shared:1;
>>>           unsigned packed:1;
>>>           unsigned column_major:1;
>>> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
>>> index 6e63d86..2dadb03 100644
>>> --- a/src/glsl/ast_to_hir.cpp
>>> +++ b/src/glsl/ast_to_hir.cpp
>>> @@ -2886,11 +2886,12 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
>>>         var->data.depth_layout = ir_depth_layout_none;
>>>  
>>>     if (qual->flags.q.std140 ||
>>> +       qual->flags.q.std430 ||
>>>         qual->flags.q.packed ||
>>>         qual->flags.q.shared) {
>>>        _mesa_glsl_error(loc, state,
>>> -                       "uniform block layout qualifiers std140, packed, and "
>>> -                       "shared can only be applied to uniform blocks, not "
>>> +                       "uniform block layout qualifiers std140, std430, packed, "
>>> +                       "and shared can only be applied to uniform blocks, not "
>>>                         "members");
>>>     }
>>>  
>>> @@ -5655,12 +5656,14 @@ ast_process_structure_or_interface_block(exec_list *instructions,
>>>           const struct ast_type_qualifier *const qual =
>>>              & decl_list->type->qualifier;
>>>           if (qual->flags.q.std140 ||
>>> +             qual->flags.q.std430 ||
>>>               qual->flags.q.packed ||
>>>               qual->flags.q.shared) {
>>>              _mesa_glsl_error(&loc, state,
>>>                               "uniform/shader storage block layout qualifiers "
>>> -                             "std140, packed, and shared can only be applied "
>>> -                             "to uniform/shader storage blocks, not members");
>>> +                             "std140, std430, packed, and shared can only be "
>>> +                             "applied to uniform/shader storage blocks, not "
>>> +                             "members");
>>>           }
>>>  
>>>           if (qual->flags.q.constant) {
>>> @@ -5872,6 +5875,13 @@ ast_interface_block::hir(exec_list *instructions,
>>>                         this->block_name);
>>>     }
>>>  
>>> +   if (!this->layout.flags.q.buffer &&
>>> +       this->layout.flags.q.std430) {
>>> +      _mesa_glsl_error(&loc, state,
>>> +                       "std430 storage block layout qualifier is supported "
>>> +                       "only for shader storage blocks");
>>> +   }
>>> +
>>>     /* The ast_interface_block has a list of ast_declarator_lists.  We
>>>      * need to turn those into ir_variables with an association
>>>      * with this uniform block.
>>> @@ -5881,6 +5891,8 @@ ast_interface_block::hir(exec_list *instructions,
>>>        packing = GLSL_INTERFACE_PACKING_SHARED;
>>>     } else if (this->layout.flags.q.packed) {
>>>        packing = GLSL_INTERFACE_PACKING_PACKED;
>>> +   } else if (this->layout.flags.q.std430) {
>>> +      packing = GLSL_INTERFACE_PACKING_STD430;
>>>     } else {
>>>        /* The default layout is std140.
>>>         */
>>> diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
>>> index a4671e2..6e69ba2 100644
>>> --- a/src/glsl/ast_type.cpp
>>> +++ b/src/glsl/ast_type.cpp
>>> @@ -123,6 +123,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
>>>     ubo_layout_mask.flags.q.std140 = 1;
>>>     ubo_layout_mask.flags.q.packed = 1;
>>>     ubo_layout_mask.flags.q.shared = 1;
>>> +   ubo_layout_mask.flags.q.std430 = 1;
>>>  
>>>     ast_type_qualifier ubo_binding_mask;
>>>     ubo_binding_mask.flags.i = 0;
>>> diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
>>> index 2b0c8bd..e1c0f3d 100644
>>> --- a/src/glsl/glsl_parser.yy
>>> +++ b/src/glsl/glsl_parser.yy
>>> @@ -1197,6 +1197,8 @@ layout_qualifier_id:
>>>              $$.flags.q.std140 = 1;
>>>           } else if (match_layout_qualifier($1, "shared", state) == 0) {
>>>              $$.flags.q.shared = 1;
>>> +         } else if (match_layout_qualifier($1, "std430", state) == 0) {
>>> +            $$.flags.q.std430 = 1;
>>>           } else if (match_layout_qualifier($1, "column_major", state) == 0) {
>>>              $$.flags.q.column_major = 1;
>>>           /* "row_major" is a reserved word in GLSL 1.30+. Its token is parsed
>>> diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
>>> index 755618a..d29769a 100644
>>> --- a/src/glsl/glsl_types.cpp
>>> +++ b/src/glsl/glsl_types.cpp
>>> @@ -1357,6 +1357,123 @@ glsl_type::std140_size(bool row_major) const
>>>     return -1;
>>>  }
>>>  
>>> +unsigned
>>> +glsl_type::std430_base_alignment(bool row_major) const
>>> +{
>>> +   unsigned base_alignment = 0;
>>> +   /* When using the "std430" storage layout, shader storage
>>> +    * blocks will be laid out in buffer storage identically to uniform and
>>> +    * shader storage blocks using the "std140" layout, except that the base
>>> +    * alignment of arrays of scalars and vectors in rule (4) and of structures
>>> +    * in rule (9) are not rounded up a multiple of the base alignment of a vec4.
>>> +    */
>>> +
>>> +   /* (1) If the member is a scalar consuming <N> basic machine units, the
>>> +    *     base alignment is <N>.
>>> +    *
>>> +    * (2) If the member is a two- or four-component vector with components
>>> +    *     consuming <N> basic machine units, the base alignment is 2<N> or
>>> +    *     4<N>, respectively.
>>> +    *
>>> +    * (3) If the member is a three-component vector with components consuming
>>> +    *     <N> basic machine units, the base alignment is 4<N>.
>>> +    */
>>> +   if (this->is_array()) {
>>> +      base_alignment = this->fields.array->std430_base_alignment(row_major);
>>> +   } else {
>>> +      /* For the rest of cases, use std140_base_alignment() */
>>> +      base_alignment = this->std140_base_alignment(row_major);
>>> +   }
>>> +   return base_alignment;
>>> +}
>>> +
>>> +unsigned
>>> +glsl_type::std430_size(bool row_major) const
>>
>> I think you should move these two new glsl_type functions into a
>> separate, earlier commit.
>>
> 
> OK.
> 
>> For the parse/link portions of this patch:
>> Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>
>>
> 
> Thanks.
> 
>>> +{
>>> +   unsigned N = is_double() ? 8 : 4;
>>> +
>>> +   /* (1) If the member is a scalar consuming <N> basic machine units, the
>>> +    *     base alignment is <N>.
>>> +    *
>>> +    * (2) If the member is a two- or four-component vector with components
>>> +    *     consuming <N> basic machine units, the base alignment is 2<N> or
>>> +    *     4<N>, respectively.
>>> +    *
>>> +    * (3) If the member is a three-component vector with components consuming
>>> +    *     <N> basic machine units, the base alignment is 4<N>.
>>> +    */
>>> +   if (this->is_scalar() || this->is_vector()) {
>>> +      if (this->vector_elements > 2)
>>> +         return glsl_align(this->vector_elements * N, 16);
>>> +      else
>>> +         return this->vector_elements * N;
>>> +   }
>>> +
>>> +   if (this->without_array()->is_matrix()) {
>>> +      const struct glsl_type *element_type;
>>> +      const struct glsl_type *vec_type;
>>> +      unsigned int array_len;
>>> +
>>> +      if (this->is_array()) {
>>> +        element_type = this->fields.array;
>>> +        array_len = this->length;
>>> +      } else {
>>> +        element_type = this;
>>> +        array_len = 1;
>>> +      }
>>> +
>>> +      if (row_major) {
>>> +         vec_type = get_instance(element_type->base_type,
>>> +                                 element_type->matrix_columns, 1);
>>> +
>>> +        array_len *= element_type->vector_elements;
>>> +      } else {
>>> +        vec_type = get_instance(element_type->base_type,
>>> +                                element_type->vector_elements, 1);
>>> +        array_len *= element_type->matrix_columns;
>>> +      }
>>> +      const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
>>> +                                                                 array_len);
>>> +
>>> +      return array_type->std430_size(false);
>>> +   }
>>> +
>>> +   /* When using the "std430" storage layout, shader storage
>>> +    * blocks will be laid out in buffer storage identically to uniform and
>>> +    * shader storage blocks using the "std140" layout, except that the base
>>> +    * alignment of arrays of scalars and vectors in rule (4) and of structures
>>> +    * in rule (9) are not rounded up a multiple of the base alignment of a vec4.
>>> +    */
>>> +   if (this->is_array())
>>> +         return this->length * this->fields.array->std430_size(row_major);
>>
>> Hmm, you quoted the extension spec, but the 4.30 spec has: "When using
>> the std430 storage layout, shader storage blocks will be laid out in
>> buffer storage identically to uniform and shader storage blocks using
>> the std140 layout, except that the base alignment and stride of arrays
>> of scalars and vectors in rule 4 and of structures in rule 9 are not
>> rounded up a multiple of the base alignment of a vec4 ."
>>
>> Notably, it mentions stride. Wouldn't this be wrong for a vec3 array,
>> where the stride should be 12 bytes?
>>
> 
> Good catch! Both specs differ in this case. I will update the comment.
> 
> Right now, we are using a 16 bytes stride for an array of vec3s. NVIDIA
> proprietary driver (4.50 NVIDIA 352.21) does the same.
> 
> I am going to modify our code to fix this case. Thanks!
> 

Before doing any change, I have investigated which is the proper stride
value for array of vec3. From 4.30 spec:

"3. If the member is a three-component vector with components consuming
N basic machine units, the base alignment is 4N."

std430 explicitly says that the base alignment and stride of arrays of
scalars and vectors in rule 4 are not rounded up a multiple of the base
alignment of a vec4. But it doesn't mention any change to rule 3.

According to rule (3) and std430 changes to rule (4), the stride should
be 16 for an array of vec3.

Sam

>> I wonder if we'll need to employ some of idr's std140 tricks to work
>> out all the kinks in std430.
>>
>> http://www.paranormal-entertainment.com/idr/blog/posts/2014-10-08T13:28:09Z-Stochastic_Search-Based_Testing_for_Uniform_Block_Layouts/
>>
> 
> Thanks for the link! I am going to take a look at it.
> 
> Sam
> 
>> -Jordan
>>
>>> +
>>> +   if (this->is_record()) {
>>> +      unsigned size = 0;
>>> +      unsigned max_align = 0;
>>> +
>>> +      for (unsigned i = 0; i < this->length; i++) {
>>> +         bool field_row_major = row_major;
>>> +         const enum glsl_matrix_layout matrix_layout =
>>> +            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
>>> +         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
>>> +            field_row_major = true;
>>> +         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
>>> +            field_row_major = false;
>>> +         }
>>> +
>>> +         const struct glsl_type *field_type = this->fields.structure[i].type;
>>> +         unsigned align = field_type->std430_base_alignment(field_row_major);
>>> +         size = glsl_align(size, align);
>>> +         size += field_type->std430_size(field_row_major);
>>> +
>>> +         max_align = MAX2(align, max_align);
>>> +      }
>>> +      size = glsl_align(size, max_align);
>>> +      return size;
>>> +   }
>>> +   /* For the rest of cases, return std140_size(). */
>>> +   return this->std140_size(row_major);
>>> +}
>>>  
>>>  unsigned
>>>  glsl_type::count_attribute_slots() const
>>> diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
>>> index e7c73da..d994eb5 100644
>>> --- a/src/glsl/glsl_types.h
>>> +++ b/src/glsl/glsl_types.h
>>> @@ -77,7 +77,8 @@ enum glsl_sampler_dim {
>>>  enum glsl_interface_packing {
>>>     GLSL_INTERFACE_PACKING_STD140,
>>>     GLSL_INTERFACE_PACKING_SHARED,
>>> -   GLSL_INTERFACE_PACKING_PACKED
>>> +   GLSL_INTERFACE_PACKING_PACKED,
>>> +   GLSL_INTERFACE_PACKING_STD430
>>>  };
>>>  
>>>  enum glsl_matrix_layout {
>>> @@ -326,6 +327,18 @@ struct glsl_type {
>>>     unsigned std140_size(bool row_major) const;
>>>  
>>>     /**
>>> +    * Alignment in bytes of the start of this type in a std430 shader
>>> +    * storage block.
>>> +    */
>>> +   unsigned std430_base_alignment(bool row_major) const;
>>> +
>>> +   /** Size in bytes of this type in a std430 shader storage block.
>>> +    *
>>> +    * Note that this is not GL_BUFFER_SIZE
>>> +    */
>>> +   unsigned std430_size(bool row_major) const;
>>> +
>>> +   /**
>>>      * \brief Can this type be implicitly converted to another?
>>>      *
>>>      * \return True if the types are identical or if this type can be converted
>>> diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
>>> index 4df39e2..c891d03 100644
>>> --- a/src/glsl/link_uniform_blocks.cpp
>>> +++ b/src/glsl/link_uniform_blocks.cpp
>>> @@ -119,8 +119,16 @@ private:
>>>           v->IndexName = v->Name;
>>>        }
>>>  
>>> -      const unsigned alignment = type->std140_base_alignment(v->RowMajor);
>>> -      unsigned size = type->std140_size(v->RowMajor);
>>> +      unsigned alignment = 0;
>>> +      unsigned size = 0;
>>> +
>>> +      if (v->Type->interface_packing == GLSL_INTERFACE_PACKING_STD430) {
>>> +         alignment = type->std430_base_alignment(v->RowMajor);
>>> +         size = type->std430_size(v->RowMajor);
>>> +      } else {
>>> +         alignment = type->std140_base_alignment(v->RowMajor);
>>> +         size = type->std140_size(v->RowMajor);
>>> +      }
>>>  
>>>        this->offset = glsl_align(this->offset, alignment);
>>>        v->Offset = this->offset;
>>> @@ -255,7 +263,8 @@ link_uniform_blocks(void *mem_ctx,
>>>                   == unsigned(ubo_packing_shared));
>>>     STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
>>>                   == unsigned(ubo_packing_packed));
>>> -
>>> +   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430)
>>> +                 == unsigned(ubo_packing_std430));
>>>  
>>>     hash_table_foreach (block_hash, entry) {
>>>        const struct link_uniform_block_active *const b =
>>> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
>>> index a252bf4..544950a 100644
>>> --- a/src/mesa/main/mtypes.h
>>> +++ b/src/mesa/main/mtypes.h
>>> @@ -2657,7 +2657,8 @@ enum gl_uniform_block_packing
>>>  {
>>>     ubo_packing_std140,
>>>     ubo_packing_shared,
>>> -   ubo_packing_packed
>>> +   ubo_packing_packed,
>>> +   ubo_packing_std430
>>>  };
>>>  
>>>  
>>> -- 
>>> 1.9.1
>>>
>>