[Mesa-dev] [PATCH 6/8] glsl: use ARB_enhanced_layouts for packing where possible

Kenneth Graunke kenneth at whitecape.org
Fri Apr 21 08:27:53 UTC 2017


On Monday, April 17, 2017 10:52:23 PM PDT Timothy Arceri wrote:
> From: Timothy Arceri <timothy.arceri at collabora.com>
> 
> If packing doesn't cross locations we can easily make use of
> ARB_enhanced_layouts to do packing rather than using the GLSL IR
> lowering pass lower_packed_varyings().
> 
> Shader-db Broadwell results:
> 
> total instructions in shared programs: 12977822 -> 12977819 (-0.00%)
> instructions in affected programs: 1871 -> 1868 (-0.16%)
> helped: 4
> HURT: 3
> 
> total cycles in shared programs: 246567288 -> 246567668 (0.00%)
> cycles in affected programs: 1370386 -> 1370766 (0.03%)
> helped: 592
> HURT: 733
> ---
>  src/compiler/glsl/link_varyings.cpp | 68 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 68 insertions(+)
> 
> diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
> index 37297be..5902421 100644
> --- a/src/compiler/glsl/link_varyings.cpp
> +++ b/src/compiler/glsl/link_varyings.cpp
> @@ -1210,20 +1210,21 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
>  namespace {
>  
>  /**
>   * Data structure recording the relationship between outputs of one shader
>   * stage (the "producer") and inputs of another (the "consumer").
>   */
>  class varying_matches
>  {
>  public:
>     varying_matches(bool disable_varying_packing, bool xfb_enabled,
> +                   bool enhanced_layouts_enabled,
>                     gl_shader_stage producer_stage,
>                     gl_shader_stage consumer_stage);
>     ~varying_matches();
>     void record(ir_variable *producer_var, ir_variable *consumer_var);
>     unsigned assign_locations(struct gl_shader_program *prog,
>                               uint8_t *components,
>                               uint64_t reserved_slots);
>     void store_locations() const;
>  
>  private:
> @@ -1243,20 +1244,22 @@ private:
>  
>     /**
>      * If true, this driver has transform feedback enabled. The transform
>      * feedback code requires at least some packing be done even when varying
>      * packing is disabled, fortunately where transform feedback requires
>      * packing it's safe to override the disabled setting. See
>      * is_varying_packing_safe().
>      */
>     const bool xfb_enabled;
>  
> +   const bool enhanced_layouts_enabled;
> +
>     /**
>      * Enum representing the order in which varyings are packed within a
>      * packing class.
>      *
>      * Currently we pack vec4's first, then vec2's, then scalar values, then
>      * vec3's.  This order ensures that the only vectors that are at risk of
>      * having to be "double parked" (split between two adjacent varying slots)
>      * are the vec3's.
>      */
>     enum packing_order_enum {
> @@ -1319,24 +1322,26 @@ private:
>     unsigned matches_capacity;
>  
>     gl_shader_stage producer_stage;
>     gl_shader_stage consumer_stage;
>  };
>  
>  } /* anonymous namespace */
>  
>  varying_matches::varying_matches(bool disable_varying_packing,
>                                   bool xfb_enabled,
> +                                 bool enhanced_layouts_enabled,
>                                   gl_shader_stage producer_stage,
>                                   gl_shader_stage consumer_stage)
>     : disable_varying_packing(disable_varying_packing),
>       xfb_enabled(xfb_enabled),
> +     enhanced_layouts_enabled(enhanced_layouts_enabled),
>       producer_stage(producer_stage),
>       consumer_stage(consumer_stage)
>  {
>     /* Note: this initial capacity is rather arbitrarily chosen to be large
>      * enough for many cases without wasting an unreasonable amount of space.
>      * varying_matches::record() will resize the array if there are more than
>      * this number of varyings.
>      */
>     this->matches_capacity = 8;
>     this->matches = (match *)
> @@ -1615,37 +1620,99 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
>  }
>  
>  
>  /**
>   * Update the producer and consumer shaders to reflect the locations
>   * assignments that were made by varying_matches::assign_locations().
>   */
>  void
>  varying_matches::store_locations() const
>  {
> +   /* Check if a location needs to be packed with lower_packed_varyings() or if
> +    * we can just use ARB_enhanced_layouts packing.
> +    */
> +   bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 };
> +   const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
> +
>     for (unsigned i = 0; i < this->num_matches; i++) {
>        ir_variable *producer_var = this->matches[i].producer_var;
>        ir_variable *consumer_var = this->matches[i].consumer_var;
>        unsigned generic_location = this->matches[i].generic_location;
>        unsigned slot = generic_location / 4;
>        unsigned offset = generic_location % 4;
>  
>        if (producer_var) {
>           producer_var->data.location = VARYING_SLOT_VAR0 + slot;
>           producer_var->data.location_frac = offset;
>        }
>  
>        if (consumer_var) {
>           assert(consumer_var->data.location == -1);
>           consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
>           consumer_var->data.location_frac = offset;
>        }
> +
> +      /* Find locations suitable for native packing via
> +       * ARB_enhanced_layouts.
> +       */
> +      if (producer_var && consumer_var) {
> +         if (enhanced_layouts_enabled) {
> +            const glsl_type *type =
> +               get_varying_type(producer_var, producer_stage);
> +            if (type->is_array() || type->is_matrix() || type->is_record() ||
> +                type->is_double()) {
> +               unsigned comp_slots = type->component_slots() + offset;
> +               unsigned slots = comp_slots / 4;
> +               if (comp_slots % 4)
> +                  slots += 1;
> +
> +               for (unsigned j = 0; j < slots; j++) {
> +                  pack_loc[slot + j] = true;
> +               }
> +            } else if (offset + type->vector_elements > 4) {
> +               pack_loc[slot] = true;
> +               pack_loc[slot + 1] = true;
> +            } else {
> +               loc_type[slot][offset] = type;
> +            }
> +         }
> +      }
> +   }
> +
> +   /* Attempt to use ARB_enhanced_layouts for more efficient packing if
> +    * suitable.
> +    */
> +   if (enhanced_layouts_enabled) {
> +      for (unsigned i = 0; i < this->num_matches; i++) {
> +         ir_variable *producer_var = this->matches[i].producer_var;
> +         ir_variable *consumer_var = this->matches[i].consumer_var;
> +         unsigned generic_location = this->matches[i].generic_location;
> +         unsigned slot = generic_location / 4;
> +
> +         if (pack_loc[slot] || !producer_var || !consumer_var)
> +            continue;
> +
> +         const glsl_type *type =
> +            get_varying_type(producer_var, producer_stage);
> +         bool type_match = true;
> +         for (unsigned j = 0; j < 4; j++) {
> +            if (loc_type[slot][j]) {
> +               if (type->base_type != loc_type[slot][j]->base_type)
> +                  type_match = false;
> +            }
> +         }
> +
> +         if (type_match) {
> +            producer_var->data.explicit_location = 1;
> +            consumer_var->data.explicit_location = 1;

You probably want to set

            producer_var->data.explicit_component = 1;
            consumer_var->data.explicit_component = 1;

as well.  It doesn't seem to affect code generation, but it does make
ir_print_visitor actually print the component info.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170421/88fb4c3b/attachment.sig>


More information about the mesa-dev mailing list