[Mesa-dev] [PATCH] glsl: disable varying packing when its not safe

Nanley Chery nanleychery at gmail.com
Fri Jan 29 13:14:58 PST 2016


On Fri, Jan 29, 2016 at 04:52:54PM +1100, Timothy Arceri wrote:
> In GLES 3.1+ and GL 4.4+ there is no guarantee that interpolation
> qualifiers will match between stages so we cannot safely pack
> varyings using the current packing pass in Mesa.
> 
> We also disable packing on outward facing SSO as these too are outside
> the rule that guarantees the interpolation qualifiers match.
> 
> We do however enable packing on individual arrays, structs, and
> matrices as these are required by the transform feedback code and it
> is still safe to do so.
> 
> Finally we also enable packing when a varying is only used for
> transform feedback and its not a SSO.
> 
> This fixes all remaining rendering issues with the dEQP SSO tests,
> the only issues remaining with thoses tests are to do with validation.
> 
> Note: There is still one remaining SSO bug that this patch doesn't fix.
> Their is a chance that VS -> TCS will have mismatching interfaces
> because we pack VS output in case its used by transform feedback but
> don't pack TCS input for performance reasons. This patch will make the
> situation better but doesn't fix it.
> 
> Cc: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
> Cc: Tapani Pälli <tapani.palli at intel.com>
> ---
> 
>  This patch applies on top of my ARB_enhanced_layouts component packing
>  series [1].
> 
>  The GLES 3.1 and GL 4.4 changes were tested by changing the code to
>  always disable packing and running the changes in Intel CI system.
>  
>  This patch would also make it possible for a backend to do less packing
>  in GLSL IR if it wished to do so now that the transform feedback requirements
>  have been isolated. 
> 
>  [1] http://patchwork.freedesktop.org/series/2101/
> 
>  src/compiler/glsl/ir.h                      |   7 +
>  src/compiler/glsl/ir_optimization.h         |   2 +-
>  src/compiler/glsl/link_varyings.cpp         | 199 +++++++++++++++++++++-------
>  src/compiler/glsl/lower_packed_varyings.cpp |  21 ++-
>  4 files changed, 174 insertions(+), 55 deletions(-)
> 
> diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
> index 287c0cb..8cadf34 100644
> --- a/src/compiler/glsl/ir.h
> +++ b/src/compiler/glsl/ir.h
> @@ -736,6 +736,13 @@ public:
>        unsigned is_unmatched_generic_inout:1;
>  
>        /**
> +       * Is this varying used only by transform feedback?
> +       *
> +       * This is used by the linker to decide if its safe to pack the varying.
> +       */
> +      unsigned is_xfb_only:1;
> +
> +      /**
>         * If non-zero, then this variable may be packed along with other variables
>         * into a single varying slot, so this offset should be applied when
>         * accessing components.  For example, an offset of 1 means that the x
> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
> index 94658bb..1ca26cd 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -129,7 +129,7 @@ void lower_packed_varyings(void *mem_ctx,
>                             unsigned locations_used, ir_variable_mode mode,
>                             gl_shader *shader, unsigned base_location,
>                             bool disable_varying_packing,
> -                           bool has_enhanced_layouts);
> +                           bool xfb_enabled, bool has_enhanced_layouts);
>  bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
>  bool lower_vector_derefs(gl_shader *shader);
>  void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
> diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
> index 78eb2b6..2716723 100644
> --- a/src/compiler/glsl/link_varyings.cpp
> +++ b/src/compiler/glsl/link_varyings.cpp
> @@ -41,6 +41,23 @@
>  
>  
>  /**
> + * Packing is always safe on individual arrays, structure and matices. It is
> + * also safe if the varying is only used for transform feedback.
> + */
> +static bool
> +is_varying_packing_safe(const ir_variable *var,
> +                        const gl_shader_stage consumer_stage,
> +                        const gl_shader_stage producer_stage) {
> +   if ((consumer_stage == MESA_SHADER_TESS_EVAL) ||
> +       (consumer_stage == MESA_SHADER_TESS_CTRL) ||
> +       (producer_stage == MESA_SHADER_TESS_CTRL)) {
> +      return false;
> +}
> +   return var->type->is_array() || var->type->is_record() ||
> +          var->type->is_matrix() || var->data.is_xfb_only;
> +}
> +
> +/**
>   * Get the varying type stripped of the outermost array if we're processing
>   * a stage whose varyings are arrays indexed by a vertex number (such as
>   * geometry shader inputs).
> @@ -878,7 +895,7 @@ namespace {
>  class varying_matches
>  {
>  public:
> -   varying_matches(bool disable_varying_packing,
> +   varying_matches(bool disable_varying_packing, bool xfb_enabled,
>                     gl_shader_stage producer_stage,
>                     gl_shader_stage consumer_stage);
>     ~varying_matches();
> @@ -892,10 +909,22 @@ private:
>      * If true, this driver disables varying packing, so all varyings need to
>      * be aligned on slot boundaries, and take up a number of slots equal to
>      * their number of matrix columns times their array size.
> +    *
> +    * Packing may also be disabled because our current packing method is not
> +    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
> +    * guaranteed to match across stages.
>      */
>     const bool disable_varying_packing;
>  
>     /**
> +    * If true, this driver has transform feedback enabled. The transform
> +    * feedback code requires at least some packing be done even when varying
> +    * packing is disabled, fortunately the cases its required for safe its
> +    * still to do so. See is_varying_packing_safe().
> +    */
> +   const bool xfb_enabled;
> +
> +   /**
>      * Enum representing the order in which varyings are packed within a
>      * packing class.
>      *
> @@ -909,11 +938,13 @@ private:
>        PACKING_ORDER_VEC2,
>        PACKING_ORDER_SCALAR,
>        PACKING_ORDER_VEC3,
> +      PACKING_ORDER_LAST,
>     };
>  
>     static unsigned compute_packing_class(const ir_variable *var);
>     static packing_order_enum compute_packing_order(const ir_variable *var);
>     static int match_comparator(const void *x_generic, const void *y_generic);
> +   static int xfb_comparator(const void *x_generic, const void *y_generic);
>  
>     /**
>      * Structure recording the relationship between a single producer output
> @@ -969,9 +1000,11 @@ private:
>  } /* anonymous namespace */
>  
>  varying_matches::varying_matches(bool disable_varying_packing,
> +                                 bool xfb_enabled,
>                                   gl_shader_stage producer_stage,
>                                   gl_shader_stage consumer_stage)
>     : disable_varying_packing(disable_varying_packing),
> +     xfb_enabled(xfb_enabled),
>       producer_stage(producer_stage),
>       consumer_stage(consumer_stage)
>  {
> @@ -1067,7 +1100,9 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
>        = this->compute_packing_class(var);
>     this->matches[this->num_matches].packing_order
>        = this->compute_packing_order(var);
> -   if (this->disable_varying_packing) {
> +   if (this->disable_varying_packing &&
> +       !(xfb_enabled &&
> +         is_varying_packing_safe(var, consumer_stage, producer_stage))) {
>        unsigned slots = type->count_attribute_slots(false);
>        this->matches[this->num_matches].num_components = slots * 4;
>     } else {
> @@ -1093,37 +1128,24 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
>                                    uint64_t reserved_slots,
>                                    bool separate_shader)
>  {
> -   /* We disable varying sorting for separate shader programs for the
> -    * following reasons:
> -    *
> -    * 1/ All programs must sort the code in the same order to guarantee the
> -    *    interface matching. However varying_matches::record() will change the
> -    *    interpolation qualifier of some stages.
> -    *
> -    * 2/ GLSL version 4.50 removes the matching constrain on the interpolation
> -    *    qualifier.
> -    *
> -    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec:
> -    *
> -    *    "The type and presence of interpolation qualifiers of variables with
> -    *    the same name declared in all linked shaders for the same cross-stage
> -    *    interface must match, otherwise the link command will fail.
> -    *
> -    *    When comparing an output from one stage to an input of a subsequent
> -    *    stage, the input and output don't match if their interpolation
> -    *    qualifiers (or lack thereof) are not the same."
> -    *
> -    *    "It is a link-time error if, within the same stage, the interpolation
> -    *    qualifiers of variables of the same name do not match."
> +   /* If packing has been disbaled then we cannot safely sort the varyings as
> +    * it may mean we are using a version of OpenGL where interpolation
> +    * qualifiers are not guaranteed to be matching across shaders, sorting
> +    * in this case could result in mismatching shader interfaces.
>      */
> -   if (!separate_shader) {
> +   if (!this->disable_varying_packing) {
>        /* Sort varying matches into an order that makes them easy to pack. */
>        qsort(this->matches, this->num_matches, sizeof(*this->matches),
>              &varying_matches::match_comparator);
> +   } else if (xfb_enabled) {
> +      /* Only sort varyings that are only used by transform feedback. */
> +      qsort(this->matches, this->num_matches, sizeof(*this->matches),
> +            &varying_matches::xfb_comparator);
>     }
>  
>     unsigned generic_location = 0;
>     unsigned generic_patch_location = MAX_VARYING*4;
> +   bool previous_var_xfb_only = false;
>  
>     for (unsigned i = 0; i < this->num_matches; i++) {
>        unsigned *location = &generic_location;
> @@ -1147,16 +1169,30 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
>        /* Advance to the next slot if this varying has a different packing
>         * class than the previous one, and we're not already on a slot
>         * boundary.
> +       *
> +       * Also advance to the next slot if packing disabled, this makes sure we
> +       * don't assign varyings the same locations, this is possible because
> +       * we still pack individual arrays, records and matrices even when
> +       * packing is disabled. Note we don't advance to the next slot if we can
> +       * pack varyings together that are only used for transform feedback.
>         */
> -      if (i > 0 &&
> -          this->matches[i - 1].packing_class
> -          != this->matches[i].packing_class) {
> +      if ((this->disable_varying_packing &&
> +           !(previous_var_xfb_only && var->data.is_xfb_only)) ||
> +          (i > 0 && this->matches[i - 1].packing_class
> +          != this->matches[i].packing_class )) {
>           *location = ALIGN(*location, 4);
>        }
>  
> +      previous_var_xfb_only = var->data.is_xfb_only;
> +
>        unsigned num_elements =  type->count_attribute_slots(is_vertex_input);
> -      unsigned slot_end = this->disable_varying_packing ? 4 :
> -         type->without_array()->vector_elements;
> +      unsigned slot_end;
> +      if (this->disable_varying_packing &&
> +          !(xfb_enabled &&
> +            is_varying_packing_safe(var, consumer_stage, producer_stage)))
> +         slot_end = 4;
> +      else
> +         slot_end = type->without_array()->vector_elements;
>        slot_end += *location - 1;
>  
>        /* FIXME: We could be smarter in the below code and loop back over
> @@ -1180,7 +1216,9 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
>           /* Increase the slot to make sure there is enough room for next
>            * array element.
>            */
> -         if (this->disable_varying_packing)
> +         if (this->disable_varying_packing &&
> +             !(xfb_enabled &&
> +               is_varying_packing_safe(var, consumer_stage, producer_stage)))
>              slot_end += 4;
>           else
>              slot_end += type->without_array()->vector_elements;
> @@ -1306,6 +1344,22 @@ varying_matches::match_comparator(const void *x_generic, const void *y_generic)
>  
>  
>  /**
> + * Comparison function passed to qsort() to sort varyings used only by
> + * transform feedback when packing of other varyings is disabled.
> + */
> +int
> +varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
> +{
> +   const match *x = (const match *) x_generic;
> +
> +   if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
> +         return match_comparator(x_generic, y_generic);
> +
> +   return PACKING_ORDER_LAST;
> +}
> +
> +
> +/**
>   * Is the given variable a varying variable to be counted against the
>   * limit in ctx->Const.MaxVarying?
>   * This includes variables such as texcoords, colors and generic
> @@ -1623,26 +1677,67 @@ assign_varying_locations(struct gl_context *ctx,
>                           unsigned num_tfeedback_decls,
>                           tfeedback_decl *tfeedback_decls)
>  {
> -   if (ctx->Const.DisableVaryingPacking) {
> -      /* Transform feedback code assumes varyings are packed, so if the driver
> -       * has disabled varying packing, make sure it does not support transform
> -       * feedback.
> -       */
> -      assert(!ctx->Extensions.EXT_transform_feedback);
> -   }
> +   /* Transform feedback code assumes varying arrays are packed, so if the
> +    * driver has disabled varying packing, make sure to at least enable
> +    * packing required by transform feedback.
> +    */
> +   bool xfb_enabled = ctx->Extensions.EXT_transform_feedback;

Since this functionality is not available in OpenGL ES 1-2, shouldn't
the variable be assigned to the following?

(_mesa_has_EXT_transform_feedback() || _mesa_is_gles3())

Regards,
Nanley

> +
> +   /* Disable varying packing for GLES 3.1+ and GL 4.4+, there is no guarantee
> +    * that interpolation qualifiers will match between shaders in these
> +    * versions. We also disable packing for the outer facing interfaces for
> +    * SSO as they also do not have the same restriction during linking.
> +    *
> +    * Packing is still enabled on individual arrays, structs, and matrices as
> +    * these are required by the transform feedback code and it is still safe
> +    * to do so. We also enable packing when a varying is only used for
> +    * transform feedback and its not a SSO.
> +    *
> +    * Varying packing currently only packs together varyings with matching
> +    * interpolation qualifiers as the backends assume all packed components
> +    * are to be processed in the same way. Therefore we cannot do packing in
> +    * these versions of GL without the risk of mismatching interfaces.
> +    *
> +    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
> +    *
> +    *    "The type and presence of interpolation qualifiers of variables with
> +    *    the same name declared in all linked shaders for the same cross-stage
> +    *    interface must match, otherwise the link command will fail.
> +    *
> +    *    When comparing an output from one stage to an input of a subsequent
> +    *    stage, the input and output don't match if their interpolation
> +    *    qualifiers (or lack thereof) are not the same."
> +    *
> +    * This text was also in at least revison 7 of the 4.40 spec but is no
> +    * longer in revision 9 and not in the 4.50 spec. There is also no text in
> +    * the GLSL ES spec that says these qualifiers must match across stages.
> +    *
> +    * From Section 4.3.9 (Interpolation) of the GLSL ES 3.0 spec:
> +    *
> +    *    "The type and presence of the interpolation qualifiers and storage
> +    *    qualifiers and invariant qualifiers of variables with the same name
> +    *    declared in all linked shaders must match, otherwise the link command
> +    *    will fail."
> +    *
> +    * This text is no longer in the 3.1 or 3.2 specs.
> +    */
> +   bool disable_varying_packing = ctx->Const.DisableVaryingPacking;
> +   if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) ||
> +       (ctx->API == API_OPENGLES2 && ctx->Version >= 31) ||
> +       (prog->SeparateShader && (producer == NULL || consumer == NULL)))
> +      disable_varying_packing = true;
>  
>     /* Tessellation shaders treat inputs and outputs as shared memory and can
>      * access inputs and outputs of other invocations.
>      * Therefore, they can't be lowered to temps easily (and definitely not
>      * efficiently).
>      */
> -   bool disable_varying_packing =
> -      ctx->Const.DisableVaryingPacking ||
> +   disable_varying_packing = disable_varying_packing ||
>        (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
>        (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
>        (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
>  
> -   varying_matches matches(disable_varying_packing,
> +   varying_matches matches(disable_varying_packing, xfb_enabled,
>                             producer ? producer->Stage : (gl_shader_stage)-1,
>                             consumer ? consumer->Stage : (gl_shader_stage)-1);
>     hash_table *tfeedback_candidates
> @@ -1762,8 +1857,10 @@ assign_varying_locations(struct gl_context *ctx,
>           return false;
>        }
>  
> -      if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout)
> +      if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
> +         matched_candidate->toplevel_var->data.is_xfb_only = 1;
>           matches.record(matched_candidate->toplevel_var, NULL);
> +      }
>     }
>  
>     const uint64_t reserved_slots =
> @@ -1863,13 +1960,19 @@ assign_varying_locations(struct gl_context *ctx,
>           if (vertex_attributes > 0)
>              lower_packed_varyings(mem_ctx, vertex_attributes,
>                                    ir_var_shader_in, producer,
> -                                  VERT_ATTRIB_GENERIC0, true,
> +                                  VERT_ATTRIB_GENERIC0, true, false,
>                                    ctx->Extensions.ARB_enhanced_layouts);
>        }
>  
> +      /* TCS inputs are not packed so don't enable packing required for
> +       * transform feedback if the the consumer is a TCS.
> +       */
> +      if (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL)
> +         xfb_enabled = false;
> +
>        lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, producer,
> -                            VARYING_SLOT_VAR0,
> -                            disable_varying_packing,
> +                            VARYING_SLOT_VAR0, disable_varying_packing,
> +                            xfb_enabled,
>                              ctx->Extensions.ARB_enhanced_layouts);
>     }
>  
> @@ -1886,13 +1989,13 @@ assign_varying_locations(struct gl_context *ctx,
>           unsigned frag_outs = _mesa_bitcount_64(reserved_slots);
>           if (frag_outs > 0)
>              lower_packed_varyings(mem_ctx, frag_outs, ir_var_shader_out,
> -                                  consumer, FRAG_RESULT_DATA0, true,
> +                                  consumer, FRAG_RESULT_DATA0, true, false,
>                                    ctx->Extensions.ARB_enhanced_layouts);
>        }
>  
>        lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, consumer,
> -                            VARYING_SLOT_VAR0,
> -                            disable_varying_packing,
> +                            VARYING_SLOT_VAR0, disable_varying_packing,
> +                            xfb_enabled,
>                              ctx->Extensions.ARB_enhanced_layouts);
>     }
>  
> diff --git a/src/compiler/glsl/lower_packed_varyings.cpp b/src/compiler/glsl/lower_packed_varyings.cpp
> index d6d68fc..f570167 100644
> --- a/src/compiler/glsl/lower_packed_varyings.cpp
> +++ b/src/compiler/glsl/lower_packed_varyings.cpp
> @@ -172,7 +172,7 @@ update_packed_array_type(const glsl_type *type, const glsl_type *packed_type)
>  
>  static bool
>  needs_lowering(ir_variable *var, bool has_enhanced_layouts,
> -               bool disable_varying_packing)
> +               bool disable_varying_packing, bool xfb_enabled)
>  {
>     /* Don't lower varying with explicit location unless ARB_enhanced_layouts
>      * is enabled, also don't try to pack structs with explicit location as
> @@ -186,7 +186,10 @@ needs_lowering(ir_variable *var, bool has_enhanced_layouts,
>     /* Don't disable packing for explicit locations when ARB_enhanced_layouts
>      * is supported.
>      */
> -   if (disable_varying_packing && !var->data.explicit_location)
> +   if (disable_varying_packing && !var->data.is_xfb_only &&
> +       !var->data.explicit_location &&
> +       !((var->type->is_array() || var->type->is_record() ||
> +          var->type->is_matrix()) && xfb_enabled))
>        return false;
>  
>     /* Things composed of vec4's don't need lowering everything else does. */
> @@ -285,6 +288,7 @@ public:
>                                   exec_list *out_variables,
>                                   unsigned base_location,
>                                   bool disable_varying_packing,
> +                                 bool xfb_enabled,
>                                   bool has_enhanced_layouts);
>  
>     void run(struct gl_shader *shader);
> @@ -353,6 +357,7 @@ private:
>     exec_list *out_variables;
>  
>     bool disable_varying_packing;
> +   bool xfb_enabled;
>     bool has_enhanced_layouts;
>  };
>  
> @@ -362,7 +367,8 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
>        void *mem_ctx, unsigned locations_used, ir_variable_mode mode,
>        bool is_outer_array_vert_idx, exec_list *out_instructions,
>        exec_list *out_variables, unsigned base_location,
> -      bool disable_varying_packing, bool has_enhanced_layouts)
> +      bool disable_varying_packing, bool xfb_enabled,
> +      bool has_enhanced_layouts)
>     : mem_ctx(mem_ctx),
>       base_location(base_location),
>       locations_used(locations_used),
> @@ -374,6 +380,7 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
>       out_instructions(out_instructions),
>       out_variables(out_variables),
>       disable_varying_packing(disable_varying_packing),
> +     xfb_enabled(xfb_enabled),
>       has_enhanced_layouts(has_enhanced_layouts)
>  {
>  }
> @@ -388,7 +395,8 @@ lower_packed_varyings_visitor::run(struct gl_shader *shader)
>  
>        if (var->data.mode != this->mode ||
>            var->data.location < (int) this->base_location ||
> -          !needs_lowering(var, has_enhanced_layouts, disable_varying_packing))
> +          !needs_lowering(var, has_enhanced_layouts, disable_varying_packing,
> +                          xfb_enabled))
>           continue;
>  
>        /* This lowering pass is only capable of packing floats and ints
> @@ -1118,7 +1126,7 @@ void
>  lower_packed_varyings(void *mem_ctx, unsigned locations_used,
>                        ir_variable_mode mode, gl_shader *shader,
>                        unsigned base_location, bool disable_varying_packing,
> -                      bool has_enhanced_layouts)
> +                      bool xfb_enabled, bool has_enhanced_layouts)
>  {
>     ir_function *main_func = shader->symbols->get_function("main");
>     exec_list void_parameters;
> @@ -1143,6 +1151,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
>                                              &new_variables,
>                                              base_location,
>                                              disable_varying_packing,
> +                                            xfb_enabled,
>                                              has_enhanced_layouts);
>        visitor.run(shader);
>        if (mode == ir_var_shader_out) {
> @@ -1190,7 +1199,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
>  
>           if (var->data.mode != mode ||
>               var->data.location < (int) base_location ||
> -             !needs_lowering(var, has_enhanced_layouts, true))
> +             !needs_lowering(var, has_enhanced_layouts, true, false))
>           continue;
>  
>           /* Clone the variable for program resource list before
> -- 
> 2.5.0
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list