[Mesa-dev] [PATCH] glsl: disable varying packing when its not safe

Timothy Arceri timothy.arceri at collabora.com
Thu Jan 28 21:52:54 PST 2016


In GLES 3.1+ and GL 4.4+ there is no guarantee that interpolation
qualifiers will match between stages so we cannot safely pack
varyings using the current packing pass in Mesa.

We also disable packing on outward facing SSO as these too are outside
the rule that guarantees the interpolation qualifiers match.

We do however enable packing on individual arrays, structs, and
matrices as these are required by the transform feedback code and it
is still safe to do so.

Finally we also enable packing when a varying is only used for
transform feedback and its not a SSO.

This fixes all remaining rendering issues with the dEQP SSO tests,
the only issues remaining with thoses tests are to do with validation.

Note: There is still one remaining SSO bug that this patch doesn't fix.
Their is a chance that VS -> TCS will have mismatching interfaces
because we pack VS output in case its used by transform feedback but
don't pack TCS input for performance reasons. This patch will make the
situation better but doesn't fix it.

Cc: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
Cc: Tapani Pälli <tapani.palli at intel.com>
---

 This patch applies on top of my ARB_enhanced_layouts component packing
 series [1].

 The GLES 3.1 and GL 4.4 changes were tested by changing the code to
 always disable packing and running the changes in Intel CI system.
 
 This patch would also make it possible for a backend to do less packing
 in GLSL IR if it wished to do so now that the transform feedback requirements
 have been isolated. 

 [1] http://patchwork.freedesktop.org/series/2101/

 src/compiler/glsl/ir.h                      |   7 +
 src/compiler/glsl/ir_optimization.h         |   2 +-
 src/compiler/glsl/link_varyings.cpp         | 199 +++++++++++++++++++++-------
 src/compiler/glsl/lower_packed_varyings.cpp |  21 ++-
 4 files changed, 174 insertions(+), 55 deletions(-)

diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index 287c0cb..8cadf34 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -736,6 +736,13 @@ public:
       unsigned is_unmatched_generic_inout:1;
 
       /**
+       * Is this varying used only by transform feedback?
+       *
+       * This is used by the linker to decide if its safe to pack the varying.
+       */
+      unsigned is_xfb_only:1;
+
+      /**
        * If non-zero, then this variable may be packed along with other variables
        * into a single varying slot, so this offset should be applied when
        * accessing components.  For example, an offset of 1 means that the x
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 94658bb..1ca26cd 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -129,7 +129,7 @@ void lower_packed_varyings(void *mem_ctx,
                            unsigned locations_used, ir_variable_mode mode,
                            gl_shader *shader, unsigned base_location,
                            bool disable_varying_packing,
-                           bool has_enhanced_layouts);
+                           bool xfb_enabled, bool has_enhanced_layouts);
 bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
 bool lower_vector_derefs(gl_shader *shader);
 void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index 78eb2b6..2716723 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -41,6 +41,23 @@
 
 
 /**
+ * Packing is always safe on individual arrays, structure and matices. It is
+ * also safe if the varying is only used for transform feedback.
+ */
+static bool
+is_varying_packing_safe(const ir_variable *var,
+                        const gl_shader_stage consumer_stage,
+                        const gl_shader_stage producer_stage) {
+   if ((consumer_stage == MESA_SHADER_TESS_EVAL) ||
+       (consumer_stage == MESA_SHADER_TESS_CTRL) ||
+       (producer_stage == MESA_SHADER_TESS_CTRL)) {
+      return false;
+}
+   return var->type->is_array() || var->type->is_record() ||
+          var->type->is_matrix() || var->data.is_xfb_only;
+}
+
+/**
  * Get the varying type stripped of the outermost array if we're processing
  * a stage whose varyings are arrays indexed by a vertex number (such as
  * geometry shader inputs).
@@ -878,7 +895,7 @@ namespace {
 class varying_matches
 {
 public:
-   varying_matches(bool disable_varying_packing,
+   varying_matches(bool disable_varying_packing, bool xfb_enabled,
                    gl_shader_stage producer_stage,
                    gl_shader_stage consumer_stage);
    ~varying_matches();
@@ -892,10 +909,22 @@ private:
     * If true, this driver disables varying packing, so all varyings need to
     * be aligned on slot boundaries, and take up a number of slots equal to
     * their number of matrix columns times their array size.
+    *
+    * Packing may also be disabled because our current packing method is not
+    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
+    * guaranteed to match across stages.
     */
    const bool disable_varying_packing;
 
    /**
+    * If true, this driver has transform feedback enabled. The transform
+    * feedback code requires at least some packing be done even when varying
+    * packing is disabled, fortunately the cases its required for safe its
+    * still to do so. See is_varying_packing_safe().
+    */
+   const bool xfb_enabled;
+
+   /**
     * Enum representing the order in which varyings are packed within a
     * packing class.
     *
@@ -909,11 +938,13 @@ private:
       PACKING_ORDER_VEC2,
       PACKING_ORDER_SCALAR,
       PACKING_ORDER_VEC3,
+      PACKING_ORDER_LAST,
    };
 
    static unsigned compute_packing_class(const ir_variable *var);
    static packing_order_enum compute_packing_order(const ir_variable *var);
    static int match_comparator(const void *x_generic, const void *y_generic);
+   static int xfb_comparator(const void *x_generic, const void *y_generic);
 
    /**
     * Structure recording the relationship between a single producer output
@@ -969,9 +1000,11 @@ private:
 } /* anonymous namespace */
 
 varying_matches::varying_matches(bool disable_varying_packing,
+                                 bool xfb_enabled,
                                  gl_shader_stage producer_stage,
                                  gl_shader_stage consumer_stage)
    : disable_varying_packing(disable_varying_packing),
+     xfb_enabled(xfb_enabled),
      producer_stage(producer_stage),
      consumer_stage(consumer_stage)
 {
@@ -1067,7 +1100,9 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
       = this->compute_packing_class(var);
    this->matches[this->num_matches].packing_order
       = this->compute_packing_order(var);
-   if (this->disable_varying_packing) {
+   if (this->disable_varying_packing &&
+       !(xfb_enabled &&
+         is_varying_packing_safe(var, consumer_stage, producer_stage))) {
       unsigned slots = type->count_attribute_slots(false);
       this->matches[this->num_matches].num_components = slots * 4;
    } else {
@@ -1093,37 +1128,24 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
                                   uint64_t reserved_slots,
                                   bool separate_shader)
 {
-   /* We disable varying sorting for separate shader programs for the
-    * following reasons:
-    *
-    * 1/ All programs must sort the code in the same order to guarantee the
-    *    interface matching. However varying_matches::record() will change the
-    *    interpolation qualifier of some stages.
-    *
-    * 2/ GLSL version 4.50 removes the matching constrain on the interpolation
-    *    qualifier.
-    *
-    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec:
-    *
-    *    "The type and presence of interpolation qualifiers of variables with
-    *    the same name declared in all linked shaders for the same cross-stage
-    *    interface must match, otherwise the link command will fail.
-    *
-    *    When comparing an output from one stage to an input of a subsequent
-    *    stage, the input and output don't match if their interpolation
-    *    qualifiers (or lack thereof) are not the same."
-    *
-    *    "It is a link-time error if, within the same stage, the interpolation
-    *    qualifiers of variables of the same name do not match."
+   /* If packing has been disbaled then we cannot safely sort the varyings as
+    * it may mean we are using a version of OpenGL where interpolation
+    * qualifiers are not guaranteed to be matching across shaders, sorting
+    * in this case could result in mismatching shader interfaces.
     */
-   if (!separate_shader) {
+   if (!this->disable_varying_packing) {
       /* Sort varying matches into an order that makes them easy to pack. */
       qsort(this->matches, this->num_matches, sizeof(*this->matches),
             &varying_matches::match_comparator);
+   } else if (xfb_enabled) {
+      /* Only sort varyings that are only used by transform feedback. */
+      qsort(this->matches, this->num_matches, sizeof(*this->matches),
+            &varying_matches::xfb_comparator);
    }
 
    unsigned generic_location = 0;
    unsigned generic_patch_location = MAX_VARYING*4;
+   bool previous_var_xfb_only = false;
 
    for (unsigned i = 0; i < this->num_matches; i++) {
       unsigned *location = &generic_location;
@@ -1147,16 +1169,30 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
       /* Advance to the next slot if this varying has a different packing
        * class than the previous one, and we're not already on a slot
        * boundary.
+       *
+       * Also advance to the next slot if packing disabled, this makes sure we
+       * don't assign varyings the same locations, this is possible because
+       * we still pack individual arrays, records and matrices even when
+       * packing is disabled. Note we don't advance to the next slot if we can
+       * pack varyings together that are only used for transform feedback.
        */
-      if (i > 0 &&
-          this->matches[i - 1].packing_class
-          != this->matches[i].packing_class) {
+      if ((this->disable_varying_packing &&
+           !(previous_var_xfb_only && var->data.is_xfb_only)) ||
+          (i > 0 && this->matches[i - 1].packing_class
+          != this->matches[i].packing_class )) {
          *location = ALIGN(*location, 4);
       }
 
+      previous_var_xfb_only = var->data.is_xfb_only;
+
       unsigned num_elements =  type->count_attribute_slots(is_vertex_input);
-      unsigned slot_end = this->disable_varying_packing ? 4 :
-         type->without_array()->vector_elements;
+      unsigned slot_end;
+      if (this->disable_varying_packing &&
+          !(xfb_enabled &&
+            is_varying_packing_safe(var, consumer_stage, producer_stage)))
+         slot_end = 4;
+      else
+         slot_end = type->without_array()->vector_elements;
       slot_end += *location - 1;
 
       /* FIXME: We could be smarter in the below code and loop back over
@@ -1180,7 +1216,9 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
          /* Increase the slot to make sure there is enough room for next
           * array element.
           */
-         if (this->disable_varying_packing)
+         if (this->disable_varying_packing &&
+             !(xfb_enabled &&
+               is_varying_packing_safe(var, consumer_stage, producer_stage)))
             slot_end += 4;
          else
             slot_end += type->without_array()->vector_elements;
@@ -1306,6 +1344,22 @@ varying_matches::match_comparator(const void *x_generic, const void *y_generic)
 
 
 /**
+ * Comparison function passed to qsort() to sort varyings used only by
+ * transform feedback when packing of other varyings is disabled.
+ */
+int
+varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
+{
+   const match *x = (const match *) x_generic;
+
+   if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
+         return match_comparator(x_generic, y_generic);
+
+   return PACKING_ORDER_LAST;
+}
+
+
+/**
  * Is the given variable a varying variable to be counted against the
  * limit in ctx->Const.MaxVarying?
  * This includes variables such as texcoords, colors and generic
@@ -1623,26 +1677,67 @@ assign_varying_locations(struct gl_context *ctx,
                          unsigned num_tfeedback_decls,
                          tfeedback_decl *tfeedback_decls)
 {
-   if (ctx->Const.DisableVaryingPacking) {
-      /* Transform feedback code assumes varyings are packed, so if the driver
-       * has disabled varying packing, make sure it does not support transform
-       * feedback.
-       */
-      assert(!ctx->Extensions.EXT_transform_feedback);
-   }
+   /* Transform feedback code assumes varying arrays are packed, so if the
+    * driver has disabled varying packing, make sure to at least enable
+    * packing required by transform feedback.
+    */
+   bool xfb_enabled = ctx->Extensions.EXT_transform_feedback;
+
+   /* Disable varying packing for GLES 3.1+ and GL 4.4+, there is no guarantee
+    * that interpolation qualifiers will match between shaders in these
+    * versions. We also disable packing for the outer facing interfaces for
+    * SSO as they also do not have the same restriction during linking.
+    *
+    * Packing is still enabled on individual arrays, structs, and matrices as
+    * these are required by the transform feedback code and it is still safe
+    * to do so. We also enable packing when a varying is only used for
+    * transform feedback and its not a SSO.
+    *
+    * Varying packing currently only packs together varyings with matching
+    * interpolation qualifiers as the backends assume all packed components
+    * are to be processed in the same way. Therefore we cannot do packing in
+    * these versions of GL without the risk of mismatching interfaces.
+    *
+    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
+    *
+    *    "The type and presence of interpolation qualifiers of variables with
+    *    the same name declared in all linked shaders for the same cross-stage
+    *    interface must match, otherwise the link command will fail.
+    *
+    *    When comparing an output from one stage to an input of a subsequent
+    *    stage, the input and output don't match if their interpolation
+    *    qualifiers (or lack thereof) are not the same."
+    *
+    * This text was also in at least revison 7 of the 4.40 spec but is no
+    * longer in revision 9 and not in the 4.50 spec. There is also no text in
+    * the GLSL ES spec that says these qualifiers must match across stages.
+    *
+    * From Section 4.3.9 (Interpolation) of the GLSL ES 3.0 spec:
+    *
+    *    "The type and presence of the interpolation qualifiers and storage
+    *    qualifiers and invariant qualifiers of variables with the same name
+    *    declared in all linked shaders must match, otherwise the link command
+    *    will fail."
+    *
+    * This text is no longer in the 3.1 or 3.2 specs.
+    */
+   bool disable_varying_packing = ctx->Const.DisableVaryingPacking;
+   if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) ||
+       (ctx->API == API_OPENGLES2 && ctx->Version >= 31) ||
+       (prog->SeparateShader && (producer == NULL || consumer == NULL)))
+      disable_varying_packing = true;
 
    /* Tessellation shaders treat inputs and outputs as shared memory and can
     * access inputs and outputs of other invocations.
     * Therefore, they can't be lowered to temps easily (and definitely not
     * efficiently).
     */
-   bool disable_varying_packing =
-      ctx->Const.DisableVaryingPacking ||
+   disable_varying_packing = disable_varying_packing ||
       (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
       (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
       (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
 
-   varying_matches matches(disable_varying_packing,
+   varying_matches matches(disable_varying_packing, xfb_enabled,
                            producer ? producer->Stage : (gl_shader_stage)-1,
                            consumer ? consumer->Stage : (gl_shader_stage)-1);
    hash_table *tfeedback_candidates
@@ -1762,8 +1857,10 @@ assign_varying_locations(struct gl_context *ctx,
          return false;
       }
 
-      if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout)
+      if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
+         matched_candidate->toplevel_var->data.is_xfb_only = 1;
          matches.record(matched_candidate->toplevel_var, NULL);
+      }
    }
 
    const uint64_t reserved_slots =
@@ -1863,13 +1960,19 @@ assign_varying_locations(struct gl_context *ctx,
          if (vertex_attributes > 0)
             lower_packed_varyings(mem_ctx, vertex_attributes,
                                   ir_var_shader_in, producer,
-                                  VERT_ATTRIB_GENERIC0, true,
+                                  VERT_ATTRIB_GENERIC0, true, false,
                                   ctx->Extensions.ARB_enhanced_layouts);
       }
 
+      /* TCS inputs are not packed so don't enable packing required for
+       * transform feedback if the the consumer is a TCS.
+       */
+      if (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL)
+         xfb_enabled = false;
+
       lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, producer,
-                            VARYING_SLOT_VAR0,
-                            disable_varying_packing,
+                            VARYING_SLOT_VAR0, disable_varying_packing,
+                            xfb_enabled,
                             ctx->Extensions.ARB_enhanced_layouts);
    }
 
@@ -1886,13 +1989,13 @@ assign_varying_locations(struct gl_context *ctx,
          unsigned frag_outs = _mesa_bitcount_64(reserved_slots);
          if (frag_outs > 0)
             lower_packed_varyings(mem_ctx, frag_outs, ir_var_shader_out,
-                                  consumer, FRAG_RESULT_DATA0, true,
+                                  consumer, FRAG_RESULT_DATA0, true, false,
                                   ctx->Extensions.ARB_enhanced_layouts);
       }
 
       lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, consumer,
-                            VARYING_SLOT_VAR0,
-                            disable_varying_packing,
+                            VARYING_SLOT_VAR0, disable_varying_packing,
+                            xfb_enabled,
                             ctx->Extensions.ARB_enhanced_layouts);
    }
 
diff --git a/src/compiler/glsl/lower_packed_varyings.cpp b/src/compiler/glsl/lower_packed_varyings.cpp
index d6d68fc..f570167 100644
--- a/src/compiler/glsl/lower_packed_varyings.cpp
+++ b/src/compiler/glsl/lower_packed_varyings.cpp
@@ -172,7 +172,7 @@ update_packed_array_type(const glsl_type *type, const glsl_type *packed_type)
 
 static bool
 needs_lowering(ir_variable *var, bool has_enhanced_layouts,
-               bool disable_varying_packing)
+               bool disable_varying_packing, bool xfb_enabled)
 {
    /* Don't lower varying with explicit location unless ARB_enhanced_layouts
     * is enabled, also don't try to pack structs with explicit location as
@@ -186,7 +186,10 @@ needs_lowering(ir_variable *var, bool has_enhanced_layouts,
    /* Don't disable packing for explicit locations when ARB_enhanced_layouts
     * is supported.
     */
-   if (disable_varying_packing && !var->data.explicit_location)
+   if (disable_varying_packing && !var->data.is_xfb_only &&
+       !var->data.explicit_location &&
+       !((var->type->is_array() || var->type->is_record() ||
+          var->type->is_matrix()) && xfb_enabled))
       return false;
 
    /* Things composed of vec4's don't need lowering everything else does. */
@@ -285,6 +288,7 @@ public:
                                  exec_list *out_variables,
                                  unsigned base_location,
                                  bool disable_varying_packing,
+                                 bool xfb_enabled,
                                  bool has_enhanced_layouts);
 
    void run(struct gl_shader *shader);
@@ -353,6 +357,7 @@ private:
    exec_list *out_variables;
 
    bool disable_varying_packing;
+   bool xfb_enabled;
    bool has_enhanced_layouts;
 };
 
@@ -362,7 +367,8 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
       void *mem_ctx, unsigned locations_used, ir_variable_mode mode,
       bool is_outer_array_vert_idx, exec_list *out_instructions,
       exec_list *out_variables, unsigned base_location,
-      bool disable_varying_packing, bool has_enhanced_layouts)
+      bool disable_varying_packing, bool xfb_enabled,
+      bool has_enhanced_layouts)
    : mem_ctx(mem_ctx),
      base_location(base_location),
      locations_used(locations_used),
@@ -374,6 +380,7 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
      out_instructions(out_instructions),
      out_variables(out_variables),
      disable_varying_packing(disable_varying_packing),
+     xfb_enabled(xfb_enabled),
      has_enhanced_layouts(has_enhanced_layouts)
 {
 }
@@ -388,7 +395,8 @@ lower_packed_varyings_visitor::run(struct gl_shader *shader)
 
       if (var->data.mode != this->mode ||
           var->data.location < (int) this->base_location ||
-          !needs_lowering(var, has_enhanced_layouts, disable_varying_packing))
+          !needs_lowering(var, has_enhanced_layouts, disable_varying_packing,
+                          xfb_enabled))
          continue;
 
       /* This lowering pass is only capable of packing floats and ints
@@ -1118,7 +1126,7 @@ void
 lower_packed_varyings(void *mem_ctx, unsigned locations_used,
                       ir_variable_mode mode, gl_shader *shader,
                       unsigned base_location, bool disable_varying_packing,
-                      bool has_enhanced_layouts)
+                      bool xfb_enabled, bool has_enhanced_layouts)
 {
    ir_function *main_func = shader->symbols->get_function("main");
    exec_list void_parameters;
@@ -1143,6 +1151,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
                                             &new_variables,
                                             base_location,
                                             disable_varying_packing,
+                                            xfb_enabled,
                                             has_enhanced_layouts);
       visitor.run(shader);
       if (mode == ir_var_shader_out) {
@@ -1190,7 +1199,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
 
          if (var->data.mode != mode ||
              var->data.location < (int) base_location ||
-             !needs_lowering(var, has_enhanced_layouts, true))
+             !needs_lowering(var, has_enhanced_layouts, true, false))
          continue;
 
          /* Clone the variable for program resource list before
-- 
2.5.0



More information about the mesa-dev mailing list