[Mesa-dev] [PATCH 6/8] glsl: use ARB_enhanced_layouts for packing where possible

Timothy Arceri tarceri at itsqueeze.com
Tue Apr 18 05:52:23 UTC 2017


From: Timothy Arceri <timothy.arceri at collabora.com>

If packing doesn't cross locations we can easily make use of
ARB_enhanced_layouts to do packing rather than using the GLSL IR
lowering pass lower_packed_varyings().

Shader-db Broadwell results:

total instructions in shared programs: 12977822 -> 12977819 (-0.00%)
instructions in affected programs: 1871 -> 1868 (-0.16%)
helped: 4
HURT: 3

total cycles in shared programs: 246567288 -> 246567668 (0.00%)
cycles in affected programs: 1370386 -> 1370766 (0.03%)
helped: 592
HURT: 733
---
 src/compiler/glsl/link_varyings.cpp | 68 +++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index 37297be..5902421 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1210,20 +1210,21 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
 namespace {
 
 /**
  * Data structure recording the relationship between outputs of one shader
  * stage (the "producer") and inputs of another (the "consumer").
  */
 class varying_matches
 {
 public:
    varying_matches(bool disable_varying_packing, bool xfb_enabled,
+                   bool enhanced_layouts_enabled,
                    gl_shader_stage producer_stage,
                    gl_shader_stage consumer_stage);
    ~varying_matches();
    void record(ir_variable *producer_var, ir_variable *consumer_var);
    unsigned assign_locations(struct gl_shader_program *prog,
                              uint8_t *components,
                              uint64_t reserved_slots);
    void store_locations() const;
 
 private:
@@ -1243,20 +1244,22 @@ private:
 
    /**
     * If true, this driver has transform feedback enabled. The transform
     * feedback code requires at least some packing be done even when varying
     * packing is disabled, fortunately where transform feedback requires
     * packing it's safe to override the disabled setting. See
     * is_varying_packing_safe().
     */
    const bool xfb_enabled;
 
+   const bool enhanced_layouts_enabled;
+
    /**
     * Enum representing the order in which varyings are packed within a
     * packing class.
     *
     * Currently we pack vec4's first, then vec2's, then scalar values, then
     * vec3's.  This order ensures that the only vectors that are at risk of
     * having to be "double parked" (split between two adjacent varying slots)
     * are the vec3's.
     */
    enum packing_order_enum {
@@ -1319,24 +1322,26 @@ private:
    unsigned matches_capacity;
 
    gl_shader_stage producer_stage;
    gl_shader_stage consumer_stage;
 };
 
 } /* anonymous namespace */
 
 varying_matches::varying_matches(bool disable_varying_packing,
                                  bool xfb_enabled,
+                                 bool enhanced_layouts_enabled,
                                  gl_shader_stage producer_stage,
                                  gl_shader_stage consumer_stage)
    : disable_varying_packing(disable_varying_packing),
      xfb_enabled(xfb_enabled),
+     enhanced_layouts_enabled(enhanced_layouts_enabled),
      producer_stage(producer_stage),
      consumer_stage(consumer_stage)
 {
    /* Note: this initial capacity is rather arbitrarily chosen to be large
     * enough for many cases without wasting an unreasonable amount of space.
     * varying_matches::record() will resize the array if there are more than
     * this number of varyings.
     */
    this->matches_capacity = 8;
    this->matches = (match *)
@@ -1615,37 +1620,99 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
 }
 
 
 /**
  * Update the producer and consumer shaders to reflect the locations
  * assignments that were made by varying_matches::assign_locations().
  */
 void
 varying_matches::store_locations() const
 {
+   /* Check if location needs to be packed with lower_packed_varyings() or if
+    * we can just use ARB_enhanced_layouts packing.
+    */
+   bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 };
+   const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
+
    for (unsigned i = 0; i < this->num_matches; i++) {
       ir_variable *producer_var = this->matches[i].producer_var;
       ir_variable *consumer_var = this->matches[i].consumer_var;
       unsigned generic_location = this->matches[i].generic_location;
       unsigned slot = generic_location / 4;
       unsigned offset = generic_location % 4;
 
       if (producer_var) {
          producer_var->data.location = VARYING_SLOT_VAR0 + slot;
          producer_var->data.location_frac = offset;
       }
 
       if (consumer_var) {
          assert(consumer_var->data.location == -1);
          consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
          consumer_var->data.location_frac = offset;
       }
+
+      /* Find locations suitable for native packing via
+       * ARB_enhanced_layouts.
+       */
+      if (producer_var && consumer_var) {
+         if (enhanced_layouts_enabled) {
+            const glsl_type *type =
+               get_varying_type(producer_var, producer_stage);
+            if (type->is_array() || type->is_matrix() || type->is_record() ||
+                type->is_double()) {
+               unsigned comp_slots = type->component_slots() + offset;
+               unsigned slots = comp_slots / 4;
+               if (comp_slots % 4)
+                  slots += 1;
+
+               for (unsigned j = 0; j < slots; j++) {
+                  pack_loc[slot + j] = true;
+               }
+            } else if (offset + type->vector_elements > 4) {
+               pack_loc[slot] = true;
+               pack_loc[slot + 1] = true;
+            } else {
+               loc_type[slot][offset] = type;
+            }
+         }
+      }
+   }
+
+   /* Attempt to use ARB_enhanced_layouts for more efficient packing if
+    * suitable.
+    */
+   if (enhanced_layouts_enabled) {
+      for (unsigned i = 0; i < this->num_matches; i++) {
+         ir_variable *producer_var = this->matches[i].producer_var;
+         ir_variable *consumer_var = this->matches[i].consumer_var;
+         unsigned generic_location = this->matches[i].generic_location;
+         unsigned slot = generic_location / 4;
+
+         if (pack_loc[slot] || !producer_var || !consumer_var)
+            continue;
+
+         const glsl_type *type =
+            get_varying_type(producer_var, producer_stage);
+         bool type_match = true;
+         for (unsigned j = 0; j < 4; j++) {
+            if (loc_type[slot][j]) {
+               if (type->base_type != loc_type[slot][j]->base_type)
+                  type_match = false;
+            }
+         }
+
+         if (type_match) {
+            producer_var->data.explicit_location = 1;
+            consumer_var->data.explicit_location = 1;
+         }
+      }
    }
 }
 
 
 /**
  * Compute the "packing class" of the given varying.  This is an unsigned
  * integer with the property that two variables in the same packing class can
  * be safely backed into the same vec4.
  */
 unsigned
@@ -2093,20 +2160,21 @@ assign_varying_locations(struct gl_context *ctx,
     * these are required by the transform feedback code and it is still safe
     * to do so. We also enable packing when a varying is only used for
     * transform feedback and its not a SSO.
     */
    bool disable_varying_packing =
       ctx->Const.DisableVaryingPacking || unpackable_tess;
    if (prog->SeparateShader && (producer == NULL || consumer == NULL))
       disable_varying_packing = true;
 
    varying_matches matches(disable_varying_packing, xfb_enabled,
+                           ctx->Extensions.ARB_enhanced_layouts,
                            producer ? producer->Stage : (gl_shader_stage)-1,
                            consumer ? consumer->Stage : (gl_shader_stage)-1);
    hash_table *tfeedback_candidates =
          _mesa_hash_table_create(NULL, _mesa_key_hash_string,
                                  _mesa_key_string_equal);
    hash_table *consumer_inputs =
          _mesa_hash_table_create(NULL, _mesa_key_hash_string,
                                  _mesa_key_string_equal);
    hash_table *consumer_interface_inputs =
          _mesa_hash_table_create(NULL, _mesa_key_hash_string,
-- 
2.9.3



More information about the mesa-dev mailing list