[Mesa-dev] [PATCH 06/10] glsl: Adds GLES 2.0 varyings packing heuristic

Vincent Lejeune vljn at ovi.com
Thu Feb 23 12:12:27 PST 2012


v2: Adds the others packing heuristic
---
 src/glsl/linker.cpp |  514 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 514 insertions(+), 0 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 707e645..e9c8cd1 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1831,6 +1831,512 @@ public:
    }
 };
 
+namespace pack {
+
+/**
+ * Varyings packing can be seen as an instance of the bin packing problem.
+ * It is a NP hard problem in general.
+ *
+ * ES 2.0 specs gives (GLSL_ES_Specification_1.0.17-3.pdf, p111) an algorithm
+ * that specifies a minimum requirement for when a set of varyings must be
+ * supported.
+ * We almost follow the algorithm : the target architecture should be a
+ * 8x4 grid, we use a {number_of_available_reg}x4 grid.
+ */
+class buffer_representation
+{
+protected:
+   class delayed_location_assignment : public exec_node
+   {
+   public:
+      ir_variable *producer;
+      ir_variable *consumer;
+      unsigned location;
+      unsigned horizontal_location;
+      delayed_location_assignment(ir_variable *p, ir_variable *c)
+         : producer(p), consumer(c)
+      {
+      }
+
+      bool is_before(const delayed_location_assignment *comparee) const
+      {
+         unsigned local_size = (producer->type->is_array())?producer->type->length:1;
+         unsigned comparee_size = (comparee->producer->type->is_array())?comparee->producer->type->length:1;
+         return local_size >= comparee_size;
+      }
+
+      void set_location_to_vars(unsigned producer_offset, unsigned consumer_offset, unsigned location, unsigned horizontal_location)
+      {
+         producer->location = producer_offset + location;
+         producer->horizontal_location = horizontal_location;
+         if (consumer) {
+            consumer->location = consumer_offset + location;
+            consumer->horizontal_location = horizontal_location;
+         }
+      }
+
+   };
+   
+   void *ctx;
+   unsigned number_of_regs;
+   bool * is_occupied;
+   unsigned occupied_space_in_column[4];
+   unsigned producer_offset, consumer_offset;
+   exec_list *stored_vars[8];
+   
+   
+#define ARGMAX(i, j) (occupied_space_in_column[(i)] >= occupied_space_in_column[(j)])?(i):(j)
+#define ARGMIN(i, j) (occupied_space_in_column[(i)] >= occupied_space_in_column[(j)])?(j):(i)
+   
+   /**
+    * Order index from most occupied column index to least occupied column index
+    */
+   void order(unsigned reordered[4]) const
+   {
+      unsigned mx1 = ARGMAX(0, 1), mx2 = ARGMAX(2, 3);
+      unsigned mn1 = ARGMIN(0, 1), mn2 = ARGMIN(2, 3);
+
+      reordered[0] = ARGMAX(mx1, mx2);
+      unsigned semi_max = ARGMIN(mx1, mx2);
+      reordered[3] = ARGMIN(mn1, mn2);
+      unsigned semi_min = ARGMAX(mn1, mn2);
+      reordered[1] = ARGMAX(semi_max, semi_min);
+      reordered[2] = ARGMIN(semi_min, semi_max);
+   }
+   
+#undef ARGMAX
+#undef ARGMIN
+   
+   INLINE
+   bool& is_occupied_ref(unsigned index, unsigned components)
+   {
+      assert (components < 4 && index < number_of_regs);
+      return is_occupied[4 * index + components];
+   }
+   
+   bool is_range_free(unsigned reg, unsigned component_start, unsigned row_occupied, unsigned components_occupied)
+   {
+
+      if (component_start + components_occupied - 1 > 3)
+         return false;
+      if (reg + row_occupied - 1 > number_of_regs - 1)
+         return false;
+      for (unsigned j = 0; j < row_occupied; j++) {
+         for (unsigned i = 0; i < components_occupied; i++) {
+            if (is_occupied_ref(reg + j, component_start + i))
+               return false;
+         }
+      }
+      return true;
+   }
+   
+   void mark_as_occupied(unsigned location, unsigned horizontal_location, unsigned size, unsigned components_occupied)
+   {
+      for (unsigned j = 0; j < size; j++) {
+         for (unsigned i = 0; i < components_occupied; i++) {
+            is_occupied_ref(location + j, horizontal_location + i) = true;
+            occupied_space_in_column[horizontal_location + i] ++;
+         }
+      }
+   }
+   
+   void insert(delayed_location_assignment *dla, const glsl_type *const type)
+   {
+      if (type->is_array()) {
+         insert(dla, type->fields.array);
+      }
+
+      // MAT4
+      if (type->is_matrix() && type->vector_elements == 4) {
+         insert_largest_first(stored_vars[1], dla);
+         return;
+      }
+
+      // MAT2
+      if (type->is_matrix() && type->vector_elements == 2) {
+         insert_largest_first(stored_vars[2], dla);
+         return;
+      }
+
+      // {I,B,}VEC4
+      if (type->is_vector() && type->vector_elements == 4) {
+         insert_largest_first(stored_vars[3], dla);
+         return;
+      }
+
+      // MAT3
+      if (type->is_matrix() && type->vector_elements == 3) {
+         insert_largest_first(stored_vars[4], dla);
+         return;
+      }
+
+      // {I,B,}VEC3
+      if (type->is_vector() && type->vector_elements == 3) {
+         insert_largest_first(stored_vars[5], dla);
+         return;
+      }
+
+      // {I,B,}VEC2
+      if (type->is_vector() && type->vector_elements == 2) {
+         insert_largest_first(stored_vars[6], dla);
+         return;
+      }
+
+      // float, int
+      if (type->is_scalar()) {
+         insert_largest_first(stored_vars[7], dla);
+         return;
+      }
+
+      assert(0 && "varying type not supported");
+      return;
+   }
+   
+   void
+   insert_largest_first(exec_list *lst, delayed_location_assignment *dla)
+   {
+      if (lst->is_empty()) {
+         lst->push_tail(dla);
+         return;
+      }
+      foreach_list(node, lst) {
+         delayed_location_assignment *dla_from_lst = (class delayed_location_assignment *) node;
+         if (dla->is_before(dla_from_lst)) {
+            dla_from_lst->insert_before(dla);
+            return;
+         }
+
+      }
+      lst->push_tail(dla);
+   }
+   
+   void space_occupied (const glsl_type *const type, unsigned &components, unsigned &size)
+   {
+      if (type->is_array()) {
+         space_occupied(type->fields.array, components, size);
+         size *= type->length;
+         return;
+      }
+
+      if (type == glsl_type::mat2_type) {
+         components = 4;
+         size = 2;
+         return;
+      }
+
+      components = type->vector_elements;
+      size = type->matrix_columns;
+   }
+   
+   /**
+    * "For 2,3 and 4 component variables packing is started using the 1st column of the 1st row.
+    *	Variables are then allocated to successive rows, aligning them to the 1st column"
+    * (ES Spec)
+    */
+   void phase1()
+   {
+      unsigned i, current_row = 0;
+      for (i = 1; i < 8; i++) {
+         foreach_list_safe (node, stored_vars[i]) {
+            delayed_location_assignment *dla = (class delayed_location_assignment *) node;
+            unsigned components, size;
+            space_occupied(dla->producer->type, components, size);
+            if (is_range_free(current_row, 0, size, components)) {
+               dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 0);
+               mark_as_occupied(current_row, 0, size, components);
+               dla->remove();
+               current_row += size;
+            } else {
+               return;
+            }
+         }
+      }
+   }
+   
+   /**
+    * "For 2 component variables, when there are no spare rows, the strategy is switched to using the
+    * highest numbered row and the lowest numbered column where the variable will fit. "
+    * (ES Spec)
+    */
+   void phase2()
+   {
+      int current_row = number_of_regs - 1;
+      foreach_list_safe (node, stored_vars[6]) {
+         delayed_location_assignment *dla = (class delayed_location_assignment *) node;
+         unsigned components, size;
+         space_occupied(dla->producer->type, components, size);
+         while (current_row >= 0) {
+            if (is_range_free(current_row, 0, size, 2)) {
+               dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 0);
+               mark_as_occupied(current_row, 0, size, 2);
+               dla->remove();
+               current_row -= size;
+               break;
+            } else if (is_range_free(current_row, 2, size, 2)) {
+               dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 2);
+               mark_as_occupied(current_row, 2, size, 2);
+               dla->remove();
+               current_row -= size;
+               break;
+            } else {
+               current_row --;
+            }
+         }
+         if (current_row < 0)
+            return;
+      }
+   }
+   
+   /**
+    * "[1 Component variables] are packed in order of size, largest first.
+    * Each variable is placed in the column that leaves the least
+    * amount of space in the column and aligned to the lowest available rows within that column."
+    * (ES Spec)
+    */
+   void phase3()
+   {
+      foreach_list_safe (node, stored_vars[7]) {
+         delayed_location_assignment *dla = (class delayed_location_assignment *) node;
+         unsigned components, size;
+         space_occupied(dla->producer->type, components, size);
+         unsigned reordered_index[4];
+         // If we are here, no more space in first column
+         order(reordered_index);
+         for (unsigned i = 0; i < 4; i++) {
+            unsigned free_space = number_of_regs - occupied_space_in_column[reordered_index[i]];
+            if (size > free_space)
+               continue;
+            else {
+               // Find first free row ; empty space is ensured to be contiguous
+               unsigned first_free_row = 0;
+               while (!is_range_free(first_free_row,reordered_index[i],1,1))
+                  first_free_row ++ ;
+               dla->set_location_to_vars(producer_offset, consumer_offset, first_free_row, reordered_index[i]);
+               mark_as_occupied(first_free_row, reordered_index[i], size, 1);
+               dla->remove();
+               break;
+            }
+         }
+      }
+   }
+   
+   static void report_error(gl_shader_program *prog, exec_list *failing_varyings)
+   {
+      foreach_list_const (node, failing_varyings) {
+         const delayed_location_assignment *dla = (delayed_location_assignment *) node;
+         linker_error(prog, "Fails to find sufficient resources for varying %s\n",
+                      dla->producer->name);
+      }
+   }
+   
+public:
+   buffer_representation(unsigned p_offset, unsigned c_offset, unsigned n)
+      : ctx(ralloc_context(NULL)), number_of_regs(n), producer_offset(p_offset), consumer_offset(c_offset)
+   {
+      is_occupied = rzalloc_array(ctx, bool, 4 * number_of_regs);
+      for (unsigned i = 0; i < 8; i++) {
+         stored_vars[i] = new (ctx) exec_list();
+      }
+      memset(occupied_space_in_column, 0, sizeof(occupied_space_in_column));
+   }
+   
+   ~buffer_representation()
+   {
+      ralloc_free(ctx);
+   }
+   
+   void insert(ir_variable *producer, ir_variable *consumer)
+   {
+      assert(producer);
+      if (consumer)
+         assert(producer->type == consumer->type);
+
+      delayed_location_assignment *dla = new (ctx) delayed_location_assignment(producer, consumer);
+      insert(dla, producer->type);
+   }
+   
+   // Note : We need prog to report errors
+   bool try_pack(gl_shader_program *prog)
+   {
+      phase1();
+      if (!stored_vars[1]->is_empty() || !stored_vars[2]->is_empty() || !stored_vars[3]->is_empty() ||
+          !stored_vars[4]->is_empty() || !stored_vars[5]->is_empty()) {
+
+         /**
+     * "For 2 component variables, when there are no spare rows, ...
+     * Packing of any further 3 or 4 component variables will fail at this point."
+     * (ES Spec)
+     */
+         report_error(prog, stored_vars[1]);
+         report_error(prog, stored_vars[2]);
+         report_error(prog, stored_vars[3]);
+         report_error(prog, stored_vars[3]);
+         report_error(prog, stored_vars[4]);
+         report_error(prog, stored_vars[5]);
+         return false;
+
+      }
+
+      phase2();
+      if (!stored_vars[6]->is_empty()) {
+         report_error(prog, stored_vars[6]);
+         return false;
+      }
+      phase3();
+      if (!stored_vars[7]->is_empty()) {
+         report_error(prog, stored_vars[7]);
+         return false;
+      }
+
+      return true;
+   }
+   
+   
+};
+
+void count_varyings_per_interp(exec_list *lst, unsigned &none_interp, unsigned &flat_interp,
+                               unsigned &smooth_interp, unsigned &noperspective_interp)
+{
+   none_interp = 0, flat_interp = 0, smooth_interp = 0, noperspective_interp = 0;
+
+   foreach_list_const(node, lst) {
+      const varying_info *vi = (class varying_info *) node;
+      switch (vi->produced->interpolation) {
+      case INTERP_QUALIFIER_NONE:
+         none_interp ++;
+         break;
+      case INTERP_QUALIFIER_SMOOTH:
+         smooth_interp ++;
+         break;
+      case INTERP_QUALIFIER_FLAT:
+         flat_interp ++;
+         break;
+      case INTERP_QUALIFIER_NOPERSPECTIVE:
+         noperspective_interp ++;
+         break;
+      }
+   }
+}
+
+void populate_varyings_per_interp(exec_list *lst, buffer_representation &none_interp, buffer_representation &flat_interp,
+                                  buffer_representation &smooth_interp,buffer_representation &noperspective_interp)
+{
+   foreach_list(node, lst) {
+      varying_info *vi = (class varying_info *) node;
+      switch (vi->produced->interpolation) {
+      case INTERP_QUALIFIER_NONE:
+         none_interp.insert(vi->produced, vi->consumed);
+         break;
+      case INTERP_QUALIFIER_SMOOTH:
+         smooth_interp.insert(vi->produced, vi->consumed);
+         break;
+      case INTERP_QUALIFIER_FLAT:
+         flat_interp.insert(vi->produced, vi->consumed);
+         break;
+      case INTERP_QUALIFIER_NOPERSPECTIVE:
+         noperspective_interp.insert(vi->produced, vi->consumed);
+         break;
+      }
+   }
+}
+
+}
+
+
+
+/**
+ * These function packs varyings according to ES Spec recommandation.
+ * It will trigger "linker_error" if packing fails.
+ * This function is conservative, it does not try to pack varyings with different interpolation
+ */
+bool
+varying_pack_no_mixed(exec_list *lst, gl_shader_program *prog, unsigned max_varyings)
+{
+   // Heuristic : we allocate regs for each interpolation type wrt number of variables using them.
+   unsigned none_interp_count, flat_interp_count, smooth_interp_count, noperspective_interp_count;
+
+   pack::count_varyings_per_interp(lst, none_interp_count, flat_interp_count, smooth_interp_count, noperspective_interp_count);
+   
+   unsigned total = none_interp_count + smooth_interp_count + flat_interp_count + noperspective_interp_count;
+   if (!total)
+      return true;
+   
+   unsigned none_regs = (max_varyings * none_interp_count ) / total;
+   unsigned flat_regs = (max_varyings * flat_interp_count ) / total;
+   unsigned smooth_regs = (max_varyings * smooth_interp_count ) / total;
+   unsigned noperspective_regs = (max_varyings * noperspective_interp_count ) / total;
+
+   pack::buffer_representation none_interp(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, none_regs);
+   pack::buffer_representation flat_interp(VERT_RESULT_VAR0 + none_regs, FRAG_ATTRIB_VAR0 + none_regs, flat_regs);
+   pack::buffer_representation smooth_interp(VERT_RESULT_VAR0 + none_regs + flat_regs, FRAG_ATTRIB_VAR0 + none_regs + flat_regs, smooth_regs);
+   pack::buffer_representation noperspective_interp(VERT_RESULT_VAR0 + none_regs + flat_regs + smooth_regs,
+                                                    FRAG_ATTRIB_VAR0 + none_regs + flat_regs + smooth_regs,
+                                                    noperspective_regs);
+   
+   pack::populate_varyings_per_interp(lst, none_interp, flat_interp, smooth_interp, none_interp);
+   
+   if (!none_interp.try_pack(prog))
+      return false;
+   if (!flat_interp.try_pack(prog))
+      return false;
+   if (!smooth_interp.try_pack(prog))
+      return false;
+   if (!noperspective_interp.try_pack(prog))
+      return false;
+   
+   return true;
+}
+
+bool
+varying_pack_smooth_and_nopersp_mixed(exec_list *lst, gl_shader_program *prog, unsigned max_varyings)
+{
+   // Heuristic : we allocate regs for each interpolation type wrt number of variables using them.
+   unsigned flat_interp_count, mixed_interp_count;
+
+   pack::count_varyings_per_interp(lst, mixed_interp_count, flat_interp_count, mixed_interp_count, mixed_interp_count);
+
+   unsigned total = flat_interp_count + mixed_interp_count;
+   if (!total)
+      return true;
+
+   unsigned flat_regs = (max_varyings * flat_interp_count ) / total;
+   unsigned mixed_interp_regs = (max_varyings * mixed_interp_count ) / total;
+
+   pack::buffer_representation flat_interp(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, flat_regs);
+   pack::buffer_representation mixed_interp(VERT_RESULT_VAR0 + flat_regs, FRAG_ATTRIB_VAR0 + flat_regs, mixed_interp_regs);
+
+   pack::populate_varyings_per_interp(lst, mixed_interp, flat_interp, mixed_interp, mixed_interp);
+
+   if (!flat_interp.try_pack(prog))
+      return false;
+   if (!mixed_interp.try_pack(prog))
+      return false;
+
+   return true;
+}
+
+bool
+varying_pack_no_constraints(exec_list *lst, gl_shader_program *prog, unsigned max_varyings)
+{
+   // Heuristic : we allocate regs for each interpolation type wrt number of variables using them.
+   unsigned total;
+
+   pack::count_varyings_per_interp(lst, total, total, total, total);
+
+   if (!total)
+      return true;
+
+   pack::buffer_representation varying_buffer(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, max_varyings);
+
+   pack::populate_varyings_per_interp(lst, varying_buffer, varying_buffer, varying_buffer, varying_buffer);
+
+   if (!varying_buffer.try_pack(prog))
+      return false;
+
+   return true;
+}
+
+// Fallback packing function
 bool
 varying_pack_avoid(exec_list *lst, gl_shader_program *prog, unsigned max_varyings)
 {
@@ -1931,6 +2437,14 @@ assign_varying_locations(struct gl_context *ctx,
    bool (* varying_pack)(exec_list *, gl_shader_program *, unsigned);
 
    switch (ctx->ShaderCompilerOptions[0].VaryingsPackingConstraint) {
+   case NO_MIXED_INTERPOLATION:
+      varying_pack = varying_pack_no_mixed;
+      break;
+   case SMOOTH_NOPERSPECTIVE_MIXED:
+      varying_pack = varying_pack_smooth_and_nopersp_mixed;
+      break;
+   case NONE:
+      varying_pack = varying_pack_no_constraints;
    default:
       varying_pack = varying_pack_avoid;
       break;
-- 
1.7.7



More information about the mesa-dev mailing list