[Mesa-dev] [PATCH] glsl: Adds GLES 2.0 varyings packing heuristic
Vincent Lejeune
vljn at ovi.com
Fri Feb 24 11:06:23 PST 2012
v2: Adds the others packing heuristic
v3: update enum name
---
src/glsl/linker.cpp | 514 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 514 insertions(+), 0 deletions(-)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 707e645..3f8ef99 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1831,6 +1831,512 @@ public:
}
};
+namespace pack {
+
+/**
+ * Varyings packing can be seen as an instance of the bin packing problem.
+ * It is a NP hard problem in general.
+ *
+ * ES 2.0 specs gives (GLSL_ES_Specification_1.0.17-3.pdf, p111) an algorithm
+ * that specifies a minimum requirement for when a set of varyings must be
+ * supported.
+ * We almost follow the algorithm : the target architecture should be a
+ * 8x4 grid, we use a {number_of_available_reg}x4 grid.
+ */
+class buffer_representation
+{
+protected:
+ class delayed_location_assignment : public exec_node
+ {
+ public:
+ ir_variable *producer;
+ ir_variable *consumer;
+ unsigned location;
+ unsigned horizontal_location;
+ delayed_location_assignment(ir_variable *p, ir_variable *c)
+ : producer(p), consumer(c)
+ {
+ }
+
+ bool is_before(const delayed_location_assignment *comparee) const
+ {
+ unsigned local_size = (producer->type->is_array())?producer->type->length:1;
+ unsigned comparee_size = (comparee->producer->type->is_array())?comparee->producer->type->length:1;
+ return local_size >= comparee_size;
+ }
+
+ void set_location_to_vars(unsigned producer_offset, unsigned consumer_offset, unsigned location, unsigned horizontal_location)
+ {
+ producer->location = producer_offset + location;
+ producer->horizontal_location = horizontal_location;
+ if (consumer) {
+ consumer->location = consumer_offset + location;
+ consumer->horizontal_location = horizontal_location;
+ }
+ }
+
+ };
+
+ void *ctx;
+ unsigned number_of_regs;
+ bool * is_occupied;
+ unsigned occupied_space_in_column[4];
+ unsigned producer_offset, consumer_offset;
+ exec_list *stored_vars[8];
+
+
+#define ARGMAX(i, j) (occupied_space_in_column[(i)] >= occupied_space_in_column[(j)])?(i):(j)
+#define ARGMIN(i, j) (occupied_space_in_column[(i)] >= occupied_space_in_column[(j)])?(j):(i)
+
+ /**
+ * Order index from most occupied column index to least occupied column index
+ */
+ void order(unsigned reordered[4]) const
+ {
+ unsigned mx1 = ARGMAX(0, 1), mx2 = ARGMAX(2, 3);
+ unsigned mn1 = ARGMIN(0, 1), mn2 = ARGMIN(2, 3);
+
+ reordered[0] = ARGMAX(mx1, mx2);
+ unsigned semi_max = ARGMIN(mx1, mx2);
+ reordered[3] = ARGMIN(mn1, mn2);
+ unsigned semi_min = ARGMAX(mn1, mn2);
+ reordered[1] = ARGMAX(semi_max, semi_min);
+ reordered[2] = ARGMIN(semi_min, semi_max);
+ }
+
+#undef ARGMAX
+#undef ARGMIN
+
+ INLINE
+ bool& is_occupied_ref(unsigned index, unsigned components)
+ {
+ assert (components < 4 && index < number_of_regs);
+ return is_occupied[4 * index + components];
+ }
+
+ bool is_range_free(unsigned reg, unsigned component_start, unsigned row_occupied, unsigned components_occupied)
+ {
+
+ if (component_start + components_occupied - 1 > 3)
+ return false;
+ if (reg + row_occupied - 1 > number_of_regs - 1)
+ return false;
+ for (unsigned j = 0; j < row_occupied; j++) {
+ for (unsigned i = 0; i < components_occupied; i++) {
+ if (is_occupied_ref(reg + j, component_start + i))
+ return false;
+ }
+ }
+ return true;
+ }
+
+ void mark_as_occupied(unsigned location, unsigned horizontal_location, unsigned size, unsigned components_occupied)
+ {
+ for (unsigned j = 0; j < size; j++) {
+ for (unsigned i = 0; i < components_occupied; i++) {
+ is_occupied_ref(location + j, horizontal_location + i) = true;
+ occupied_space_in_column[horizontal_location + i] ++;
+ }
+ }
+ }
+
+ void insert(delayed_location_assignment *dla, const glsl_type *const type)
+ {
+ if (type->is_array()) {
+ insert(dla, type->fields.array);
+ }
+
+ // MAT4
+ if (type->is_matrix() && type->vector_elements == 4) {
+ insert_largest_first(stored_vars[1], dla);
+ return;
+ }
+
+ // MAT2
+ if (type->is_matrix() && type->vector_elements == 2) {
+ insert_largest_first(stored_vars[2], dla);
+ return;
+ }
+
+ // {I,B,}VEC4
+ if (type->is_vector() && type->vector_elements == 4) {
+ insert_largest_first(stored_vars[3], dla);
+ return;
+ }
+
+ // MAT3
+ if (type->is_matrix() && type->vector_elements == 3) {
+ insert_largest_first(stored_vars[4], dla);
+ return;
+ }
+
+ // {I,B,}VEC3
+ if (type->is_vector() && type->vector_elements == 3) {
+ insert_largest_first(stored_vars[5], dla);
+ return;
+ }
+
+ // {I,B,}VEC2
+ if (type->is_vector() && type->vector_elements == 2) {
+ insert_largest_first(stored_vars[6], dla);
+ return;
+ }
+
+ // float, int
+ if (type->is_scalar()) {
+ insert_largest_first(stored_vars[7], dla);
+ return;
+ }
+
+ assert(0 && "varying type not supported");
+ return;
+ }
+
+ void
+ insert_largest_first(exec_list *lst, delayed_location_assignment *dla)
+ {
+ if (lst->is_empty()) {
+ lst->push_tail(dla);
+ return;
+ }
+ foreach_list(node, lst) {
+ delayed_location_assignment *dla_from_lst = (class delayed_location_assignment *) node;
+ if (dla->is_before(dla_from_lst)) {
+ dla_from_lst->insert_before(dla);
+ return;
+ }
+
+ }
+ lst->push_tail(dla);
+ }
+
+ void space_occupied (const glsl_type *const type, unsigned &components, unsigned &size)
+ {
+ if (type->is_array()) {
+ space_occupied(type->fields.array, components, size);
+ size *= type->length;
+ return;
+ }
+
+ if (type == glsl_type::mat2_type) {
+ components = 4;
+ size = 2;
+ return;
+ }
+
+ components = type->vector_elements;
+ size = type->matrix_columns;
+ }
+
+ /**
+ * "For 2,3 and 4 component variables packing is started using the 1st column of the 1st row.
+ * Variables are then allocated to successive rows, aligning them to the 1st column"
+ * (ES Spec)
+ */
+ void phase1()
+ {
+ unsigned i, current_row = 0;
+ for (i = 1; i < 8; i++) {
+ foreach_list_safe (node, stored_vars[i]) {
+ delayed_location_assignment *dla = (class delayed_location_assignment *) node;
+ unsigned components, size;
+ space_occupied(dla->producer->type, components, size);
+ if (is_range_free(current_row, 0, size, components)) {
+ dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 0);
+ mark_as_occupied(current_row, 0, size, components);
+ dla->remove();
+ current_row += size;
+ } else {
+ return;
+ }
+ }
+ }
+ }
+
+ /**
+ * "For 2 component variables, when there are no spare rows, the strategy is switched to using the
+ * highest numbered row and the lowest numbered column where the variable will fit. "
+ * (ES Spec)
+ */
+ void phase2()
+ {
+ int current_row = number_of_regs - 1;
+ foreach_list_safe (node, stored_vars[6]) {
+ delayed_location_assignment *dla = (class delayed_location_assignment *) node;
+ unsigned components, size;
+ space_occupied(dla->producer->type, components, size);
+ while (current_row >= 0) {
+ if (is_range_free(current_row, 0, size, 2)) {
+ dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 0);
+ mark_as_occupied(current_row, 0, size, 2);
+ dla->remove();
+ current_row -= size;
+ break;
+ } else if (is_range_free(current_row, 2, size, 2)) {
+ dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 2);
+ mark_as_occupied(current_row, 2, size, 2);
+ dla->remove();
+ current_row -= size;
+ break;
+ } else {
+ current_row --;
+ }
+ }
+ if (current_row < 0)
+ return;
+ }
+ }
+
+ /**
+ * "[1 Component variables] are packed in order of size, largest first.
+ * Each variable is placed in the column that leaves the least
+ * amount of space in the column and aligned to the lowest available rows within that column."
+ * (ES Spec)
+ */
+ void phase3()
+ {
+ foreach_list_safe (node, stored_vars[7]) {
+ delayed_location_assignment *dla = (class delayed_location_assignment *) node;
+ unsigned components, size;
+ space_occupied(dla->producer->type, components, size);
+ unsigned reordered_index[4];
+ // If we are here, no more space in first column
+ order(reordered_index);
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned free_space = number_of_regs - occupied_space_in_column[reordered_index[i]];
+ if (size > free_space)
+ continue;
+ else {
+ // Find first free row ; empty space is ensured to be contiguous
+ unsigned first_free_row = 0;
+ while (!is_range_free(first_free_row,reordered_index[i],1,1))
+ first_free_row ++ ;
+ dla->set_location_to_vars(producer_offset, consumer_offset, first_free_row, reordered_index[i]);
+ mark_as_occupied(first_free_row, reordered_index[i], size, 1);
+ dla->remove();
+ break;
+ }
+ }
+ }
+ }
+
+ static void report_error(gl_shader_program *prog, exec_list *failing_varyings)
+ {
+ foreach_list_const (node, failing_varyings) {
+ const delayed_location_assignment *dla = (delayed_location_assignment *) node;
+ linker_error(prog, "Fails to find sufficient resources for varying %s\n",
+ dla->producer->name);
+ }
+ }
+
+public:
+ buffer_representation(unsigned p_offset, unsigned c_offset, unsigned n)
+ : ctx(ralloc_context(NULL)), number_of_regs(n), producer_offset(p_offset), consumer_offset(c_offset)
+ {
+ is_occupied = rzalloc_array(ctx, bool, 4 * number_of_regs);
+ for (unsigned i = 0; i < 8; i++) {
+ stored_vars[i] = new (ctx) exec_list();
+ }
+ memset(occupied_space_in_column, 0, sizeof(occupied_space_in_column));
+ }
+
+ ~buffer_representation()
+ {
+ ralloc_free(ctx);
+ }
+
+ void insert(ir_variable *producer, ir_variable *consumer)
+ {
+ assert(producer);
+ if (consumer)
+ assert(producer->type == consumer->type);
+
+ delayed_location_assignment *dla = new (ctx) delayed_location_assignment(producer, consumer);
+ insert(dla, producer->type);
+ }
+
+ // Note : We need prog to report errors
+ bool try_pack(gl_shader_program *prog)
+ {
+ phase1();
+ if (!stored_vars[1]->is_empty() || !stored_vars[2]->is_empty() || !stored_vars[3]->is_empty() ||
+ !stored_vars[4]->is_empty() || !stored_vars[5]->is_empty()) {
+
+ /**
+ * "For 2 component variables, when there are no spare rows, ...
+ * Packing of any further 3 or 4 component variables will fail at this point."
+ * (ES Spec)
+ */
+ report_error(prog, stored_vars[1]);
+ report_error(prog, stored_vars[2]);
+ report_error(prog, stored_vars[3]);
+ report_error(prog, stored_vars[3]);
+ report_error(prog, stored_vars[4]);
+ report_error(prog, stored_vars[5]);
+ return false;
+
+ }
+
+ phase2();
+ if (!stored_vars[6]->is_empty()) {
+ report_error(prog, stored_vars[6]);
+ return false;
+ }
+ phase3();
+ if (!stored_vars[7]->is_empty()) {
+ report_error(prog, stored_vars[7]);
+ return false;
+ }
+
+ return true;
+ }
+
+
+};
+
+void count_varyings_per_interp(exec_list *lst, unsigned &none_interp, unsigned &flat_interp,
+ unsigned &smooth_interp, unsigned &noperspective_interp)
+{
+ none_interp = 0, flat_interp = 0, smooth_interp = 0, noperspective_interp = 0;
+
+ foreach_list_const(node, lst) {
+ const varying_info *vi = (class varying_info *) node;
+ switch (vi->produced->interpolation) {
+ case INTERP_QUALIFIER_NONE:
+ none_interp ++;
+ break;
+ case INTERP_QUALIFIER_SMOOTH:
+ smooth_interp ++;
+ break;
+ case INTERP_QUALIFIER_FLAT:
+ flat_interp ++;
+ break;
+ case INTERP_QUALIFIER_NOPERSPECTIVE:
+ noperspective_interp ++;
+ break;
+ }
+ }
+}
+
+void populate_varyings_per_interp(exec_list *lst, buffer_representation &none_interp, buffer_representation &flat_interp,
+ buffer_representation &smooth_interp,buffer_representation &noperspective_interp)
+{
+ foreach_list(node, lst) {
+ varying_info *vi = (class varying_info *) node;
+ switch (vi->produced->interpolation) {
+ case INTERP_QUALIFIER_NONE:
+ none_interp.insert(vi->produced, vi->consumed);
+ break;
+ case INTERP_QUALIFIER_SMOOTH:
+ smooth_interp.insert(vi->produced, vi->consumed);
+ break;
+ case INTERP_QUALIFIER_FLAT:
+ flat_interp.insert(vi->produced, vi->consumed);
+ break;
+ case INTERP_QUALIFIER_NOPERSPECTIVE:
+ noperspective_interp.insert(vi->produced, vi->consumed);
+ break;
+ }
+ }
+}
+
+}
+
+
+
+/**
+ * These function packs varyings according to ES Spec recommandation.
+ * It will trigger "linker_error" if packing fails.
+ * This function is conservative, it does not try to pack varyings with different interpolation
+ */
+bool
+varying_pack_no_mixed(exec_list *lst, gl_shader_program *prog, unsigned max_varyings)
+{
+ // Heuristic : we allocate regs for each interpolation type wrt number of variables using them.
+ unsigned none_interp_count, flat_interp_count, smooth_interp_count, noperspective_interp_count;
+
+ pack::count_varyings_per_interp(lst, none_interp_count, flat_interp_count, smooth_interp_count, noperspective_interp_count);
+
+ unsigned total = none_interp_count + smooth_interp_count + flat_interp_count + noperspective_interp_count;
+ if (!total)
+ return true;
+
+ unsigned none_regs = (max_varyings * none_interp_count ) / total;
+ unsigned flat_regs = (max_varyings * flat_interp_count ) / total;
+ unsigned smooth_regs = (max_varyings * smooth_interp_count ) / total;
+ unsigned noperspective_regs = (max_varyings * noperspective_interp_count ) / total;
+
+ pack::buffer_representation none_interp(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, none_regs);
+ pack::buffer_representation flat_interp(VERT_RESULT_VAR0 + none_regs, FRAG_ATTRIB_VAR0 + none_regs, flat_regs);
+ pack::buffer_representation smooth_interp(VERT_RESULT_VAR0 + none_regs + flat_regs, FRAG_ATTRIB_VAR0 + none_regs + flat_regs, smooth_regs);
+ pack::buffer_representation noperspective_interp(VERT_RESULT_VAR0 + none_regs + flat_regs + smooth_regs,
+ FRAG_ATTRIB_VAR0 + none_regs + flat_regs + smooth_regs,
+ noperspective_regs);
+
+ pack::populate_varyings_per_interp(lst, none_interp, flat_interp, smooth_interp, none_interp);
+
+ if (!none_interp.try_pack(prog))
+ return false;
+ if (!flat_interp.try_pack(prog))
+ return false;
+ if (!smooth_interp.try_pack(prog))
+ return false;
+ if (!noperspective_interp.try_pack(prog))
+ return false;
+
+ return true;
+}
+
+bool
+varying_pack_smooth_and_nopersp_mixed(exec_list *lst, gl_shader_program *prog, unsigned max_varyings)
+{
+ // Heuristic : we allocate regs for each interpolation type wrt number of variables using them.
+ unsigned flat_interp_count, mixed_interp_count;
+
+ pack::count_varyings_per_interp(lst, mixed_interp_count, flat_interp_count, mixed_interp_count, mixed_interp_count);
+
+ unsigned total = flat_interp_count + mixed_interp_count;
+ if (!total)
+ return true;
+
+ unsigned flat_regs = (max_varyings * flat_interp_count ) / total;
+ unsigned mixed_interp_regs = (max_varyings * mixed_interp_count ) / total;
+
+ pack::buffer_representation flat_interp(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, flat_regs);
+ pack::buffer_representation mixed_interp(VERT_RESULT_VAR0 + flat_regs, FRAG_ATTRIB_VAR0 + flat_regs, mixed_interp_regs);
+
+ pack::populate_varyings_per_interp(lst, mixed_interp, flat_interp, mixed_interp, mixed_interp);
+
+ if (!flat_interp.try_pack(prog))
+ return false;
+ if (!mixed_interp.try_pack(prog))
+ return false;
+
+ return true;
+}
+
+bool
+varying_pack_no_constraints(exec_list *lst, gl_shader_program *prog, unsigned max_varyings)
+{
+ // Heuristic : we allocate regs for each interpolation type wrt number of variables using them.
+ unsigned total;
+
+ pack::count_varyings_per_interp(lst, total, total, total, total);
+
+ if (!total)
+ return true;
+
+ pack::buffer_representation varying_buffer(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, max_varyings);
+
+ pack::populate_varyings_per_interp(lst, varying_buffer, varying_buffer, varying_buffer, varying_buffer);
+
+ if (!varying_buffer.try_pack(prog))
+ return false;
+
+ return true;
+}
+
+// Fallback packing function
bool
varying_pack_avoid(exec_list *lst, gl_shader_program *prog, unsigned max_varyings)
{
@@ -1931,6 +2437,14 @@ assign_varying_locations(struct gl_context *ctx,
bool (* varying_pack)(exec_list *, gl_shader_program *, unsigned);
switch (ctx->ShaderCompilerOptions[0].VaryingsPackingConstraint) {
+ case PACKING_CONSTRAINT_NO_MIXED_INTERPOLATION:
+ varying_pack = varying_pack_no_mixed;
+ break;
+ case PACKING_CONSTRAINT_SMOOTH_NOPERSPECTIVE_MIXED:
+ varying_pack = varying_pack_smooth_and_nopersp_mixed;
+ break;
+ case PACKING_CONSTRAINT_NONE:
+ varying_pack = varying_pack_no_constraints;
default:
varying_pack = varying_pack_avoid;
break;
--
1.7.7
More information about the mesa-dev
mailing list