<p dir="ltr">On Nov 9, 2016 5:53 AM, "Timothy Arceri" <<a href="mailto:timothy.arceri@collabora.com">timothy.arceri@collabora.com</a>> wrote:<br>
><br>
> On Sat, 2016-11-05 at 13:03 -0400, Ilia Mirkin wrote:<br>
> > Instead of packing varyings into vec4's, keep track of how many<br>
> > components each slot uses and create varyings with matching types.<br>
> > This<br>
> > ensures that we don't end up using more components than the orginal<br>
> > shader, which is especially important for geometry shader output<br>
> > limits.<br>
> ><br>
> > Signed-off-by: Ilia Mirkin <<a href="mailto:imirkin@alum.mit.edu">imirkin@alum.mit.edu</a>><br>
> > ---<br>
> ><br>
> > This comes up for nouveau, where the limit is 1024 output components<br>
> > for a GS,<br>
> > and the hardware complains *loudly* if you even think about going<br>
> > over.<br>
><br>
> Since this is part of the justification for the change I'd like to see<br>
> this included in the commit message. IMO this screams this change is<br>
> important much more than the second paragraph does alone.</p>
<p dir="ltr">Ok.</p>
<p dir="ltr">><br>
> ><br>
> > src/compiler/glsl/ir_optimization.h | 4 +++-<br>
> > src/compiler/glsl/link_varyings.cpp | 16 +++++++++++++---<br>
> > src/compiler/glsl/lower_packed_varyings.cpp | 21 ++++++++++++++++---<br>
> > --<br>
> > 3 files changed, 32 insertions(+), 9 deletions(-)<br>
> ><br>
> > diff --git a/src/compiler/glsl/ir_optimization.h<br>
> > b/src/compiler/glsl/ir_optimization.h<br>
> > index 6f2bc32..8cee418 100644<br>
> > --- a/src/compiler/glsl/ir_optimization.h<br>
> > +++ b/src/compiler/glsl/ir_optimization.h<br>
> > @@ -136,7 +136,9 @@ void lower_shared_reference(struct<br>
> > gl_linked_shader *shader,<br>
> > void lower_ubo_reference(struct gl_linked_shader *shader,<br>
> > bool clamp_block_indices);<br>
> > void lower_packed_varyings(void *mem_ctx,<br>
> > - unsigned locations_used, ir_variable_mode<br>
> > mode,<br>
> > + unsigned locations_used,<br>
> > + const uint8_t *components,<br>
> > + ir_variable_mode mode,<br>
> > unsigned gs_input_vertices,<br>
> > gl_linked_shader *shader,<br>
> > bool disable_varying_packing, bool<br>
> > xfb_enabled);<br>
> > diff --git a/src/compiler/glsl/link_varyings.cpp<br>
> > b/src/compiler/glsl/link_varyings.cpp<br>
> > index 49843cc..a1dd133 100644<br>
> > --- a/src/compiler/glsl/link_varyings.cpp<br>
> > +++ b/src/compiler/glsl/link_varyings.cpp<br>
> > @@ -1217,6 +1217,7 @@ public:<br>
> > ~varying_matches();<br>
> > void record(ir_variable *producer_var, ir_variable<br>
> > *consumer_var);<br>
> > unsigned assign_locations(struct gl_shader_program *prog,<br>
> > + uint8_t *components,<br>
> > uint64_t reserved_slots);<br>
> > void store_locations() const;<br>
> > <br>
> > @@ -1482,6 +1483,7 @@ varying_matches::record(ir_variable<br>
> > *producer_var, ir_variable *consumer_var)<br>
> > */<br>
> > unsigned<br>
> > varying_matches::assign_locations(struct gl_shader_program *prog,<br>
> > + uint8_t *components,<br>
> > uint64_t reserved_slots)<br>
> > {<br>
> > /* If packing has been disabled then we cannot safely sort the<br>
> > varyings by<br>
> > @@ -1585,6 +1587,12 @@ varying_matches::assign_locations(struct<br>
> > gl_shader_program *prog,<br>
> > var->name);<br>
> > }<br>
> > <br>
> > + if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {<br>
> > + for (unsigned j = *location / 4u; j < slot_end / 4u; j++)<br>
> > + components[j] = 4;<br>
> > + components[slot_end / 4u] = (slot_end & 3) + 1;<br>
><br>
> Everything else in the patch looks good but doesn't this make the<br>
> component size one component to big?</p>
<p dir="ltr">The way I'm setting things up, components has values between 1..4. slot_end & 3 would be 0..3.</p>
<p dir="ltr">Remember that slot_end is inclusive, too.</p>
<p dir="ltr">><br>
> > + }<br>
> > +<br>
> > this->matches[i].generic_location = *location;<br>
> > <br>
> > *location = slot_end + 1;<br>
> > @@ -2203,7 +2211,9 @@ assign_varying_locations(struct gl_context<br>
> > *ctx,<br>
> > }<br>
> > }<br>
> > <br>
> > - const unsigned slots_used = matches.assign_locations(prog,<br>
> > reserved_slots);<br>
> > + uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};<br>
> > + const unsigned slots_used = matches.assign_locations(<br>
> > + prog, components, reserved_slots);<br>
> > matches.store_locations();<br>
> > <br>
> > for (unsigned i = 0; i < num_tfeedback_decls; ++i) {<br>
> > @@ -2263,13 +2273,13 @@ assign_varying_locations(struct gl_context<br>
> > *ctx,<br>
> > }<br>
> > <br>
> > if (producer) {<br>
> > - lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,<br>
> > + lower_packed_varyings(mem_ctx, slots_used, components,<br>
> > ir_var_shader_out,<br>
> > 0, producer, disable_varying_packing,<br>
> > xfb_enabled);<br>
> > }<br>
> > <br>
> > if (consumer) {<br>
> > - lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,<br>
> > + lower_packed_varyings(mem_ctx, slots_used, components,<br>
> > ir_var_shader_in,<br>
> > consumer_vertices, consumer,<br>
> > disable_varying_packing, xfb_enabled);<br>
> > }<br>
> > diff --git a/src/compiler/glsl/lower_packed_varyings.cpp<br>
> > b/src/compiler/glsl/lower_packed_varyings.cpp<br>
> > index 19bbe57..b16f25f 100644<br>
> > --- a/src/compiler/glsl/lower_packed_varyings.cpp<br>
> > +++ b/src/compiler/glsl/lower_packed_varyings.cpp<br>
> > @@ -164,7 +164,9 @@ namespace {<br>
> > class lower_packed_varyings_visitor<br>
> > {<br>
> > public:<br>
> > - lower_packed_varyings_visitor(void *mem_ctx, unsigned<br>
> > locations_used,<br>
> > + lower_packed_varyings_visitor(void *mem_ctx,<br>
> > + unsigned locations_used,<br>
> > + const uint8_t *components,<br>
> > ir_variable_mode mode,<br>
> > unsigned gs_input_vertices,<br>
> > exec_list *out_instructions,<br>
> > @@ -203,6 +205,8 @@ private:<br>
> > */<br>
> > const unsigned locations_used;<br>
> > <br>
> > + const uint8_t* components;<br>
> > +<br>
> > /**<br>
> > * Array of pointers to the packed varyings that have been<br>
> > created for each<br>
> > * generic varying slot. NULL entries in this array indicate<br>
> > varying slots<br>
> > @@ -241,12 +245,14 @@ private:<br>
> > } /* anonymous namespace */<br>
> > <br>
> > lower_packed_varyings_visitor::lower_packed_varyings_visitor(<br>
> > - void *mem_ctx, unsigned locations_used, ir_variable_mode mode,<br>
> > + void *mem_ctx, unsigned locations_used, const uint8_t<br>
> > *components,<br>
> > + ir_variable_mode mode,<br>
> > unsigned gs_input_vertices, exec_list *out_instructions,<br>
> > exec_list *out_variables, bool disable_varying_packing,<br>
> > bool xfb_enabled)<br>
> > : mem_ctx(mem_ctx),<br>
> > locations_used(locations_used),<br>
> > + components(components),<br>
> > packed_varyings((ir_variable **)<br>
> > rzalloc_array_size(mem_ctx,<br>
> > sizeof(*packed_varyings),<br>
> > locations_used)),<br>
> > @@ -607,10 +613,11 @@<br>
> > lower_packed_varyings_visitor::get_packed_varying_deref(<br>
> > if (this->packed_varyings[slot] == NULL) {<br>
> > char *packed_name = ralloc_asprintf(this->mem_ctx,<br>
> > "packed:%s", name);<br>
> > const glsl_type *packed_type;<br>
> > + assert(components[slot] != 0);<br>
> > if (unpacked_var->is_interpolation_flat())<br>
> > - packed_type = glsl_type::ivec4_type;<br>
> > + packed_type = glsl_type::get_instance(GLSL_TYPE_INT,<br>
> > components[slot], 1);<br>
> > else<br>
> > - packed_type = glsl_type::vec4_type;<br>
> > + packed_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,<br>
> > components[slot], 1);<br>
> > if (this->gs_input_vertices != 0) {<br>
> > packed_type =<br>
> > glsl_type::get_array_instance(packed_type,<br>
> > @@ -771,6 +778,7 @@<br>
> > lower_packed_varyings_return_splicer::visit_leave(ir_return *ret)<br>
> > <br>
> > void<br>
> > lower_packed_varyings(void *mem_ctx, unsigned locations_used,<br>
> > + const uint8_t *components,<br>
> > ir_variable_mode mode, unsigned<br>
> > gs_input_vertices,<br>
> > gl_linked_shader *shader, bool<br>
> > disable_varying_packing,<br>
> > bool xfb_enabled)<br>
> > @@ -781,7 +789,10 @@ lower_packed_varyings(void *mem_ctx, unsigned<br>
> > locations_used,<br>
> > ir_function_signature *main_func_sig<br>
> > = main_func->matching_signature(NULL, &void_parameters,<br>
> > false);<br>
> > exec_list new_instructions, new_variables;<br>
> > - lower_packed_varyings_visitor visitor(mem_ctx, locations_used,<br>
> > mode,<br>
> > + lower_packed_varyings_visitor visitor(mem_ctx,<br>
> > + locations_used,<br>
> > + components,<br>
> > + mode,<br>
> > gs_input_vertices,<br>
> > &new_instructions,<br>
> > &new_variables,<br>
</p>