<p dir="ltr">On Nov 9, 2016 5:53 AM, "Timothy Arceri" <<a href="mailto:timothy.arceri@collabora.com">timothy.arceri@collabora.com</a>> wrote:<br>
><br>
> On Sat, 2016-11-05 at 13:03 -0400, Ilia Mirkin wrote:<br>
> > Instead of packing varyings into vec4's, keep track of how many<br>
> > components each slot uses and create varyings with matching types.<br>
> > This<br>
> > ensures that we don't end up using more components than the orginal<br>
> > shader, which is especially important for geometry shader output<br>
> > limits.<br>
> ><br>
> > Signed-off-by: Ilia Mirkin <<a href="mailto:imirkin@alum.mit.edu">imirkin@alum.mit.edu</a>><br>
> > ---<br>
> ><br>
> > This comes up for nouveau, where the limit is 1024 output components<br>
> > for a GS,<br>
> > and the hardware complains *loudly* if you even think about going<br>
> > over.<br>
><br>
> Since this is part of the justification for the change I'd like to see<br>
> this included in the commit message. IMO this screams this change is<br>
> important much more than the second paragraph does alone.</p>
<p dir="ltr">Ok.</p>
<p dir="ltr">><br>
> ><br>
> >  src/compiler/glsl/ir_optimization.h         |  4 +++-<br>
> >  src/compiler/glsl/link_varyings.cpp         | 16 +++++++++++++---<br>
> >  src/compiler/glsl/lower_packed_varyings.cpp | 21 ++++++++++++++++---<br>
> > --<br>
> >  3 files changed, 32 insertions(+), 9 deletions(-)<br>
> ><br>
> > diff --git a/src/compiler/glsl/ir_optimization.h<br>
> > b/src/compiler/glsl/ir_optimization.h<br>
> > index 6f2bc32..8cee418 100644<br>
> > --- a/src/compiler/glsl/ir_optimization.h<br>
> > +++ b/src/compiler/glsl/ir_optimization.h<br>
> > @@ -136,7 +136,9 @@ void lower_shared_reference(struct<br>
> > gl_linked_shader *shader,<br>
> >  void lower_ubo_reference(struct gl_linked_shader *shader,<br>
> >                           bool clamp_block_indices);<br>
> >  void lower_packed_varyings(void *mem_ctx,<br>
> > -                           unsigned locations_used, ir_variable_mode<br>
> > mode,<br>
> > +                           unsigned locations_used,<br>
> > +                           const uint8_t *components,<br>
> > +                           ir_variable_mode mode,<br>
> >                             unsigned gs_input_vertices,<br>
> >                             gl_linked_shader *shader,<br>
> >                             bool disable_varying_packing, bool<br>
> > xfb_enabled);<br>
> > diff --git a/src/compiler/glsl/link_varyings.cpp<br>
> > b/src/compiler/glsl/link_varyings.cpp<br>
> > index 49843cc..a1dd133 100644<br>
> > --- a/src/compiler/glsl/link_varyings.cpp<br>
> > +++ b/src/compiler/glsl/link_varyings.cpp<br>
> > @@ -1217,6 +1217,7 @@ public:<br>
> >     ~varying_matches();<br>
> >     void record(ir_variable *producer_var, ir_variable<br>
> > *consumer_var);<br>
> >     unsigned assign_locations(struct gl_shader_program *prog,<br>
> > +                             uint8_t *components,<br>
> >                               uint64_t reserved_slots);<br>
> >     void store_locations() const;<br>
> >  <br>
> > @@ -1482,6 +1483,7 @@ varying_matches::record(ir_variable<br>
> > *producer_var, ir_variable *consumer_var)<br>
> >   */<br>
> >  unsigned<br>
> >  varying_matches::assign_locations(struct gl_shader_program *prog,<br>
> > +                                  uint8_t *components,<br>
> >                                    uint64_t reserved_slots)<br>
> >  {<br>
> >     /* If packing has been disabled then we cannot safely sort the<br>
> > varyings by<br>
> > @@ -1585,6 +1587,12 @@ varying_matches::assign_locations(struct<br>
> > gl_shader_program *prog,<br>
> >                        var->name);<br>
> >        }<br>
> >  <br>
> > +      if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {<br>
> > +         for (unsigned j = *location / 4u; j < slot_end / 4u; j++)<br>
> > +            components[j] = 4;<br>
> > +         components[slot_end / 4u] = (slot_end & 3) + 1;<br>
><br>
> Everything else in the patch looks good but doesn't this make the<br>
> component size one component to big?</p>
<p dir="ltr">The way I'm setting things up, components has values between 1..4. slot_end & 3 would be 0..3.</p>
<p dir="ltr">Remember that slot_end is inclusive, too.</p>
<p dir="ltr">><br>
> > +      }<br>
> > +<br>
> >        this->matches[i].generic_location = *location;<br>
> >  <br>
> >        *location = slot_end + 1;<br>
> > @@ -2203,7 +2211,9 @@ assign_varying_locations(struct gl_context<br>
> > *ctx,<br>
> >        }<br>
> >     }<br>
> >  <br>
> > -   const unsigned slots_used = matches.assign_locations(prog,<br>
> > reserved_slots);<br>
> > +   uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};<br>
> > +   const unsigned slots_used = matches.assign_locations(<br>
> > +         prog, components, reserved_slots);<br>
> >     matches.store_locations();<br>
> >  <br>
> >     for (unsigned i = 0; i < num_tfeedback_decls; ++i) {<br>
> > @@ -2263,13 +2273,13 @@ assign_varying_locations(struct gl_context<br>
> > *ctx,<br>
> >     }<br>
> >  <br>
> >     if (producer) {<br>
> > -      lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,<br>
> > +      lower_packed_varyings(mem_ctx, slots_used, components,<br>
> > ir_var_shader_out,<br>
> >                              0, producer, disable_varying_packing,<br>
> >                              xfb_enabled);<br>
> >     }<br>
> >  <br>
> >     if (consumer) {<br>
> > -      lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,<br>
> > +      lower_packed_varyings(mem_ctx, slots_used, components,<br>
> > ir_var_shader_in,<br>
> >                              consumer_vertices, consumer,<br>
> >                              disable_varying_packing, xfb_enabled);<br>
> >     }<br>
> > diff --git a/src/compiler/glsl/lower_packed_varyings.cpp<br>
> > b/src/compiler/glsl/lower_packed_varyings.cpp<br>
> > index 19bbe57..b16f25f 100644<br>
> > --- a/src/compiler/glsl/lower_packed_varyings.cpp<br>
> > +++ b/src/compiler/glsl/lower_packed_varyings.cpp<br>
> > @@ -164,7 +164,9 @@ namespace {<br>
> >  class lower_packed_varyings_visitor<br>
> >  {<br>
> >  public:<br>
> > -   lower_packed_varyings_visitor(void *mem_ctx, unsigned<br>
> > locations_used,<br>
> > +   lower_packed_varyings_visitor(void *mem_ctx,<br>
> > +                                 unsigned locations_used,<br>
> > +                                 const uint8_t *components,<br>
> >                                   ir_variable_mode mode,<br>
> >                                   unsigned gs_input_vertices,<br>
> >                                   exec_list *out_instructions,<br>
> > @@ -203,6 +205,8 @@ private:<br>
> >      */<br>
> >     const unsigned locations_used;<br>
> >  <br>
> > +   const uint8_t* components;<br>
> > +<br>
> >     /**<br>
> >      * Array of pointers to the packed varyings that have been<br>
> > created for each<br>
> >      * generic varying slot.  NULL entries in this array indicate<br>
> > varying slots<br>
> > @@ -241,12 +245,14 @@ private:<br>
> >  } /* anonymous namespace */<br>
> >  <br>
> >  lower_packed_varyings_visitor::lower_packed_varyings_visitor(<br>
> > -      void *mem_ctx, unsigned locations_used, ir_variable_mode mode,<br>
> > +      void *mem_ctx, unsigned locations_used, const uint8_t<br>
> > *components,<br>
> > +      ir_variable_mode mode,<br>
> >        unsigned gs_input_vertices, exec_list *out_instructions,<br>
> >        exec_list *out_variables, bool disable_varying_packing,<br>
> >        bool xfb_enabled)<br>
> >     : mem_ctx(mem_ctx),<br>
> >       locations_used(locations_used),<br>
> > +     components(components),<br>
> >       packed_varyings((ir_variable **)<br>
> >                       rzalloc_array_size(mem_ctx,<br>
> > sizeof(*packed_varyings),<br>
> >                                          locations_used)),<br>
> > @@ -607,10 +613,11 @@<br>
> > lower_packed_varyings_visitor::get_packed_varying_deref(<br>
> >     if (this->packed_varyings[slot] == NULL) {<br>
> >        char *packed_name = ralloc_asprintf(this->mem_ctx,<br>
> > "packed:%s", name);<br>
> >        const glsl_type *packed_type;<br>
> > +      assert(components[slot] != 0);<br>
> >        if (unpacked_var->is_interpolation_flat())<br>
> > -         packed_type = glsl_type::ivec4_type;<br>
> > +         packed_type = glsl_type::get_instance(GLSL_TYPE_INT,<br>
> > components[slot], 1);<br>
> >        else<br>
> > -         packed_type = glsl_type::vec4_type;<br>
> > +         packed_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,<br>
> > components[slot], 1);<br>
> >        if (this->gs_input_vertices != 0) {<br>
> >           packed_type =<br>
> >              glsl_type::get_array_instance(packed_type,<br>
> > @@ -771,6 +778,7 @@<br>
> > lower_packed_varyings_return_splicer::visit_leave(ir_return *ret)<br>
> >  <br>
> >  void<br>
> >  lower_packed_varyings(void *mem_ctx, unsigned locations_used,<br>
> > +                      const uint8_t *components,<br>
> >                        ir_variable_mode mode, unsigned<br>
> > gs_input_vertices,<br>
> >                        gl_linked_shader *shader, bool<br>
> > disable_varying_packing,<br>
> >                        bool xfb_enabled)<br>
> > @@ -781,7 +789,10 @@ lower_packed_varyings(void *mem_ctx, unsigned<br>
> > locations_used,<br>
> >     ir_function_signature *main_func_sig<br>
> >        = main_func->matching_signature(NULL, &void_parameters,<br>
> > false);<br>
> >     exec_list new_instructions, new_variables;<br>
> > -   lower_packed_varyings_visitor visitor(mem_ctx, locations_used,<br>
> > mode,<br>
> > +   lower_packed_varyings_visitor visitor(mem_ctx,<br>
> > +                                         locations_used,<br>
> > +                                         components,<br>
> > +                                         mode,<br>
> >                                           gs_input_vertices,<br>
> >                                           &new_instructions,<br>
> >                                           &new_variables,<br>
</p>