[Mesa-dev] [PATCH V3 1/6] glsl: order indices for samplers inside a struct array
Jason Ekstrand
jason at jlekstrand.net
Thu Sep 10 21:36:36 PDT 2015
On Thu, Sep 10, 2015 at 7:43 PM, Timothy Arceri <t_arceri at yahoo.com.au> wrote:
> On Tue, 2015-09-08 at 14:14 -0700, Jason Ekstrand wrote:
>> On Mon, Sep 7, 2015 at 3:09 PM, Timothy Arceri <t_arceri at yahoo.com.au
>> > wrote:
>> > On Mon, 2015-09-07 at 11:24 -0700, Jason Ekstrand wrote:
>> > > On Tue, Sep 1, 2015 at 7:44 PM, Timothy Arceri <
>> > > t_arceri at yahoo.com.au>
>> > > wrote:
>> > > > This allows the correct offset to be easily calculated for
>> > > > indirect
>> > > > indexing when a struct array contains multiple samplers, or any
>> > > > crazy
>> > > > nesting.
>> > > >
>> > > > The indices for the folling struct will now look like this:
>> > > > Sampler index: 0 Name: s[0].tex
>> > > > Sampler index: 1 Name: s[1].tex
>> > > > Sampler index: 2 Name: s[0].si.tex
>> > > > Sampler index: 3 Name: s[1].si.tex
>> > > > Sampler index: 4 Name: s[0].si.tex2
>> > > > Sampler index: 5 Name: s[1].si.tex2
>> > > >
>> > > > Before this change it looked like this:
>> > > > Sampler index: 0 Name: s[0].tex
>> > > > Sampler index: 3 Name: s[1].tex
>> > > > Sampler index: 1 Name: s[0].si.tex
>> > > > Sampler index: 4 Name: s[1].si.tex
>> > > > Sampler index: 2 Name: s[0].si.tex2
>> > > > Sampler index: 5 Name: s[1].si.tex2
>> > > >
>> > > > struct S_inner {
>> > > > sampler2D tex;
>> > > > sampler2D tex2;
>> > > > };
>> > > >
>> > > > struct S {
>> > > > sampler2D tex;
>> > > > S_inner si;
>> > > > };
>> > > >
>> > > > uniform S s[2];
>> > > >
>> > > > V2: rename struct array counter to have better name
>> > > > ---
>> > > > src/glsl/link_uniforms.cpp | 112
>> > > > ++++++++++++++++++++++++++++++++++++++--
>> > > > -----
>> > > > src/glsl/linker.h | 4 +-
>> > > > 2 files changed, 98 insertions(+), 18 deletions(-)
>> > > >
>> > > > diff --git a/src/glsl/link_uniforms.cpp
>> > > > b/src/glsl/link_uniforms.cpp
>> > > > index 254086d..5402c99 100644
>> > > > --- a/src/glsl/link_uniforms.cpp
>> > > > +++ b/src/glsl/link_uniforms.cpp
>> > > > @@ -28,6 +28,7 @@
>> > > > #include "glsl_symbol_table.h"
>> > > > #include "program/hash_table.h"
>> > > > #include "program.h"
>> > > > +#include "util/hash_table.h"
>> > > >
>> > > > /**
>> > > > * \file link_uniforms.cpp
>> > > > @@ -63,14 +64,17 @@ program_resource_visitor::process(const
>> > > > glsl_type
>> > > > *type, const char *name)
>> > > > assert(type->without_array()->is_record()
>> > > > || type->without_array()->is_interface());
>> > > >
>> > > > + unsigned record_array_count = 1;
>> > > > char *name_copy = ralloc_strdup(NULL, name);
>> > > > - recursion(type, &name_copy, strlen(name), false, NULL,
>> > > > false);
>> > > > + recursion(type, &name_copy, strlen(name), false, NULL,
>> > > > false,
>> > > > + record_array_count);
>> > > > ralloc_free(name_copy);
>> > > > }
>> > > >
>> > > > void
>> > > > program_resource_visitor::process(ir_variable *var)
>> > > > {
>> > > > + unsigned record_array_count = 1;
>> > > > const glsl_type *t = var->type;
>> > > > const bool row_major =
>> > > > var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
>> > > > @@ -111,7 +115,8 @@
>> > > > program_resource_visitor::process(ir_variable *var)
>> > > > * lowering is only applied to non-uniform interface
>> > > > blocks, so
>> > > > we
>> > > > * can safely pass false for row_major.
>> > > > */
>> > > > - recursion(var->type, &name, new_length, row_major,
>> > > > NULL, false);
>> > > > + recursion(var->type, &name, new_length, row_major,
>> > > > NULL, false,
>> > > > + record_array_count);
>> > > > }
>> > > > ralloc_free(name);
>> > > > } else if (var->data.from_named_ifc_block_nonarray) {
>> > > > @@ -135,19 +140,23 @@
>> > > > program_resource_visitor::process(ir_variable *var)
>> > > > * is only applied to non-uniform interface blocks, so
>> > > > we can
>> > > > safely
>> > > > * pass false for row_major.
>> > > > */
>> > > > - recursion(var->type, &name, strlen(name), row_major,
>> > > > NULL, false);
>> > > > + recursion(var->type, &name, strlen(name), row_major,
>> > > > NULL, false,
>> > > > + record_array_count);
>> > > > ralloc_free(name);
>> > > > } else if (t->without_array()->is_record()) {
>> > > > char *name = ralloc_strdup(NULL, var->name);
>> > > > - recursion(var->type, &name, strlen(name), row_major,
>> > > > NULL, false);
>> > > > + recursion(var->type, &name, strlen(name), row_major,
>> > > > NULL, false,
>> > > > + record_array_count);
>> > > > ralloc_free(name);
>> > > > } else if (t->is_interface()) {
>> > > > char *name = ralloc_strdup(NULL, var->type->name);
>> > > > - recursion(var->type, &name, strlen(name), row_major,
>> > > > NULL, false);
>> > > > + recursion(var->type, &name, strlen(name), row_major,
>> > > > NULL, false,
>> > > > + record_array_count);
>> > > > ralloc_free(name);
>> > > > } else if (t->is_array() && t->fields.array
>> > > > ->is_interface()) {
>> > > > char *name = ralloc_strdup(NULL, var->type->fields.array
>> > > > ->name);
>> > > > - recursion(var->type, &name, strlen(name), row_major,
>> > > > NULL, false);
>> > > > + recursion(var->type, &name, strlen(name), row_major,
>> > > > NULL, false,
>> > > > + record_array_count);
>> > > > ralloc_free(name);
>> > > > } else {
>> > > > this->visit_field(t, var->name, row_major, NULL, false);
>> > > > @@ -158,7 +167,8 @@ void
>> > > > program_resource_visitor::recursion(const glsl_type *t, char
>> > > > **name,
>> > > > size_t name_length, bool
>> > > > row_major,
>> > > > const glsl_type
>> > > > *record_type,
>> > > > - bool last_field)
>> > > > + bool last_field,
>> > > > + unsigned
>> > > > record_array_count)
>> > > > {
>> > > > /* Records need to have each field processed individually.
>> > > > *
>> > > > @@ -205,7 +215,7 @@ program_resource_visitor::recursion(const
>> > > > glsl_type
>> > > > *t, char **name,
>> > > > recursion(t->fields.structure[i].type, name,
>> > > > new_length,
>> > > > field_row_major,
>> > > > record_type,
>> > > > - (i + 1) == t->length);
>> > > > + (i + 1) == t->length, record_array_count);
>> > > >
>> > > > /* Only the first leaf-field of the record gets
>> > > > called with the
>> > > > * record type pointer.
>> > > > @@ -222,6 +232,8 @@ program_resource_visitor::recursion(const
>> > > > glsl_type
>> > > > *t, char **name,
>> > > > if (record_type == NULL && t->fields.array->is_record())
>> > > > record_type = t->fields.array;
>> > > >
>> > > > + record_array_count *= t->length;
>> > >
>> > > It's possible that I'm not understanding something, but I don't
>> > > think
>> > > this recursion does what you think it does. The only time you
>> > > ever
>> > > manipulate record_array_count is to multiply it by an array
>> > > length so
>> > > the final record_array_count will be the product of the lengths
>> > > of all
>> > > arrays encountered. If you have the following structure:
>> > >
>> > > struct S {
>> > > sampler2D A[5];
>> > > sampler2D B[3];
>> > > sampler3D C[2];
>> > > };
>> > >
>> > > Then, by the time you get done with B and on to C, the
>> > > record_array_count will be 15, not 8 like is what I think you
>> > > want.
>> > > Also, nowhere in here do you predicate that on is_sampler() so if
>> > > I
>> > > replaced B with an array of floats, you'd still get 15 but I
>> > > think you
>> > > would want 5. Am I missing something?
>> >
>> > Yes you are ;)
>> >
>> > This happens within
>> >
>> > if (t->without_array()->is_record() ||
>> > t->without_array()->is_interface() ||
>> > (t->is_array() && t->fields.array->is_array()))
>> >
>> > So record_array_count (maybe could have a better name) only gets
>> > increased if
>> > the struct is an array, or AoA or the member is an AoA.
>> >
>> > In your example above record_array_count would remain at the
>> > default of 1.
>> >
>> > For something a little more complex it would end up like this:
>> >
>> > struct S [2] {
>> > sampler2D A[5]; // record_array_count == 2
>> > sampler2D B[2][3]; // record_array_count == 4
>> > sampler3D C[2]; // record_array_count == 2
>> > };
>> >
>> > It ends up back at 2 because the recursion is doing its job as we
>> > pass the
>> > value of record_array_count rather than a pointer to it in the next
>> > recusive
>> > call.
>> >
>> > There is no need to predicate on is_sampler() as record_array_count
>> > is
>> > essentially an individualised outter array dimension count for each
>> > struct
>> > member. Does that make sense?
>> >
>> > As a result AoA will also use this count to allocate there space
>> > later on
>> > although they don't really need to as we don't need to worry about
>> > these kind
>> > of offsets but its just extra code to stop it for no real gain.
>> >
>> > We are doing the count for interface too but we just don't use it
>> > for those.
>> >
>> > Does that all make sense?
>>
>> Wow... You're right. I can't read... I thought that
>> record_array_count was an in-out variable in your recursion. It
>> makes
>> much more sense now. Feel free to ignore the bogus comments.
>>
>> > > --Jason
>> > >
>> > > > +
>> > > > for (unsigned i = 0; i < t->length; i++) {
>> > > > size_t new_length = name_length;
>> > > >
>> > > > @@ -230,7 +242,7 @@ program_resource_visitor::recursion(const
>> > > > glsl_type
>> > > > *t, char **name,
>> > > >
>> > > > recursion(t->fields.array, name, new_length,
>> > > > row_major,
>> > > > record_type,
>> > > > - (i + 1) == t->length);
>> > > > + (i + 1) == t->length, record_array_count);
>> > > >
>> > > > /* Only the first leaf-field of the record gets
>> > > > called with the
>> > > > * record type pointer.
>> > > > @@ -238,6 +250,7 @@ program_resource_visitor::recursion(const
>> > > > glsl_type
>> > > > *t, char **name,
>> > > > record_type = NULL;
>> > > > }
>> > > > } else {
>> > > > + this->set_record_array_count(record_array_count);
>> > > > this->visit_field(t, *name, row_major, record_type,
>> > > > last_field);
>> > > > }
>> > > > }
>> > > > @@ -268,6 +281,11 @@
>> > > > program_resource_visitor::leave_record(const
>> > > > glsl_type *, const char *, bool)
>> > > > {
>> > > > }
>> > > >
>> > > > +void
>> > > > +program_resource_visitor::set_record_array_count(unsigned)
>> > > > +{
>> > > > +}
>> > > > +
>> > > > namespace {
>> > > >
>> > > > /**
>> > > > @@ -432,6 +450,7 @@ public:
>> > > > this->next_sampler = 0;
>> > > > this->next_image = 0;
>> > > > this->next_subroutine = 0;
>> > > > + this->record_array_count = 1;
>> > > > memset(this->targets, 0, sizeof(this->targets));
>> > > > }
>> > > >
>> > > > @@ -440,6 +459,7 @@ public:
>> > > > {
>> > > > current_var = var;
>> > > > field_counter = 0;
>> > > > + this->record_next_sampler = new string_to_uint_map;
>> > > >
>> > > > ubo_block_index = -1;
>> > > > if (var->is_in_buffer_block()) {
>> > > > @@ -493,6 +513,8 @@ public:
>> > > > process(var);
>> > > > } else
>> > > > process(var);
>> > > > + }
>> > > > + delete this->record_next_sampler;
>> > > > }
>> > > >
>> > > > int ubo_block_index;
>> > > > @@ -501,17 +523,60 @@ public:
>> > > >
>> > > > private:
>> > > > void handle_samplers(const glsl_type *base_type,
>> > > > - struct gl_uniform_storage *uniform)
>> > > > + struct gl_uniform_storage *uniform,
>> > > > const char
>> > > > *name)
>> > > > {
>> > > > if (base_type->is_sampler()) {
>> > > > - uniform->sampler[shader_type].index = this
>> > > > ->next_sampler;
>> > > > uniform->sampler[shader_type].active = true;
>> > > >
>> > > > - /* Increment the sampler by 1 for non-arrays and by
>> > > > the number
>> > > > of
>> > > > - * array elements for arrays.
>> > > > - */
>> > > > - this->next_sampler +=
>> > > > - MAX2(1, uniform->array_elements);
>> > > > + /* Handle multiple samplers inside struct arrays */
>> > > > + if (this->record_array_count > 1) {
>> > > > + unsigned inner_array_size = MAX2(1, uniform
>> > > > ->array_elements);
>> > > > + char *name_copy = ralloc_strdup(NULL, name);
>> > > > +
>> > > > + /* Remove all array subscripts from the sampler
>> > > > name */
>> > > > + char *str_start;
>> > > > + const char *str_end;
>> > > > + while((str_start = strchr(name_copy, '[')) &&
>> > > > + (str_end = strchr(name_copy, ']'))) {
>> > > > + memmove(str_start, str_end + 1, 1 +
>> > > > strlen(str_end));
>> > > > + }
>> > > > +
>> > > > + unsigned index = 0;
>> > > > + if (this->record_next_sampler->get(index,
>> > > > name_copy)) {
>> > > > + /* Set the base sampler index for the uniform,
>> > > > increment
>> > > > the
>> > > > + * next index and return as everything else has
>> > > > already
>> > > > been
>> > > > + * initialised.
>> > > > + */
Can we update this comment to something like:
In this case, we've already seen this uniform so we just use the next
sampler index recorded the last time we visited.
>> > > > + uniform->sampler[shader_type].index = index;
>> > > > + index = inner_array_size + uniform
>> > > > ->sampler[shader_type].index;
>> > > > + this->record_next_sampler->put(index,
>> > > > name_copy);
>> > > > +
>> > > > + ralloc_free(name_copy);
>> > > > + return;
>> > > > + } else {
And here, could you add something like this to the beginning of the
following comment:
We've never seen this uniform before so we need to allocate enough
indices to store it.
>> > > > + /* Nested struct arrays behave like arrays of
>> > > > arrays so we
>> > > > need
>> > > > + * to increase the index by the total number of
>> > > > elements
>> > > > of the
>> > > > + * sampler in case there is more than one
>> > > > sampler inside
>> > > > the
>> > > > + * structs. This allows the offset to be easily
>> > > > calculated
>> > > > for
>> > > > + * indirect indexing.
>> > > > + */
>> > > > + uniform->sampler[shader_type].index = this
>> > > > ->next_sampler;
>> > > > + this->next_sampler +=
>> > > > + inner_array_size * this->record_array_count;
>>
>> It's not clear to me how this is related to the fact that we're in
>> the
>> else case. Why don't we need to multiply by record_array_count in
>> both cases? It seems to me like not having seen the given string
>> shouldn't make a difference for that.
>
> Here we are basically reserving all the indices we will need for the
> sampler indices to be in order. this->next_sampler is incremented so it
> can be used for assigning indices for other samplers so we only need to
> do this once when the sampler is first seen.
>
> The if block is then just assigning the reserved indices as we recurse
> over them.
>
> It's not the prettiest code but without ordering the indices we would
> need to do some nasty offset calculations in the nir lowering code,
> this seemed like the better solution.
Ok, that makes sense. I missed the whole "We're allocating things" part.
>
>>
>> --Jason
>>
>> > > > +
>> > > > + /* Store the next index for future passes over
>> > > > the struct
>> > > > array
>> > > > + */
>> > > > + index = uniform->sampler[shader_type].index +
>> > > > inner_array_size;
>> > > > + this->record_next_sampler->put(index,
>> > > > name_copy);
>> > > > + ralloc_free(name_copy);
These three lines are common to both cases. Can we move them to after
the if statement?
With those changes,
Reviewed-by: Jason Ekstrand <jason.ekstrand at intel.com>
>> > > > + }
>> > > > + } else {
>> > > > + /* Increment the sampler by 1 for non-arrays and
>> > > > by the
>> > > > number of
>> > > > + * array elements for arrays.
>> > > > + */
>> > > > + uniform->sampler[shader_type].index = this
>> > > > ->next_sampler;
>> > > > + this->next_sampler += MAX2(1, uniform
>> > > > ->array_elements);
>> > > > + }
>> > > >
>> > > > const gl_texture_index target = base_type
>> > > > ->sampler_index();
>> > > > const unsigned shadow = base_type->sampler_shadow;
>> > > > @@ -564,6 +629,11 @@ private:
>> > > > }
>> > > > }
>> > > >
>> > > > + virtual void set_record_array_count(unsigned
>> > > > record_array_count)
>> > > > + {
>> > > > + this->record_array_count = record_array_count;
>> > > > + }
>> > > > +
>> > > > virtual void visit_field(const glsl_type *type, const char
>> > > > *name,
>> > > > bool row_major)
>> > > > {
>> > > > @@ -615,7 +685,7 @@ private:
>> > > > }
>> > > >
>> > > > /* This assigns uniform indices to sampler and image
>> > > > uniforms. */
>> > > > - handle_samplers(base_type, &this->uniforms[id]);
>> > > > + handle_samplers(base_type, &this->uniforms[id], name);
>> > > > handle_images(base_type, &this->uniforms[id]);
>> > > > handle_subroutines(base_type, &this->uniforms[id]);
>> > > >
>> > > > @@ -704,6 +774,14 @@ private:
>> > > > unsigned next_image;
>> > > > unsigned next_subroutine;
>> > > >
>> > > > + /* Stores total struct array elements including nested
>> > > > structs */
>> > > > + unsigned record_array_count;
>> > > > +
>> > > > + /* Map for temporarily storing next sampler index when
>> > > > handling
>> > > > samplers in
>> > > > + * struct arrays.
>> > > > + */
>> > > > + struct string_to_uint_map *record_next_sampler;
>> > > > +
>> > > > public:
>> > > > union gl_constant_value *values;
>> > > >
>> > > > diff --git a/src/glsl/linker.h b/src/glsl/linker.h
>> > > > index ce3dc32..40e6c73 100644
>> > > > --- a/src/glsl/linker.h
>> > > > +++ b/src/glsl/linker.h
>> > > > @@ -181,6 +181,8 @@ protected:
>> > > > virtual void leave_record(const glsl_type *type, const char
>> > > > *name,
>> > > > bool row_major);
>> > > >
>> > > > + virtual void set_record_array_count(unsigned
>> > > > record_array_count);
>> > > > +
>> > > > private:
>> > > > /**
>> > > > * \param name_length Length of the current name \b not
>> > > > including the
>> > > > @@ -191,7 +193,7 @@ private:
>> > > > */
>> > > > void recursion(const glsl_type *t, char **name, size_t
>> > > > name_length,
>> > > > bool row_major, const glsl_type
>> > > > *record_type,
>> > > > - bool last_field);
>> > > > + bool last_field, unsigned
>> > > > record_array_count);
>> > > > };
>> > > >
>> > > > void
>> > > > --
>> > > > 2.4.3
>> > > >
>> > > > _______________________________________________
>> > > > mesa-dev mailing list
>> > > > mesa-dev at lists.freedesktop.org
>> > > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list