[Mesa-dev] [PATCH] nir: Add a pass for gathering transform feedback info
Jason Ekstrand
jason at jlekstrand.net
Mon Oct 8 15:58:35 UTC 2018
On Mon, Oct 8, 2018 at 10:22 AM Alejandro Piñeiro <apinheiro at igalia.com>
wrote:
> I was not able to finish trying to get ARB_gl_spirv using this pass. The
> major difference is that on ARB_gl_spirv (and afaiu on GLSL too) we are
> merging the info of all the available xfb varyings from all the stages,
> while this pass gathers info from a individual nir shader (so one
> individual stage).
>
> Having said so, while using this pass, I found some issues/questions,
> see below inline.
>
>
> On 05/10/18 16:13, Jason Ekstrand wrote:
> > This is different from the GL_ARB_spirv pass because it generates a much
> > simpler data structure that isn't tied to OpenGL and mtypes.h.
> > ---
> > src/compiler/Makefile.sources | 4 +-
> > src/compiler/nir/meson.build | 2 +
> > src/compiler/nir/nir_gather_xfb_info.c | 150 +++++++++++++++++++++++++
> > src/compiler/nir/nir_xfb_info.h | 59 ++++++++++
> > 4 files changed, 214 insertions(+), 1 deletion(-)
> > create mode 100644 src/compiler/nir/nir_gather_xfb_info.c
> > create mode 100644 src/compiler/nir/nir_xfb_info.h
> >
> > diff --git a/src/compiler/Makefile.sources
> b/src/compiler/Makefile.sources
> > index d3b06564832..46ed5e47b46 100644
> > --- a/src/compiler/Makefile.sources
> > +++ b/src/compiler/Makefile.sources
> > @@ -216,6 +216,7 @@ NIR_FILES = \
> > nir/nir_format_convert.h \
> > nir/nir_from_ssa.c \
> > nir/nir_gather_info.c \
> > + nir/nir_gather_xfb_info.c \
> > nir/nir_gs_count_vertices.c \
> > nir/nir_inline_functions.c \
> > nir/nir_instr_set.c \
> > @@ -307,7 +308,8 @@ NIR_FILES = \
> > nir/nir_validate.c \
> > nir/nir_vla.h \
> > nir/nir_worklist.c \
> > - nir/nir_worklist.h
> > + nir/nir_worklist.h \
> > + nir/nir_xfb_info.h
> >
> > SPIRV_GENERATED_FILES = \
> > spirv/spirv_info.c \
> > diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> > index 090aa7a628f..b416e561eb0 100644
> > --- a/src/compiler/nir/meson.build
> > +++ b/src/compiler/nir/meson.build
> > @@ -100,6 +100,7 @@ files_libnir = files(
> > 'nir_format_convert.h',
> > 'nir_from_ssa.c',
> > 'nir_gather_info.c',
> > + 'nir_gather_xfb_info.c',
> > 'nir_gs_count_vertices.c',
> > 'nir_inline_functions.c',
> > 'nir_instr_set.c',
> > @@ -192,6 +193,7 @@ files_libnir = files(
> > 'nir_vla.h',
> > 'nir_worklist.c',
> > 'nir_worklist.h',
> > + 'nir_xfb_info.h',
> > '../spirv/GLSL.ext.AMD.h',
> > '../spirv/GLSL.std.450.h',
> > '../spirv/gl_spirv.c',
> > diff --git a/src/compiler/nir/nir_gather_xfb_info.c
> b/src/compiler/nir/nir_gather_xfb_info.c
> > new file mode 100644
> > index 00000000000..a53703bb9bf
> > --- /dev/null
> > +++ b/src/compiler/nir/nir_gather_xfb_info.c
> > @@ -0,0 +1,150 @@
> > +/*
> > + * Copyright © 2018 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "nir_xfb_info.h"
> > +
> > +#include <util/u_math.h>
> > +
> > +static void
> > +add_var_xfb_outputs(nir_xfb_info *xfb,
> > + nir_variable *var,
> > + unsigned *location,
> > + unsigned *offset,
> > + const struct glsl_type *type)
> > +{
> > + if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) {
> > + unsigned length = glsl_get_length(type);
> > + const struct glsl_type *child_type = glsl_get_array_element(type);
> > + for (unsigned i = 0; i < length; i++)
> > + add_var_xfb_outputs(xfb, var, location, offset, child_type);
> > + } else if (glsl_type_is_struct(type)) {
> > + unsigned length = glsl_get_length(type);
> > + for (unsigned i = 0; i < length; i++) {
> > + const struct glsl_type *child_type =
> glsl_get_struct_field(type, i);
> > + add_var_xfb_outputs(xfb, var, location, offset, child_type);
> > + }
> > + } else {
> > + assert(var->data.xfb_buffer < NIR_MAX_XFB_BUFFERS);
> > + if (xfb->buffers_written & (1 << var->data.xfb_buffer)) {
> > + assert(xfb->strides[var->data.xfb_buffer] ==
> var->data.xfb_stride);
> > + assert(xfb->buffer_to_stream[var->data.xfb_buffer] ==
> var->data.stream);
> > + } else {
> > + xfb->buffers_written |= (1 << var->data.xfb_buffer);
> > + xfb->strides[var->data.xfb_buffer] = var->data.xfb_stride;
> > + xfb->buffer_to_stream[var->data.xfb_buffer] = var->data.stream;
> > + }
> > +
> > + assert(var->data.stream < NIR_MAX_XFB_STREAMS);
> > + xfb->streams_written |= (1 << var->data.stream);
> > +
> > + unsigned comp_slots = glsl_get_component_slots(type);
> > + unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
> > + assert(attrib_slots == glsl_count_attribute_slots(type, false));
> > +
> > + /* Ensure that we don't have, for instance, a dvec2 with a
> location_frac
> > + * of 2 which would make it crass a location boundary even though
> it
> > + * fits in a single slot. However, you can have a dvec3 which
> crosses
> > + * the slot boundary with a location_frac of 2.
> > + */
> > + assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
> attrib_slots);
> > +
> > + assert(var->data.location_frac + comp_slots <= 8);
> > + uint8_t comp_mask = ((1 << comp_slots) - 1) <<
> var->data.location_frac;
> > +
> > + assert(attrib_slots <= 2);
> > + for (unsigned s = 0; s < attrib_slots; s++) {
> > + nir_xfb_output_info *output =
> &xfb->outputs[xfb->output_count++];
> > +
> > + output->buffer = var->data.xfb_buffer;
> > + output->offset = *offset;
> > + output->location = *location;
> > + output->component_mask = (comp_mask >> (s * 4)) & 0xf;
> > +
> > + (*location)++;
> > + *offset += comp_slots * 4;
> > + }
> > + }
> > +}
> > +
> > +static int
> > +compare_xfb_output_offsets(const void *_a, const void *_b)
> > +{
> > + const nir_xfb_output_info *a = _a, *b = _b;
> > + return a->offset - b->offset;
> > +}
> > +
> > +nir_xfb_info *
> > +nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)
> > +{
> > + assert(shader->info.stage == MESA_SHADER_VERTEX ||
> > + shader->info.stage == MESA_SHADER_TESS_EVAL ||
> > + shader->info.stage == MESA_SHADER_GEOMETRY);
> > +
> > + /* Compute the number of outputs we have. This is simply the number
> of
> > + * cumulative locations consumed by all the variables. If a
> location is
> > + * represented by multiple variables, then they each count
> separately in
> > + * number of outputs.
> > + */
> > + unsigned num_outputs = 0;
> > + nir_foreach_variable(var, &shader->outputs) {
> > + if (var->data.explicit_xfb_buffer ||
> > + var->data.explicit_xfb_stride) {
> > + assert(var->data.explicit_xfb_buffer &&
> > + var->data.explicit_xfb_stride &&
> > + var->data.explicit_offset);
>
> OpenGL specific: FWIW, I'm getting this assert raised really easy. It
> seems that glslangValidator adds a implicitly-generated output for
> gl_PerVertex, and although it sets the stride and buffer, it doesn't do
> the same for the offset. Will check a little glslang later, and likely
> open a issue for it.
>
Yes, there are GLSLang bugs. I believe those are getting addressed.
> + num_outputs += glsl_count_attribute_slots(var->type, false);
> > + }
> > + }
> > + if (num_outputs == 0)
> > + return NULL;
> > +
> > + nir_xfb_info *xfb = rzalloc_size(mem_ctx,
> nir_xfb_info_size(num_outputs));
> > +
> > + /* Walk the list of outputs and add them to the array */
> > + nir_foreach_variable(var, &shader->outputs) {
> > + if (var->data.explicit_xfb_buffer ||
> > + var->data.explicit_xfb_stride) {
> > + unsigned location = var->data.location;
> > + unsigned offset = var->data.offset;
> > + add_var_xfb_outputs(xfb, var, &location, &offset, var->type);
> > + }
> > + }
> > + assert(xfb->output_count == num_outputs);
> > +
> > + /* Everything is easier in the state setup code if the list is
> sorted in
> > + * order of output offset.
> > + */
> > + qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]),
> > + compare_xfb_output_offsets);
> > +
> > + /* Finally, do a sanity check */
> > + unsigned max_offset[NIR_MAX_XFB_BUFFERS] = { };
> > + for (unsigned i = 0; i < xfb->output_count; i++) {
> > + assert(xfb->outputs[i].offset >=
> max_offset[xfb->outputs[i].buffer]);
>
> Shouldn't this be offset <= max_offset?
>
No, this sanity check (which I should have commented better) is checking
for overlap. Since we know that the array is already sorted by offset, we
can easily check for overlap by asserting that the offsets are
monotonically increasing on a per-buffer basis.
> > + assert(xfb->outputs[i].component_mask != 0);
> > + unsigned slots = util_bitcount(xfb->outputs[i].component_mask);
> > + max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset +
> slots * 4;
>
> Additionally, I'm getting the previous assert triggered with tests using
> doubles, so I'm guessing if we are taking into account them here.
>
That's very possible. This hasn't been tested with doubles though I did
think about them while writing it. The component_mask is supposed to be in
32-bit units so any doubles should show up as two slots per component. So,
yes, I'm sure there's a bug and we need better testing before this lands.
--Jason
> > + }
> > +
> > + return xfb;
> > +}
> > diff --git a/src/compiler/nir/nir_xfb_info.h
> b/src/compiler/nir/nir_xfb_info.h
> > new file mode 100644
> > index 00000000000..9b543df5f47
> > --- /dev/null
> > +++ b/src/compiler/nir/nir_xfb_info.h
> > @@ -0,0 +1,59 @@
> > +/*
> > + * Copyright © 2018 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#ifndef NIR_XFB_INFO_H
> > +#define NIR_XFB_INFO_H
> > +
> > +#include "nir.h"
> > +
> > +#define NIR_MAX_XFB_BUFFERS 4
> > +#define NIR_MAX_XFB_STREAMS 4
> > +
> > +typedef struct {
> > + uint8_t buffer;
> > + uint16_t offset;
> > + uint8_t location;
> > + uint8_t component_mask;
> > +} nir_xfb_output_info;
> > +
> > +typedef struct {
> > + uint8_t buffers_written;
> > + uint8_t streams_written;
> > +
> > + uint16_t strides[NIR_MAX_XFB_BUFFERS];
> > + uint8_t buffer_to_stream[NIR_MAX_XFB_STREAMS];
> > +
> > + uint16_t output_count;
> > + nir_xfb_output_info outputs[0];
> > +} nir_xfb_info;
> > +
> > +static inline size_t
> > +nir_xfb_info_size(uint16_t output_count)
> > +{
> > + return sizeof(nir_xfb_info) + sizeof(nir_xfb_output_info) *
> output_count;
> > +}
> > +
> > +nir_xfb_info *
> > +nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx);
> > +
> > +#endif /* NIR_XFB_INFO_H */
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20181008/43202338/attachment-0001.html>
More information about the mesa-dev
mailing list