[Mesa-dev] [PATCH V2] glsl: disable varying packing when its not safe
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Wed Feb 3 07:44:45 UTC 2016
On Tue, 2016-02-02 at 16:50 +1100, Timothy Arceri wrote:
> In GLES 3.1+ and GL 4.4+ there is no guarantee that interpolation
> qualifiers will match between stages so we cannot safely pack
> varyings using the current packing pass in Mesa.
>
> We also disable packing on outward facing SSO as these too are
> outside
> the rule that guarantees the interpolation qualifiers match.
>
> We do however enable packing on individual arrays, structs, and
> matrices as these are required by the transform feedback code and it
> is still safe to do so.
>
> Finally we also enable packing when a varying is only used for
> transform feedback and its not a SSO.
>
> This fixes all remaining rendering issues with the dEQP SSO tests,
> the only issues remaining with thoses tests are to do with
> validation.
>
> Note: There is still one remaining SSO bug that this patch doesn't
> fix.
> Their is a chance that VS -> TCS will have mismatching interfaces
> because we pack VS output in case its used by transform feedback but
> don't pack TCS input for performance reasons. This patch will make
> the
> situation better but doesn't fix it.
>
> V2: Make is_varying_packing_safe() a function in the varying_matches
> class, fix spelling (Matt) and make sure to remove the outer array
> when dealing with Geom and Tess shaders where appropriate.
> Lastly fix piglit regression in new piglit test and document the
> undefined behaviour it depends on:
> arb_separate_shader_objects/execution/vs-gs-linking.shader_test
>
> Cc: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
> Cc: Tapani Pälli <tapani.palli at intel.com>
> ---
> This patch applies on top of my ARB_enhanced_layouts component
> packing
> series [1].
>
> The GLES 3.1 and GL 4.4 changes were tested by changing the code to
> always disable packing and running the changes in Intel CI system.
>
> This patch would also make it possible for a backend to do less
> packing
> in GLSL IR if it wished to do so now that the transform feedback
> requirements
> have been isolated.
>
> [1] http://patchwork.freedesktop.org/series/2101/
>
> src/compiler/glsl/ir.h | 7 +
> src/compiler/glsl/ir_optimization.h | 2 +-
> src/compiler/glsl/link_varyings.cpp | 214
> +++++++++++++++++++++-------
> src/compiler/glsl/lower_packed_varyings.cpp | 37 +++--
> 4 files changed, 202 insertions(+), 58 deletions(-)
>
> diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
> index e4513f0..83bb74b 100644
> --- a/src/compiler/glsl/ir.h
> +++ b/src/compiler/glsl/ir.h
> @@ -736,6 +736,13 @@ public:
> unsigned is_unmatched_generic_inout:1;
>
> /**
> + * Is this varying used only by transform feedback?
> + *
> + * This is used by the linker to decide if its safe to pack
> the varying.
> + */
> + unsigned is_xfb_only:1;
> +
> + /**
> * If non-zero, then this variable may be packed along with
> other variables
> * into a single varying slot, so this offset should be
> applied when
> * accessing components. For example, an offset of 1 means
> that the x
> diff --git a/src/compiler/glsl/ir_optimization.h
> b/src/compiler/glsl/ir_optimization.h
> index a115c46..e201242 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -126,7 +126,7 @@ void lower_packed_varyings(void *mem_ctx,
> unsigned locations_used, ir_variable_mode
> mode,
> gl_shader *shader, unsigned
> base_location,
> bool disable_varying_packing,
> - bool has_enhanced_layouts);
> + bool xfb_enabled, bool
> has_enhanced_layouts);
> bool lower_vector_insert(exec_list *instructions, bool
> lower_nonconstant_index);
> bool lower_vector_derefs(gl_shader *shader);
> void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
> diff --git a/src/compiler/glsl/link_varyings.cpp
> b/src/compiler/glsl/link_varyings.cpp
> index 2974e76..8efaa33 100644
> --- a/src/compiler/glsl/link_varyings.cpp
> +++ b/src/compiler/glsl/link_varyings.cpp
> @@ -878,7 +878,7 @@ namespace {
> class varying_matches
> {
> public:
> - varying_matches(bool disable_varying_packing,
> + varying_matches(bool disable_varying_packing, bool xfb_enabled,
> gl_shader_stage producer_stage,
> gl_shader_stage consumer_stage);
> ~varying_matches();
> @@ -888,14 +888,30 @@ public:
> void store_locations() const;
>
> private:
> + bool is_varying_packing_safe(const glsl_type *type,
> + const ir_variable *var);
> +
> /**
> * If true, this driver disables varying packing, so all varyings
> need to
> * be aligned on slot boundaries, and take up a number of slots
> equal to
> * their number of matrix columns times their array size.
> + *
> + * Packing may also be disabled because our current packing
> method is not
> + * safe in SSO or versions of OpenGL where interpolation
> qualifiers are not
> + * guaranteed to match across stages.
> */
> const bool disable_varying_packing;
>
> /**
> + * If true, this driver has transform feedback enabled. The
> transform
> + * feedback code requires at least some packing be done even when
> varying
> + * packing is disabled, fortunately where transform feedback
> requires
> + * packing it's safe to override the disabled setting. See
> + * is_varying_packing_safe().
> + */
> + const bool xfb_enabled;
> +
> + /**
> * Enum representing the order in which varyings are packed
> within a
> * packing class.
> *
> @@ -914,6 +930,7 @@ private:
> static unsigned compute_packing_class(const ir_variable *var);
> static packing_order_enum compute_packing_order(const ir_variable
> *var);
> static int match_comparator(const void *x_generic, const void
> *y_generic);
> + static int xfb_comparator(const void *x_generic, const void
> *y_generic);
>
> /**
> * Structure recording the relationship between a single producer
> output
> @@ -969,9 +986,11 @@ private:
> } /* anonymous namespace */
>
> varying_matches::varying_matches(bool disable_varying_packing,
> + bool xfb_enabled,
> gl_shader_stage producer_stage,
> gl_shader_stage consumer_stage)
> : disable_varying_packing(disable_varying_packing),
> + xfb_enabled(xfb_enabled),
> producer_stage(producer_stage),
> consumer_stage(consumer_stage)
> {
> @@ -994,6 +1013,24 @@ varying_matches::~varying_matches()
>
>
> /**
> + * Packing is always safe on individual arrays, structure and
> matices. It is
matrices
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
Thanks,
Sam
> + * also safe if the varying is only used for transform feedback.
> + */
> +bool
> +varying_matches::is_varying_packing_safe(const glsl_type *type,
> + const ir_variable *var)
> +{
> + if (consumer_stage == MESA_SHADER_TESS_EVAL ||
> + consumer_stage == MESA_SHADER_TESS_CTRL ||
> + producer_stage == MESA_SHADER_TESS_CTRL)
> + return false;
> +
> + return xfb_enabled && (type->is_array() || type->is_record() ||
> + type->is_matrix() || var-
> >data.is_xfb_only);
> +}
> +
> +
> +/**
> * Record the given producer/consumer variable pair in the list of
> variables
> * that should later be assigned locations.
> *
> @@ -1072,7 +1109,7 @@ varying_matches::record(ir_variable
> *producer_var, ir_variable *consumer_var)
> = this->compute_packing_class(var);
> this->matches[this->num_matches].packing_order
> = this->compute_packing_order(var);
> - if (this->disable_varying_packing) {
> + if (this->disable_varying_packing &&
> !is_varying_packing_safe(type, var)) {
> unsigned slots = type->count_attribute_slots(false);
> this->matches[this->num_matches].num_components = slots * 4;
> } else {
> @@ -1098,37 +1135,28 @@ varying_matches::assign_locations(struct
> gl_shader_program *prog,
> uint64_t reserved_slots,
> bool separate_shader)
> {
> - /* We disable varying sorting for separate shader programs for
> the
> - * following reasons:
> - *
> - * 1/ All programs must sort the code in the same order to
> guarantee the
> - * interface matching. However varying_matches::record() will
> change the
> - * interpolation qualifier of some stages.
> - *
> - * 2/ GLSL version 4.50 removes the matching constrain on the
> interpolation
> - * qualifier.
> - *
> - * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40
> spec:
> - *
> - * "The type and presence of interpolation qualifiers of
> variables with
> - * the same name declared in all linked shaders for the same
> cross-stage
> - * interface must match, otherwise the link command will fail.
> - *
> - * When comparing an output from one stage to an input of a
> subsequent
> - * stage, the input and output don't match if their
> interpolation
> - * qualifiers (or lack thereof) are not the same."
> - *
> - * "It is a link-time error if, within the same stage, the
> interpolation
> - * qualifiers of variables of the same name do not match."
> + /* If packing has been disabled then we cannot safely sort the
> varyings by
> + * class as it may mean we are using a version of OpenGL where
> + * interpolation qualifiers are not guaranteed to be matching
> across
> + * shaders, sorting in this case could result in mismatching
> shader
> + * interfaces.
> + * When packing is disabled the sort orders varyings used by
> transform
> + * feedback first, but also depends on *undefined behaviour* of
> qsort to
> + * reverse the order of the varyings. See: xfb_comparator().
> */
> - if (!separate_shader) {
> + if (!this->disable_varying_packing) {
> /* Sort varying matches into an order that makes them easy to
> pack. */
> qsort(this->matches, this->num_matches, sizeof(*this-
> >matches),
> &varying_matches::match_comparator);
> + } else {
> + /* Only sort varyings that are only used by transform
> feedback. */
> + qsort(this->matches, this->num_matches, sizeof(*this-
> >matches),
> + &varying_matches::xfb_comparator);
> }
>
> unsigned generic_location = 0;
> unsigned generic_patch_location = MAX_VARYING*4;
> + bool previous_var_xfb_only = false;
>
> for (unsigned i = 0; i < this->num_matches; i++) {
> unsigned *location = &generic_location;
> @@ -1152,16 +1180,30 @@ varying_matches::assign_locations(struct
> gl_shader_program *prog,
> /* Advance to the next slot if this varying has a different
> packing
> * class than the previous one, and we're not already on a
> slot
> * boundary.
> + *
> + * Also advance to the next slot if packing is disabled. This
> makes sure
> + * we don't assign varyings the same locations which is
> possible
> + * because we still pack individual arrays, records and
> matrices even
> + * when packing is disabled. Note we don't advance to the next
> slot if
> + * we can pack varyings together that are only used for
> transform
> + * feedback.
> */
> - if (i > 0 &&
> - this->matches[i - 1].packing_class
> - != this->matches[i].packing_class) {
> + if ((this->disable_varying_packing &&
> + !(previous_var_xfb_only && var->data.is_xfb_only)) ||
> + (i > 0 && this->matches[i - 1].packing_class
> + != this->matches[i].packing_class )) {
> *location = ALIGN(*location, 4);
> }
>
> + previous_var_xfb_only = var->data.is_xfb_only;
> +
> unsigned num_elements = type-
> >count_attribute_slots(is_vertex_input);
> - unsigned slot_end = this->disable_varying_packing ? 4 :
> - type->without_array()->vector_elements;
> + unsigned slot_end;
> + if (this->disable_varying_packing &&
> + !is_varying_packing_safe(type, var))
> + slot_end = 4;
> + else
> + slot_end = type->without_array()->vector_elements;
> slot_end += *location - 1;
>
> /* FIXME: We could be smarter in the below code and loop back
> over
> @@ -1185,7 +1227,8 @@ varying_matches::assign_locations(struct
> gl_shader_program *prog,
> /* Increase the slot to make sure there is enough room for
> next
> * array element.
> */
> - if (this->disable_varying_packing)
> + if (this->disable_varying_packing &&
> + !is_varying_packing_safe(type, var))
> slot_end += 4;
> else
> slot_end += type->without_array()->vector_elements;
> @@ -1311,6 +1354,32 @@ varying_matches::match_comparator(const void
> *x_generic, const void *y_generic)
>
>
> /**
> + * Comparison function passed to qsort() to sort varyings used only
> by
> + * transform feedback when packing of other varyings is disabled.
> + */
> +int
> +varying_matches::xfb_comparator(const void *x_generic, const void
> *y_generic)
> +{
> + const match *x = (const match *) x_generic;
> +
> + if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
> + return match_comparator(x_generic, y_generic);
> +
> + /* FIXME: When the comparator returns 0 it means the elements
> being
> + * compared are equivalent. However the qsort documentation says:
> + *
> + * "The order of equivalent elements is undefined."
> + *
> + * In practice the sort ends up reversing the order of the
> varyings which
> + * means locations are also assigned in this reversed order and
> happens to
> + * be what we want. This is also whats happening in
> + * varying_matches::match_comparator().
> + */
> + return 0;
> +}
> +
> +
> +/**
> * Is the given variable a varying variable to be counted against
> the
> * limit in ctx->Const.MaxVarying?
> * This includes variables such as texcoords, colors and generic
> @@ -1628,26 +1697,67 @@ assign_varying_locations(struct gl_context
> *ctx,
> unsigned num_tfeedback_decls,
> tfeedback_decl *tfeedback_decls)
> {
> - if (ctx->Const.DisableVaryingPacking) {
> - /* Transform feedback code assumes varyings are packed, so if
> the driver
> - * has disabled varying packing, make sure it does not support
> transform
> - * feedback.
> - */
> - assert(!ctx->Extensions.EXT_transform_feedback);
> - }
> + /* Transform feedback code assumes varying arrays are packed, so
> if the
> + * driver has disabled varying packing, make sure to at least
> enable
> + * packing required by transform feedback.
> + */
> + bool xfb_enabled = ctx->Extensions.EXT_transform_feedback;
> +
> + /* Disable varying packing for GLES 3.1+ and GL 4.4+, there is no
> guarantee
> + * that interpolation qualifiers will match between shaders in
> these
> + * versions. We also disable packing for the outer facing
> interfaces for
> + * SSO as they also do not have the same restriction during
> linking.
> + *
> + * Packing is still enabled on individual arrays, structs, and
> matrices as
> + * these are required by the transform feedback code and it is
> still safe
> + * to do so. We also enable packing when a varying is only used
> for
> + * transform feedback and its not a SSO.
> + *
> + * Varying packing currently only packs together varyings with
> matching
> + * interpolation qualifiers as the backends assume all packed
> components
> + * are to be processed in the same way. Therefore we cannot do
> packing in
> + * these versions of GL without the risk of mismatching
> interfaces.
> + *
> + * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30
> spec:
> + *
> + * "The type and presence of interpolation qualifiers of
> variables with
> + * the same name declared in all linked shaders for the same
> cross-stage
> + * interface must match, otherwise the link command will fail.
> + *
> + * When comparing an output from one stage to an input of a
> subsequent
> + * stage, the input and output don't match if their
> interpolation
> + * qualifiers (or lack thereof) are not the same."
> + *
> + * This text was also in at least revison 7 of the 4.40 spec but
> is no
> + * longer in revision 9 and not in the 4.50 spec. There is also
> no text in
> + * the GLSL ES spec that says these qualifiers must match across
> stages.
> + *
> + * From Section 4.3.9 (Interpolation) of the GLSL ES 3.0 spec:
> + *
> + * "The type and presence of the interpolation qualifiers and
> storage
> + * qualifiers and invariant qualifiers of variables with the
> same name
> + * declared in all linked shaders must match, otherwise the
> link command
> + * will fail."
> + *
> + * This text is no longer in the 3.1 or 3.2 specs.
> + */
> + bool disable_varying_packing = ctx->Const.DisableVaryingPacking;
> + if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) ||
> + (ctx->API == API_OPENGLES2 && ctx->Version >= 31) ||
> + (prog->SeparateShader && (producer == NULL || consumer ==
> NULL)))
> + disable_varying_packing = true;
>
> /* Tessellation shaders treat inputs and outputs as shared memory
> and can
> * access inputs and outputs of other invocations.
> * Therefore, they can't be lowered to temps easily (and
> definitely not
> * efficiently).
> */
> - bool disable_varying_packing =
> - ctx->Const.DisableVaryingPacking ||
> + disable_varying_packing = disable_varying_packing ||
> (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
> (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
> (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
>
> - varying_matches matches(disable_varying_packing,
> + varying_matches matches(disable_varying_packing, xfb_enabled,
> producer ? producer->Stage :
> (gl_shader_stage)-1,
> consumer ? consumer->Stage :
> (gl_shader_stage)-1);
> hash_table *tfeedback_candidates
> @@ -1767,8 +1877,10 @@ assign_varying_locations(struct gl_context
> *ctx,
> return false;
> }
>
> - if (matched_candidate->toplevel_var-
> >data.is_unmatched_generic_inout)
> + if (matched_candidate->toplevel_var-
> >data.is_unmatched_generic_inout) {
> + matched_candidate->toplevel_var->data.is_xfb_only = 1;
> matches.record(matched_candidate->toplevel_var, NULL);
> + }
> }
>
> const uint64_t reserved_slots =
> @@ -1868,13 +1980,19 @@ assign_varying_locations(struct gl_context
> *ctx,
> if (vertex_attributes > 0)
> lower_packed_varyings(mem_ctx, vertex_attributes,
> ir_var_shader_in, producer,
> - VERT_ATTRIB_GENERIC0, true,
> + VERT_ATTRIB_GENERIC0, true, false,
> ctx-
> >Extensions.ARB_enhanced_layouts);
> }
>
> + /* TCS inputs are not packed so don't enable packing required
> for
> + * transform feedback if the the consumer is a TCS.
> + */
> + if (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL)
> + xfb_enabled = false;
> +
> lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
> producer,
> - VARYING_SLOT_VAR0,
> - disable_varying_packing,
> + VARYING_SLOT_VAR0,
> disable_varying_packing,
> + xfb_enabled,
> ctx->Extensions.ARB_enhanced_layouts);
> }
>
> @@ -1891,13 +2009,13 @@ assign_varying_locations(struct gl_context
> *ctx,
> unsigned frag_outs = _mesa_bitcount_64(reserved_slots);
> if (frag_outs > 0)
> lower_packed_varyings(mem_ctx, frag_outs,
> ir_var_shader_out,
> - consumer, FRAG_RESULT_DATA0, true,
> + consumer, FRAG_RESULT_DATA0, true,
> false,
> ctx-
> >Extensions.ARB_enhanced_layouts);
> }
>
> lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
> consumer,
> - VARYING_SLOT_VAR0,
> - disable_varying_packing,
> + VARYING_SLOT_VAR0,
> disable_varying_packing,
> + xfb_enabled,
> ctx->Extensions.ARB_enhanced_layouts);
> }
>
> diff --git a/src/compiler/glsl/lower_packed_varyings.cpp
> b/src/compiler/glsl/lower_packed_varyings.cpp
> index d6d68fc..9cae548 100644
> --- a/src/compiler/glsl/lower_packed_varyings.cpp
> +++ b/src/compiler/glsl/lower_packed_varyings.cpp
> @@ -172,7 +172,8 @@ update_packed_array_type(const glsl_type *type,
> const glsl_type *packed_type)
>
> static bool
> needs_lowering(ir_variable *var, bool has_enhanced_layouts,
> - bool disable_varying_packing)
> + bool disable_varying_packing, bool xfb_enabled,
> + bool is_outer_array_vert_idx)
> {
> /* Don't lower varying with explicit location unless
> ARB_enhanced_layouts
> * is enabled, also don't try to pack structs with explicit
> location as
> @@ -186,11 +187,16 @@ needs_lowering(ir_variable *var, bool
> has_enhanced_layouts,
> /* Don't disable packing for explicit locations when
> ARB_enhanced_layouts
> * is supported.
> */
> - if (disable_varying_packing && !var->data.explicit_location)
> + const glsl_type *type =
> + is_outer_array_vert_idx ? var->type->fields.array : var->type;
> + if (disable_varying_packing && !var->data.is_xfb_only &&
> + !var->data.explicit_location &&
> + !((type->is_array() || type->is_record() || type-
> >is_matrix()) &&
> + xfb_enabled))
> return false;
>
> /* Things composed of vec4's don't need lowering everything else
> does. */
> - const glsl_type *type = var->type->without_array();
> + type = type->without_array();
> if (type->vector_elements == 4 && !type->is_double())
> return false;
> return true;
> @@ -285,6 +291,7 @@ public:
> exec_list *out_variables,
> unsigned base_location,
> bool disable_varying_packing,
> + bool xfb_enabled,
> bool has_enhanced_layouts);
>
> void run(struct gl_shader *shader);
> @@ -353,6 +360,7 @@ private:
> exec_list *out_variables;
>
> bool disable_varying_packing;
> + bool xfb_enabled;
> bool has_enhanced_layouts;
> };
>
> @@ -362,7 +370,8 @@
> lower_packed_varyings_visitor::lower_packed_varyings_visitor(
> void *mem_ctx, unsigned locations_used, ir_variable_mode mode,
> bool is_outer_array_vert_idx, exec_list *out_instructions,
> exec_list *out_variables, unsigned base_location,
> - bool disable_varying_packing, bool has_enhanced_layouts)
> + bool disable_varying_packing, bool xfb_enabled,
> + bool has_enhanced_layouts)
> : mem_ctx(mem_ctx),
> base_location(base_location),
> locations_used(locations_used),
> @@ -374,6 +383,7 @@
> lower_packed_varyings_visitor::lower_packed_varyings_visitor(
> out_instructions(out_instructions),
> out_variables(out_variables),
> disable_varying_packing(disable_varying_packing),
> + xfb_enabled(xfb_enabled),
> has_enhanced_layouts(has_enhanced_layouts)
> {
> }
> @@ -388,7 +398,8 @@ lower_packed_varyings_visitor::run(struct
> gl_shader *shader)
>
> if (var->data.mode != this->mode ||
> var->data.location < (int) this->base_location ||
> - !needs_lowering(var, has_enhanced_layouts,
> disable_varying_packing))
> + !needs_lowering(var, has_enhanced_layouts,
> disable_varying_packing,
> + xfb_enabled, is_outer_array_vert_idx))
> continue;
>
> /* This lowering pass is only capable of packing floats and
> ints
> @@ -1118,7 +1129,7 @@ void
> lower_packed_varyings(void *mem_ctx, unsigned locations_used,
> ir_variable_mode mode, gl_shader *shader,
> unsigned base_location, bool
> disable_varying_packing,
> - bool has_enhanced_layouts)
> + bool xfb_enabled, bool has_enhanced_layouts)
> {
> ir_function *main_func = shader->symbols->get_function("main");
> exec_list void_parameters;
> @@ -1143,6 +1154,7 @@ lower_packed_varyings(void *mem_ctx, unsigned
> locations_used,
> &new_variables,
> base_location,
> disable_varying_packing,
> + xfb_enabled,
> has_enhanced_layouts);
> visitor.run(shader);
> if (mode == ir_var_shader_out) {
> @@ -1189,9 +1201,16 @@ lower_packed_varyings(void *mem_ctx, unsigned
> locations_used,
> continue;
>
> if (var->data.mode != mode ||
> - var->data.location < (int) base_location ||
> - !needs_lowering(var, has_enhanced_layouts, true))
> - continue;
> + var->data.location < (int) base_location)
> + continue;
> +
> + bool is_outer_array_vertex_idx = false;
> + if (!var->data.patch)
> + is_outer_array_vertex_idx = true;
> +
> + if (!needs_lowering(var, has_enhanced_layouts, true, false,
> + is_outer_array_vertex_idx))
> + continue;
>
> /* Clone the variable for program resource list before
> * it gets modified and lost.
More information about the mesa-dev
mailing list