[Mesa-dev] [PATCH 6/8] glsl: use ARB_enhanced_layouts for packing where possible
Kenneth Graunke
kenneth at whitecape.org
Fri Apr 21 08:27:53 UTC 2017
On Monday, April 17, 2017 10:52:23 PM PDT Timothy Arceri wrote:
> From: Timothy Arceri <timothy.arceri at collabora.com>
>
> If packing doesn't cross locations we can easily make use of
> ARB_enhanced_layouts to do packing rather than using the GLSL IR
> lowering pass lower_packed_varyings().
>
> Shader-db Broadwell results:
>
> total instructions in shared programs: 12977822 -> 12977819 (-0.00%)
> instructions in affected programs: 1871 -> 1868 (-0.16%)
> helped: 4
> HURT: 3
>
> total cycles in shared programs: 246567288 -> 246567668 (0.00%)
> cycles in affected programs: 1370386 -> 1370766 (0.03%)
> helped: 592
> HURT: 733
> ---
> src/compiler/glsl/link_varyings.cpp | 68 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 68 insertions(+)
>
> diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
> index 37297be..5902421 100644
> --- a/src/compiler/glsl/link_varyings.cpp
> +++ b/src/compiler/glsl/link_varyings.cpp
> @@ -1210,20 +1210,21 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
> namespace {
>
> /**
> * Data structure recording the relationship between outputs of one shader
> * stage (the "producer") and inputs of another (the "consumer").
> */
> class varying_matches
> {
> public:
> varying_matches(bool disable_varying_packing, bool xfb_enabled,
> + bool enhanced_layouts_enabled,
> gl_shader_stage producer_stage,
> gl_shader_stage consumer_stage);
> ~varying_matches();
> void record(ir_variable *producer_var, ir_variable *consumer_var);
> unsigned assign_locations(struct gl_shader_program *prog,
> uint8_t *components,
> uint64_t reserved_slots);
> void store_locations() const;
>
> private:
> @@ -1243,20 +1244,22 @@ private:
>
> /**
> * If true, this driver has transform feedback enabled. The transform
> * feedback code requires at least some packing be done even when varying
> * packing is disabled, fortunately where transform feedback requires
> * packing it's safe to override the disabled setting. See
> * is_varying_packing_safe().
> */
> const bool xfb_enabled;
>
> + const bool enhanced_layouts_enabled;
> +
> /**
> * Enum representing the order in which varyings are packed within a
> * packing class.
> *
> * Currently we pack vec4's first, then vec2's, then scalar values, then
> * vec3's. This order ensures that the only vectors that are at risk of
> * having to be "double parked" (split between two adjacent varying slots)
> * are the vec3's.
> */
> enum packing_order_enum {
> @@ -1319,24 +1322,26 @@ private:
> unsigned matches_capacity;
>
> gl_shader_stage producer_stage;
> gl_shader_stage consumer_stage;
> };
>
> } /* anonymous namespace */
>
> varying_matches::varying_matches(bool disable_varying_packing,
> bool xfb_enabled,
> + bool enhanced_layouts_enabled,
> gl_shader_stage producer_stage,
> gl_shader_stage consumer_stage)
> : disable_varying_packing(disable_varying_packing),
> xfb_enabled(xfb_enabled),
> + enhanced_layouts_enabled(enhanced_layouts_enabled),
> producer_stage(producer_stage),
> consumer_stage(consumer_stage)
> {
> /* Note: this initial capacity is rather arbitrarily chosen to be large
> * enough for many cases without wasting an unreasonable amount of space.
> * varying_matches::record() will resize the array if there are more than
> * this number of varyings.
> */
> this->matches_capacity = 8;
> this->matches = (match *)
> @@ -1615,37 +1620,99 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
> }
>
>
> /**
> * Update the producer and consumer shaders to reflect the locations
> * assignments that were made by varying_matches::assign_locations().
> */
> void
> varying_matches::store_locations() const
> {
> + /* Check if location needs to be packed with lower_packed_varyings() or if
> + * we can just use ARB_enhanced_layouts packing.
> + */
> + bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 };
> + const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
> +
> for (unsigned i = 0; i < this->num_matches; i++) {
> ir_variable *producer_var = this->matches[i].producer_var;
> ir_variable *consumer_var = this->matches[i].consumer_var;
> unsigned generic_location = this->matches[i].generic_location;
> unsigned slot = generic_location / 4;
> unsigned offset = generic_location % 4;
>
> if (producer_var) {
> producer_var->data.location = VARYING_SLOT_VAR0 + slot;
> producer_var->data.location_frac = offset;
> }
>
> if (consumer_var) {
> assert(consumer_var->data.location == -1);
> consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
> consumer_var->data.location_frac = offset;
> }
> +
> + /* Find locations suitable for native packing via
> + * ARB_enhanced_layouts.
> + */
> + if (producer_var && consumer_var) {
> + if (enhanced_layouts_enabled) {
> + const glsl_type *type =
> + get_varying_type(producer_var, producer_stage);
> + if (type->is_array() || type->is_matrix() || type->is_record() ||
> + type->is_double()) {
> + unsigned comp_slots = type->component_slots() + offset;
> + unsigned slots = comp_slots / 4;
> + if (comp_slots % 4)
> + slots += 1;
> +
> + for (unsigned j = 0; j < slots; j++) {
> + pack_loc[slot + j] = true;
> + }
> + } else if (offset + type->vector_elements > 4) {
> + pack_loc[slot] = true;
> + pack_loc[slot + 1] = true;
> + } else {
> + loc_type[slot][offset] = type;
> + }
> + }
> + }
> + }
> +
> + /* Attempt to use ARB_enhanced_layouts for more efficient packing if
> + * suitable.
> + */
> + if (enhanced_layouts_enabled) {
> + for (unsigned i = 0; i < this->num_matches; i++) {
> + ir_variable *producer_var = this->matches[i].producer_var;
> + ir_variable *consumer_var = this->matches[i].consumer_var;
> + unsigned generic_location = this->matches[i].generic_location;
> + unsigned slot = generic_location / 4;
> +
> + if (pack_loc[slot] || !producer_var || !consumer_var)
> + continue;
> +
> + const glsl_type *type =
> + get_varying_type(producer_var, producer_stage);
> + bool type_match = true;
> + for (unsigned j = 0; j < 4; j++) {
> + if (loc_type[slot][j]) {
> + if (type->base_type != loc_type[slot][j]->base_type)
> + type_match = false;
> + }
> + }
> +
> + if (type_match) {
> + producer_var->data.explicit_location = 1;
> + consumer_var->data.explicit_location = 1;
You probably want to set
producer_var->data.explicit_component = 1;
consumer_var->data.explicit_component = 1;
as well. It doesn't seem to affect code generation, but it does make
ir_print_visitor actually print the component info.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170421/88fb4c3b/attachment.sig>
More information about the mesa-dev
mailing list