[Mesa-dev] [PATCH] glsl: break the gl_FragData array into separate gl_FragData[i] variables

Marek Olšák maraeo at gmail.com
Sun Oct 27 20:58:35 CET 2013


On Sun, Oct 27, 2013 at 4:19 PM, Paul Berry <stereotype441 at gmail.com> wrote:
> On 20 October 2013 09:20, Marek Olšák <maraeo at gmail.com> wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> This avoids a defect in lower_output_reads.
>>
>> The problem is lower_output_reads treats the gl_FragData array as a single
>> variable. It first redirects all output writes to a temporary variable
>> (array)
>> and then writes the whole temporary variable to the output, generating
>> assignments to all elements of gl_FragData.
>
>
> Can you go into more detail about why this is a problem?  At first glance it
> seems like it should be fine, because failing to assign to an element of
> gl_FragData is supposed to result in undefined data going to that fragment
> output.  So lowering to a shader that does assign to that element of
> gl_FragData seems like it should be harmless.  What am I missing here?

Thanks for the review. The problem is drivers cannot eliminate useless
writes to gl_FragData, and each enabled gl_FragData output decreases
performance. The GLSL compiler cannot eliminate the writes either,
because gl_FragData is an array.

Maybe the GLSL compiler should resize arrays based on which elements
are written, so that unused elements are not declared, but this is not
enough for gl_FragData, where the i-th output can be written, but
(i-1)-th output doesn't have to be.

>
>>
>>
>> BTW this pass can be modified to lower all arrays, not just inputs and
>> outputs.
>> The question is whether it is worth it.
>> ---
>>  src/glsl/opt_dead_builtin_varyings.cpp | 163
>> ++++++++++++++++++++++++++-------
>>  1 file changed, 132 insertions(+), 31 deletions(-)
>>
>> diff --git a/src/glsl/opt_dead_builtin_varyings.cpp
>> b/src/glsl/opt_dead_builtin_varyings.cpp
>> index 7e8cd43..72ff9df 100644
>> --- a/src/glsl/opt_dead_builtin_varyings.cpp
>> +++ b/src/glsl/opt_dead_builtin_varyings.cpp
>> @@ -42,9 +42,11 @@
>>   * If any texture coordinate slots can be eliminated, the gl_TexCoord
>> array is
>>   * broken down into separate vec4 variables with locations equal to
>>   * VARYING_SLOT_TEX0 + i.
>> + *
>> + * The same is done for the gl_FragData fragment shader output.
>>   */
>>
>> -#include "main/imports.h" /* for snprintf */
>> +#include "main/core.h" /* for snprintf and ARRAY_SIZE */
>>  #include "ir.h"
>>  #include "ir_rvalue_visitor.h"
>>  #include "ir_optimization.h"
>> @@ -60,10 +62,14 @@ namespace {
>>  class varying_info_visitor : public ir_hierarchical_visitor {
>>  public:
>>     /* "mode" can be either ir_var_shader_in or ir_var_shader_out */
>> -   varying_info_visitor(ir_variable_mode mode)
>> +   varying_info_visitor(ir_variable_mode mode, bool find_frag_outputs =
>> false)
>>        : lower_texcoord_array(true),
>>          texcoord_array(NULL),
>>          texcoord_usage(0),
>> +        find_frag_outputs(find_frag_outputs),
>> +        lower_fragdata_array(true),
>> +        fragdata_array(NULL),
>> +        fragdata_usage(0),
>>          color_usage(0),
>>          tfeedback_color_usage(0),
>>          fog(NULL),
>> @@ -79,8 +85,27 @@ public:
>>     {
>>        ir_variable *var = ir->variable_referenced();
>>
>> -      if (var && var->mode == this->mode &&
>> -          var->location == VARYING_SLOT_TEX0) {
>> +      if (!var || var->mode != this->mode)
>> +         return visit_continue;
>> +
>> +      if (this->find_frag_outputs && var->location == FRAG_RESULT_DATA0)
>> {
>> +         this->fragdata_array = var;
>> +
>> +         ir_constant *index = ir->array_index->as_constant();
>> +         if (index == NULL) {
>> +            /* This is variable indexing. */
>> +            this->fragdata_usage |= (1 << var->type->array_size()) - 1;
>> +            this->lower_fragdata_array = false;
>> +         }
>> +         else {
>> +            this->fragdata_usage |= 1 << index->get_uint_component(0);
>> +         }
>> +
>> +         /* Don't visit the leaves of ir_dereference_array. */
>> +         return visit_continue_with_parent;
>> +      }
>> +
>> +      if (!this->find_frag_outputs && var->location == VARYING_SLOT_TEX0)
>> {
>>           this->texcoord_array = var;
>>
>>           ir_constant *index = ir->array_index->as_constant();
>> @@ -105,8 +130,17 @@ public:
>>     {
>>        ir_variable *var = ir->variable_referenced();
>>
>> -      if (var->mode == this->mode && var->type->is_array() &&
>> -          var->location == VARYING_SLOT_TEX0) {
>> +      if (var->mode != this->mode || !var->type->is_array())
>> +         return visit_continue;
>> +
>> +      if (this->find_frag_outputs && var->location == FRAG_RESULT_DATA0)
>> {
>> +         /* This is a whole array dereference. */
>> +         this->fragdata_usage |= (1 << var->type->array_size()) - 1;
>> +         this->lower_fragdata_array = false;
>> +         return visit_continue;
>> +      }
>> +
>> +      if (!this->find_frag_outputs && var->location == VARYING_SLOT_TEX0)
>> {
>>           /* This is a whole array dereference like "gl_TexCoord = x;",
>>            * there's probably no point in lowering that.
>>            */
>> @@ -121,6 +155,10 @@ public:
>>        if (var->mode != this->mode)
>>           return visit_continue;
>>
>> +      /* Nothing to do here for fragment outputs. */
>> +      if (this->find_frag_outputs)
>> +         return visit_continue;
>> +
>>        /* Handle colors and fog. */
>>        switch (var->location) {
>>        case VARYING_SLOT_COL0:
>> @@ -185,12 +223,20 @@ public:
>>        if (!this->texcoord_array) {
>>           this->lower_texcoord_array = false;
>>        }
>> +      if (!this->fragdata_array) {
>> +         this->lower_fragdata_array = false;
>> +      }
>>     }
>>
>>     bool lower_texcoord_array;
>>     ir_variable *texcoord_array;
>>     unsigned texcoord_usage; /* bitmask */
>>
>> +   bool find_frag_outputs; /* false if it's looking for varyings */
>> +   bool lower_fragdata_array;
>> +   ir_variable *fragdata_array;
>> +   unsigned fragdata_usage; /* bitmask */
>> +
>>     ir_variable *color[2];
>>     ir_variable *backcolor[2];
>>     unsigned color_usage; /* bitmask */
>> @@ -222,6 +268,7 @@ public:
>>     {
>>        void *const ctx = ir;
>>
>> +      memset(this->new_fragdata, 0, sizeof(this->new_fragdata));
>>        memset(this->new_texcoord, 0, sizeof(this->new_texcoord));
>>        memset(this->new_color, 0, sizeof(this->new_color));
>>        memset(this->new_backcolor, 0, sizeof(this->new_backcolor));
>> @@ -236,31 +283,16 @@ public:
>>         * occurences of gl_TexCoord will be replaced with.
>>         */
>>        if (info->lower_texcoord_array) {
>> -         for (int i = MAX_TEXTURE_COORD_UNITS-1; i >= 0; i--) {
>> -            if (info->texcoord_usage & (1 << i)) {
>> -               char name[32];
>> -
>> -               if (!(external_texcoord_usage & (1 << i))) {
>> -                  /* This varying is unused in the next stage. Declare
>> -                   * a temporary instead of an output. */
>> -                  snprintf(name, 32, "gl_%s_TexCoord%i_dummy", mode_str,
>> i);
>> -                  this->new_texcoord[i] =
>> -                     new (ctx) ir_variable(glsl_type::vec4_type, name,
>> -                                           ir_var_temporary);
>> -               }
>> -               else {
>> -                  snprintf(name, 32, "gl_%s_TexCoord%i", mode_str, i);
>> -                  this->new_texcoord[i] =
>> -                     new(ctx) ir_variable(glsl_type::vec4_type, name,
>> -                                          info->mode);
>> -                  this->new_texcoord[i]->location = VARYING_SLOT_TEX0 +
>> i;
>> -                  this->new_texcoord[i]->explicit_location = true;
>> -                  this->new_texcoord[i]->explicit_index = 0;
>> -               }
>> -
>> -               ir->head->insert_before(new_texcoord[i]);
>> -            }
>> -         }
>> +         prepare_array(ir, this->new_texcoord,
>> ARRAY_SIZE(this->new_texcoord),
>> +                       VARYING_SLOT_TEX0, "TexCoord", mode_str,
>> +                       info->texcoord_usage, external_texcoord_usage);
>> +      }
>> +
>> +      /* Handle gl_FragData in the same way like gl_TexCoord. */
>> +      if (info->lower_fragdata_array) {
>> +         prepare_array(ir, this->new_fragdata,
>> ARRAY_SIZE(this->new_fragdata),
>> +                       FRAG_RESULT_DATA0, "FragData", mode_str,
>> +                       info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) -
>> 1);
>>        }
>>
>>        /* Create dummy variables which will replace set-but-unused color
>> and
>> @@ -301,6 +333,41 @@ public:
>>        visit_list_elements(this, ir);
>>     }
>>
>> +   void prepare_array(exec_list *ir,
>> +                      struct ir_variable **new_var,
>> +                      int max_elements, unsigned start_location,
>> +                      const char *var_name, const char *mode_str,
>> +                      unsigned usage, unsigned external_usage)
>> +   {
>> +      void *const ctx = ir;
>> +
>> +      for (int i = max_elements-1; i >= 0; i--) {
>> +         if (usage & (1 << i)) {
>> +            char name[32];
>> +
>> +            if (!(external_usage & (1 << i))) {
>> +               /* This varying is unused in the next stage. Declare
>> +                * a temporary instead of an output. */
>> +               snprintf(name, 32, "gl_%s_%s%i_dummy", mode_str, var_name,
>> i);
>> +               new_var[i] =
>> +                  new (ctx) ir_variable(glsl_type::vec4_type, name,
>> +                                        ir_var_temporary);
>> +            }
>> +            else {
>> +               snprintf(name, 32, "gl_%s_%s%i", mode_str, var_name, i);
>> +               new_var[i] =
>> +                  new(ctx) ir_variable(glsl_type::vec4_type, name,
>> +                                       this->info->mode);
>> +               new_var[i]->location = start_location + i;
>> +               new_var[i]->explicit_location = true;
>> +               new_var[i]->explicit_index = 0;
>> +            }
>> +
>> +            ir->head->insert_before(new_var[i]);
>> +         }
>> +      }
>> +   }
>> +
>>     virtual ir_visitor_status visit(ir_variable *var)
>>     {
>>        /* Remove the gl_TexCoord array. */
>> @@ -309,6 +376,12 @@ public:
>>           var->remove();
>>        }
>>
>> +      /* Remove the gl_FragData array. */
>> +      if (this->info->lower_fragdata_array &&
>> +          var == this->info->fragdata_array) {
>> +         var->remove();
>> +      }
>> +
>>        /* Replace set-but-unused color and fog outputs with dummy
>> variables. */
>>        for (int i = 0; i < 2; i++) {
>>           if (var == this->info->color[i] && this->new_color[i]) {
>> @@ -350,6 +423,19 @@ public:
>>           }
>>        }
>>
>> +      /* Same for gl_FragData. */
>> +      if (this->info->lower_fragdata_array) {
>> +         /* gl_TexCoord[i] occurence */
>
>
> This should say "gl_FragData[i] occurrence"

Will fix.

>
>>
>> +         ir_dereference_array *const da =
>> (*rvalue)->as_dereference_array();
>> +
>> +         if (da && da->variable_referenced() ==
>> this->info->fragdata_array) {
>> +            unsigned i =
>> da->array_index->as_constant()->get_uint_component(0);
>> +
>> +            *rvalue = new(ctx)
>> ir_dereference_variable(this->new_fragdata[i]);
>> +            return;
>> +         }
>> +      }
>> +
>>        /* Replace set-but-unused color and fog outputs with dummy
>> variables. */
>>        ir_dereference_variable *const dv =
>> (*rvalue)->as_dereference_variable();
>>        if (!dv)
>> @@ -392,6 +478,7 @@ public:
>>
>>  private:
>>     const varying_info_visitor *info;
>> +   ir_variable *new_fragdata[MAX_DRAW_BUFFERS];
>>     ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS];
>>     ir_variable *new_color[2];
>>     ir_variable *new_backcolor[2];
>> @@ -408,6 +495,15 @@ lower_texcoord_array(exec_list *ir, const
>> varying_info_visitor *info)
>>                              1 | 2, true);
>>  }
>>
>> +static void
>> +lower_fragdata_array(exec_list *ir)
>> +{
>> +   varying_info_visitor info(ir_var_shader_out, true);
>> +   info.get(ir, 0, NULL);
>> +
>> +   replace_varyings_visitor(ir, &info, 0, 0, 0);
>> +}
>> +
>>
>>  void
>>  do_dead_builtin_varyings(struct gl_context *ctx,
>> @@ -487,4 +583,9 @@ do_dead_builtin_varyings(struct gl_context *ctx,
>>                                 producer_info.color_usage,
>>                                 producer_info.has_fog);
>>     }
>
>
> At the top of the do_dead_builtin_varyings() function there's this logic:
>
>    /* This optimization has no effect with the core context and GLES2,
> because
>     * the built-in varyings we're eliminating here are not available there.
>     *
>     * EXT_separate_shader_objects doesn't allow this optimization,
>     * because a program object can be bound partially (e.g. only one
>     * stage of a program object can be bound).
>     */
>    if (ctx->API == API_OPENGL_CORE ||
>        ctx->API == API_OPENGLES2 ||
>        ctx->Extensions.EXT_separate_shader_objects) {
>       return;
>    }
>
> Does this need to be changed so that when EXT_separate_shader_objects is
> present, we still go ahead and lower gl_FragData?

Good point. Yes, we want to always lower gl_FragData.

Marek


More information about the mesa-dev mailing list