[Mesa-dev] [PATCH] glsl: break the gl_FragData array into separate gl_FragData[i] variables
Paul Berry
stereotype441 at gmail.com
Sun Oct 27 16:19:28 CET 2013
On 20 October 2013 09:20, Marek Olšák <maraeo at gmail.com> wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> This avoids a defect in lower_output_reads.
>
> The problem is lower_output_reads treats the gl_FragData array as a single
> variable. It first redirects all output writes to a temporary variable
> (array)
> and then writes the whole temporary variable to the output, generating
> assignments to all elements of gl_FragData.
>
Can you go into more detail about why this is a problem? At first glance
it seems like it should be fine, because failing to assign to an element of
gl_FragData is supposed to result in undefined data going to that fragment
output. So lowering to a shader that does assign to that element of
gl_FragData seems like it should be harmless. What am I missing here?
>
> BTW this pass can be modified to lower all arrays, not just inputs and
> outputs.
> The question is whether it is worth it.
> ---
> src/glsl/opt_dead_builtin_varyings.cpp | 163
> ++++++++++++++++++++++++++-------
> 1 file changed, 132 insertions(+), 31 deletions(-)
>
> diff --git a/src/glsl/opt_dead_builtin_varyings.cpp
> b/src/glsl/opt_dead_builtin_varyings.cpp
> index 7e8cd43..72ff9df 100644
> --- a/src/glsl/opt_dead_builtin_varyings.cpp
> +++ b/src/glsl/opt_dead_builtin_varyings.cpp
> @@ -42,9 +42,11 @@
> * If any texture coordinate slots can be eliminated, the gl_TexCoord
> array is
> * broken down into separate vec4 variables with locations equal to
> * VARYING_SLOT_TEX0 + i.
> + *
> + * The same is done for the gl_FragData fragment shader output.
> */
>
> -#include "main/imports.h" /* for snprintf */
> +#include "main/core.h" /* for snprintf and ARRAY_SIZE */
> #include "ir.h"
> #include "ir_rvalue_visitor.h"
> #include "ir_optimization.h"
> @@ -60,10 +62,14 @@ namespace {
> class varying_info_visitor : public ir_hierarchical_visitor {
> public:
> /* "mode" can be either ir_var_shader_in or ir_var_shader_out */
> - varying_info_visitor(ir_variable_mode mode)
> + varying_info_visitor(ir_variable_mode mode, bool find_frag_outputs =
> false)
> : lower_texcoord_array(true),
> texcoord_array(NULL),
> texcoord_usage(0),
> + find_frag_outputs(find_frag_outputs),
> + lower_fragdata_array(true),
> + fragdata_array(NULL),
> + fragdata_usage(0),
> color_usage(0),
> tfeedback_color_usage(0),
> fog(NULL),
> @@ -79,8 +85,27 @@ public:
> {
> ir_variable *var = ir->variable_referenced();
>
> - if (var && var->mode == this->mode &&
> - var->location == VARYING_SLOT_TEX0) {
> + if (!var || var->mode != this->mode)
> + return visit_continue;
> +
> + if (this->find_frag_outputs && var->location == FRAG_RESULT_DATA0) {
> + this->fragdata_array = var;
> +
> + ir_constant *index = ir->array_index->as_constant();
> + if (index == NULL) {
> + /* This is variable indexing. */
> + this->fragdata_usage |= (1 << var->type->array_size()) - 1;
> + this->lower_fragdata_array = false;
> + }
> + else {
> + this->fragdata_usage |= 1 << index->get_uint_component(0);
> + }
> +
> + /* Don't visit the leaves of ir_dereference_array. */
> + return visit_continue_with_parent;
> + }
> +
> + if (!this->find_frag_outputs && var->location == VARYING_SLOT_TEX0)
> {
> this->texcoord_array = var;
>
> ir_constant *index = ir->array_index->as_constant();
> @@ -105,8 +130,17 @@ public:
> {
> ir_variable *var = ir->variable_referenced();
>
> - if (var->mode == this->mode && var->type->is_array() &&
> - var->location == VARYING_SLOT_TEX0) {
> + if (var->mode != this->mode || !var->type->is_array())
> + return visit_continue;
> +
> + if (this->find_frag_outputs && var->location == FRAG_RESULT_DATA0) {
> + /* This is a whole array dereference. */
> + this->fragdata_usage |= (1 << var->type->array_size()) - 1;
> + this->lower_fragdata_array = false;
> + return visit_continue;
> + }
> +
> + if (!this->find_frag_outputs && var->location == VARYING_SLOT_TEX0)
> {
> /* This is a whole array dereference like "gl_TexCoord = x;",
> * there's probably no point in lowering that.
> */
> @@ -121,6 +155,10 @@ public:
> if (var->mode != this->mode)
> return visit_continue;
>
> + /* Nothing to do here for fragment outputs. */
> + if (this->find_frag_outputs)
> + return visit_continue;
> +
> /* Handle colors and fog. */
> switch (var->location) {
> case VARYING_SLOT_COL0:
> @@ -185,12 +223,20 @@ public:
> if (!this->texcoord_array) {
> this->lower_texcoord_array = false;
> }
> + if (!this->fragdata_array) {
> + this->lower_fragdata_array = false;
> + }
> }
>
> bool lower_texcoord_array;
> ir_variable *texcoord_array;
> unsigned texcoord_usage; /* bitmask */
>
> + bool find_frag_outputs; /* false if it's looking for varyings */
> + bool lower_fragdata_array;
> + ir_variable *fragdata_array;
> + unsigned fragdata_usage; /* bitmask */
> +
> ir_variable *color[2];
> ir_variable *backcolor[2];
> unsigned color_usage; /* bitmask */
> @@ -222,6 +268,7 @@ public:
> {
> void *const ctx = ir;
>
> + memset(this->new_fragdata, 0, sizeof(this->new_fragdata));
> memset(this->new_texcoord, 0, sizeof(this->new_texcoord));
> memset(this->new_color, 0, sizeof(this->new_color));
> memset(this->new_backcolor, 0, sizeof(this->new_backcolor));
> @@ -236,31 +283,16 @@ public:
> * occurences of gl_TexCoord will be replaced with.
> */
> if (info->lower_texcoord_array) {
> - for (int i = MAX_TEXTURE_COORD_UNITS-1; i >= 0; i--) {
> - if (info->texcoord_usage & (1 << i)) {
> - char name[32];
> -
> - if (!(external_texcoord_usage & (1 << i))) {
> - /* This varying is unused in the next stage. Declare
> - * a temporary instead of an output. */
> - snprintf(name, 32, "gl_%s_TexCoord%i_dummy", mode_str,
> i);
> - this->new_texcoord[i] =
> - new (ctx) ir_variable(glsl_type::vec4_type, name,
> - ir_var_temporary);
> - }
> - else {
> - snprintf(name, 32, "gl_%s_TexCoord%i", mode_str, i);
> - this->new_texcoord[i] =
> - new(ctx) ir_variable(glsl_type::vec4_type, name,
> - info->mode);
> - this->new_texcoord[i]->location = VARYING_SLOT_TEX0 + i;
> - this->new_texcoord[i]->explicit_location = true;
> - this->new_texcoord[i]->explicit_index = 0;
> - }
> -
> - ir->head->insert_before(new_texcoord[i]);
> - }
> - }
> + prepare_array(ir, this->new_texcoord,
> ARRAY_SIZE(this->new_texcoord),
> + VARYING_SLOT_TEX0, "TexCoord", mode_str,
> + info->texcoord_usage, external_texcoord_usage);
> + }
> +
> + /* Handle gl_FragData in the same way like gl_TexCoord. */
> + if (info->lower_fragdata_array) {
> + prepare_array(ir, this->new_fragdata,
> ARRAY_SIZE(this->new_fragdata),
> + FRAG_RESULT_DATA0, "FragData", mode_str,
> + info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - 1);
> }
>
> /* Create dummy variables which will replace set-but-unused color
> and
> @@ -301,6 +333,41 @@ public:
> visit_list_elements(this, ir);
> }
>
> + void prepare_array(exec_list *ir,
> + struct ir_variable **new_var,
> + int max_elements, unsigned start_location,
> + const char *var_name, const char *mode_str,
> + unsigned usage, unsigned external_usage)
> + {
> + void *const ctx = ir;
> +
> + for (int i = max_elements-1; i >= 0; i--) {
> + if (usage & (1 << i)) {
> + char name[32];
> +
> + if (!(external_usage & (1 << i))) {
> + /* This varying is unused in the next stage. Declare
> + * a temporary instead of an output. */
> + snprintf(name, 32, "gl_%s_%s%i_dummy", mode_str, var_name,
> i);
> + new_var[i] =
> + new (ctx) ir_variable(glsl_type::vec4_type, name,
> + ir_var_temporary);
> + }
> + else {
> + snprintf(name, 32, "gl_%s_%s%i", mode_str, var_name, i);
> + new_var[i] =
> + new(ctx) ir_variable(glsl_type::vec4_type, name,
> + this->info->mode);
> + new_var[i]->location = start_location + i;
> + new_var[i]->explicit_location = true;
> + new_var[i]->explicit_index = 0;
> + }
> +
> + ir->head->insert_before(new_var[i]);
> + }
> + }
> + }
> +
> virtual ir_visitor_status visit(ir_variable *var)
> {
> /* Remove the gl_TexCoord array. */
> @@ -309,6 +376,12 @@ public:
> var->remove();
> }
>
> + /* Remove the gl_FragData array. */
> + if (this->info->lower_fragdata_array &&
> + var == this->info->fragdata_array) {
> + var->remove();
> + }
> +
> /* Replace set-but-unused color and fog outputs with dummy
> variables. */
> for (int i = 0; i < 2; i++) {
> if (var == this->info->color[i] && this->new_color[i]) {
> @@ -350,6 +423,19 @@ public:
> }
> }
>
> + /* Same for gl_FragData. */
> + if (this->info->lower_fragdata_array) {
> + /* gl_TexCoord[i] occurence */
>
This should say "gl_FragData[i] occurrence"
> + ir_dereference_array *const da =
> (*rvalue)->as_dereference_array();
> +
> + if (da && da->variable_referenced() ==
> this->info->fragdata_array) {
> + unsigned i =
> da->array_index->as_constant()->get_uint_component(0);
> +
> + *rvalue = new(ctx)
> ir_dereference_variable(this->new_fragdata[i]);
> + return;
> + }
> + }
> +
> /* Replace set-but-unused color and fog outputs with dummy
> variables. */
> ir_dereference_variable *const dv =
> (*rvalue)->as_dereference_variable();
> if (!dv)
> @@ -392,6 +478,7 @@ public:
>
> private:
> const varying_info_visitor *info;
> + ir_variable *new_fragdata[MAX_DRAW_BUFFERS];
> ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS];
> ir_variable *new_color[2];
> ir_variable *new_backcolor[2];
> @@ -408,6 +495,15 @@ lower_texcoord_array(exec_list *ir, const
> varying_info_visitor *info)
> 1 | 2, true);
> }
>
> +static void
> +lower_fragdata_array(exec_list *ir)
> +{
> + varying_info_visitor info(ir_var_shader_out, true);
> + info.get(ir, 0, NULL);
> +
> + replace_varyings_visitor(ir, &info, 0, 0, 0);
> +}
> +
>
> void
> do_dead_builtin_varyings(struct gl_context *ctx,
> @@ -487,4 +583,9 @@ do_dead_builtin_varyings(struct gl_context *ctx,
> producer_info.color_usage,
> producer_info.has_fog);
> }
>
At the top of the do_dead_builtin_varyings() function there's this logic:
/* This optimization has no effect with the core context and GLES2,
because
* the built-in varyings we're eliminating here are not available there.
*
* EXT_separate_shader_objects doesn't allow this optimization,
* because a program object can be bound partially (e.g. only one
* stage of a program object can be bound).
*/
if (ctx->API == API_OPENGL_CORE ||
ctx->API == API_OPENGLES2 ||
ctx->Extensions.EXT_separate_shader_objects) {
return;
}
Does this need to be changed so that when EXT_separate_shader_objects is
present, we still go ahead and lower gl_FragData?
> +
> + /* Finally, lower the gl_FragData array to separate variables. */
> + if (consumer->Type == GL_FRAGMENT_SHADER) {
> + lower_fragdata_array(consumer->ir);
> + }
> }
> --
> 1.8.1.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131027/06b66e12/attachment-0001.html>
More information about the mesa-dev
mailing list