[Mesa-dev] [PATCH] llvmpipe: fix layer/vp input into fs when not written by prior stages

Fri Dec 11 13:35:03 PST 2015

On 11/12/15 21:31, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> ARB_fragment_layer_viewport requires that if a fs reads layer or viewport
> index but it wasn't output by gs (or vs with other extensions), then it reads
> 0. This never worked for llvmpipe, and is surprisingly non-trivial to fix.
> The problem is the mechanism to handle non-existing outputs in draw is rather
> crude, it will simply redirect them to whatever is at output 0, thus later
> stages will just get garbage. So, rather than trying to fix this up (which
> looks non-trivial), fix this up in llvmpipe setup by detecting this case there
> and output a fixed zero directly.
> While here, also optimize the hw vertex layout a bit - previously if the gs
> outputted layer (or vp) and the fs read those inputs, we'd add them twice
> to the vertex layout, which is unnecessary.
> And do some minor cleanup, slots don't require that many bits, there was some
> bogus (but harmless) float/int mixup for psize slot too, make the slots all
> signed instead of mixed signed/unsigned (they can't actually be 0 when valid,
> since position is at 0 always at this point, but avoid confusion as they can
> be at slot number 0 at vs output), and make sure they are properly initialized
> (layer and vp index slot were not which looked fishy as they might not have
> got set back to zero when changing from a gs which outputs them to one which
> does not).
>
> This fixes the failures in piglit's arb_fragment_layer_viewport group
> (3 each for layer and vp).
>
> v2: switch to all signed instead of all unsigned slots.
> ---
>   src/gallium/drivers/llvmpipe/lp_bld_interp.h    |  3 +-
>   src/gallium/drivers/llvmpipe/lp_context.h       | 18 ++++--
>   src/gallium/drivers/llvmpipe/lp_setup.c         |  2 +-
>   src/gallium/drivers/llvmpipe/lp_setup_context.h |  8 +--
>   src/gallium/drivers/llvmpipe/lp_setup_point.c   |  2 +-
>   src/gallium/drivers/llvmpipe/lp_state_derived.c | 75 ++++++++++++++++++-------
>   src/gallium/drivers/llvmpipe/lp_state_setup.c   | 25 ++++++---
>   src/gallium/drivers/llvmpipe/lp_state_setup.h   |  1 -
>   8 files changed, 91 insertions(+), 43 deletions(-)
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
> index 9029d2a..0a52642 100644
> --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
> +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
> @@ -63,7 +63,8 @@ enum lp_interp {
>      LP_INTERP_LINEAR,
>      LP_INTERP_PERSPECTIVE,
>      LP_INTERP_POSITION,
> -   LP_INTERP_FACING
> +   LP_INTERP_FACING,
> +   LP_INTERP_ZERO
>   };
>
>   struct lp_shader_input {
> diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
> index c9a5d67..70ec032 100644
> --- a/src/gallium/drivers/llvmpipe/lp_context.h
> +++ b/src/gallium/drivers/llvmpipe/lp_context.h
> @@ -108,22 +108,28 @@ struct llvmpipe_context {
>      struct vertex_info vertex_info;
>
>      /** Which vertex shader output slot contains color */
> -   int color_slot[2];
> +   int8_t color_slot[2];
>
>      /** Which vertex shader output slot contains bcolor */
> -   int bcolor_slot[2];
> +   int8_t bcolor_slot[2];
>
>      /** Which vertex shader output slot contains point size */
> -   int psize_slot;
> +   int8_t psize_slot;
>
>      /** Which vertex shader output slot contains viewport index */
> -   int viewport_index_slot;
> +   int8_t viewport_index_slot;
>
>      /** Which geometry shader output slot contains layer */
> -   int layer_slot;
> +   int8_t layer_slot;
>
>      /** A fake frontface output for unfilled primitives */
> -   int face_slot;
> +   int8_t face_slot;
> +
> +   /** Which output slot is used for the fake vp index info */
> +   int8_t fake_vpindex_slot;
> +
> +   /** Which output slot is used for the fake layer info */
> +   int8_t fake_layer_slot;
>
>      /** Depth format and bias settings. */
>      boolean floating_point_depth;
> diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
> index 1778b13..ddbb88e 100644
> --- a/src/gallium/drivers/llvmpipe/lp_setup.c
> +++ b/src/gallium/drivers/llvmpipe/lp_setup.c
> @@ -1207,7 +1207,7 @@ lp_setup_update_state( struct lp_setup_context *setup,
>         /* Will probably need to move this somewhere else, just need
>          * to know about vertex shader point size attribute.
>          */
> -      setup->psize = lp->psize_slot;
> +      setup->psize_slot = lp->psize_slot;
>         setup->viewport_index_slot = lp->viewport_index_slot;
>         setup->layer_slot = lp->layer_slot;
>         setup->face_slot = lp->face_slot;
> diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
> index 2410e23..80acd74 100644
> --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
> +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
> @@ -105,10 +105,10 @@ struct lp_setup_context
>      float pixel_offset;
>      float line_width;
>      float point_size;
> -   float psize;
> -   unsigned viewport_index_slot;
> -   unsigned layer_slot;
> -   int face_slot;
> +   int8_t psize_slot;
> +   int8_t viewport_index_slot;
> +   int8_t layer_slot;
> +   int8_t face_slot;
>
>      struct pipe_framebuffer_state fb;
>      struct u_rect framebuffer;
> diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
> index 75544b5..14c389f 100644
> --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
> +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
> @@ -328,7 +328,7 @@ try_setup_point( struct lp_setup_context *setup,
>      struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
>      /* x/y positions in fixed point */
>      const struct lp_setup_variant_key *key = &setup->setup.variant->key;
> -   const int sizeAttr = setup->psize;
> +   const int sizeAttr = setup->psize_slot;
>      const float size
>         = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0]
>         : setup->point_size;
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
> index a25e832..2b4b07c 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
> @@ -55,10 +55,19 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
>
>      draw_prepare_shader_outputs(llvmpipe->draw);
>
> +   /*
> +    * Those can't actually be 0 (because pos is always at 0).
> +    * But use ints anyway to avoid confusion (in vs outputs, they
> +    * can very well be at pos 0).
> +    */
>      llvmpipe->color_slot[0] = -1;
>      llvmpipe->color_slot[1] = -1;
>      llvmpipe->bcolor_slot[0] = -1;
>      llvmpipe->bcolor_slot[1] = -1;
> +   llvmpipe->viewport_index_slot = -1;
> +   llvmpipe->layer_slot = -1;
> +   llvmpipe->face_slot = -1;
> +   llvmpipe->psize_slot = -1;
>
>      /*
>       * Match FS inputs against VS outputs, emitting the necessary
> @@ -90,10 +99,34 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
>         }
>
>         if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
> -         llvmpipe->face_slot = vinfo->num_attribs;
> +         llvmpipe->face_slot = (int)vinfo->num_attribs;
>            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
>         } else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_PRIMID) {
>            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
> +      /*
> +       * For vp index and layer, if the fs requires them but the vs doesn't
> +       * provide them, store the slot - we'll later replace the data directly
> +       * with zero (as required by ARB_fragment_layer_viewport). This is
> +       * because draw itself just redirects them to whatever was at output 0.
> +       * We'll also store the real vpindex/layer slot for setup use.
> +       */
> +      } else if (lpfs->info.base.input_semantic_name[i] ==
> +                 TGSI_SEMANTIC_VIEWPORT_INDEX) {
> +         if (vs_index >= 0) {
> +            llvmpipe->viewport_index_slot = (int)vinfo->num_attribs;
> +         }
> +         else {
> +            llvmpipe->fake_vpindex_slot = (int)vinfo->num_attribs;
> +         }
> +         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
> +      } else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
> +         if (vs_index >= 0) {
> +            llvmpipe->layer_slot = (int)vinfo->num_attribs;
> +         }
> +         else {
> +            llvmpipe->fake_layer_slot = (int)vinfo->num_attribs;
> +         }
> +         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
>         } else {
>            /*
>             * Emit the requested fs attribute for all but position.
> @@ -101,6 +134,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
>            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
>         }
>      }
> +
>      /* Figure out if we need bcolor as well.
>       */
>      for (i = 0; i < 2; i++) {
> @@ -113,37 +147,36 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
>         }
>      }
>
> -
>      /* Figure out if we need pointsize as well.
>       */
>      vs_index = draw_find_shader_output(llvmpipe->draw,
>                                         TGSI_SEMANTIC_PSIZE, 0);
>
>      if (vs_index >= 0) {
> -      llvmpipe->psize_slot = vinfo->num_attribs;
> +      llvmpipe->psize_slot = (int)vinfo->num_attribs;
>         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
>      }
>
> -   /* Figure out if we need viewport index */
> -   vs_index = draw_find_shader_output(llvmpipe->draw,
> -                                      TGSI_SEMANTIC_VIEWPORT_INDEX,
> -                                      0);
> -   if (vs_index >= 0) {
> -      llvmpipe->viewport_index_slot = vinfo->num_attribs;
> -      draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
> -   } else {
> -      llvmpipe->viewport_index_slot = 0;
> +   /* Figure out if we need viewport index (if it wasn't already in fs input) */
> +   if (llvmpipe->viewport_index_slot < 0) {
> +      vs_index = draw_find_shader_output(llvmpipe->draw,
> +                                         TGSI_SEMANTIC_VIEWPORT_INDEX,
> +                                         0);
> +      if (vs_index >= 0) {
> +         llvmpipe->viewport_index_slot = (int)vinfo->num_attribs;
> +         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
> +      }
>      }
>
> -   /* Figure out if we need layer */
> -   vs_index = draw_find_shader_output(llvmpipe->draw,
> -                                      TGSI_SEMANTIC_LAYER,
> -                                      0);
> -   if (vs_index >= 0) {
> -      llvmpipe->layer_slot = vinfo->num_attribs;
> -      draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
> -   } else {
> -      llvmpipe->layer_slot = 0;
> +   /* Figure out if we need layer (if it wasn't already in fs input) */
> +   if (llvmpipe->layer_slot < 0) {
> +      vs_index = draw_find_shader_output(llvmpipe->draw,
> +                                         TGSI_SEMANTIC_LAYER,
> +                                         0);
> +      if (vs_index >= 0) {
> +         llvmpipe->layer_slot = (int)vinfo->num_attribs;
> +         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
> +      }
>      }
>
>      draw_compute_vertex_size(vinfo);
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
> index 64215be..8172a56 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
> @@ -602,6 +602,13 @@ emit_tri_coef( struct gallivm_state *gallivm,
>             */
>            break;
>
> +      case LP_INTERP_ZERO:
> +         /*
> +          * The information we get from the output is bogus, replace it
> +          * with zero.
> +          */
> +         emit_constant_coef4(gallivm, args, slot+1, args->bld.zero);
> +         break;
>         case LP_INTERP_FACING:
>            emit_facing_coef(gallivm, args, slot+1);
>            break;
> @@ -848,14 +855,10 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
>      key->size = Offset(struct lp_setup_variant_key,
>                         inputs[key->num_inputs]);
>
> -   key->color_slot  = lp->color_slot [0];
> +   key->color_slot = lp->color_slot[0];
>      key->bcolor_slot = lp->bcolor_slot[0];
> -   key->spec_slot   = lp->color_slot [1];
> -   key->bspec_slot  = lp->bcolor_slot[1];
> -   assert(key->color_slot  == lp->color_slot [0]);
> -   assert(key->bcolor_slot == lp->bcolor_slot[0]);
> -   assert(key->spec_slot   == lp->color_slot [1]);
> -   assert(key->bspec_slot  == lp->bcolor_slot[1]);
> +   key->spec_slot = lp->color_slot[1];
> +   key->bspec_slot = lp->bcolor_slot[1];
>
>      /*
>       * If depth is floating point, depth bias is calculated with respect
> @@ -876,7 +879,13 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
>      key->pad = 0;
>      memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
>      for (i = 0; i < key->num_inputs; i++) {
> -      if (key->inputs[i].interp == LP_INTERP_COLOR) {
> +      if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
> +         if (key->inputs[i].src_index == lp->fake_vpindex_slot ||
> +             key->inputs[i].src_index == lp->fake_layer_slot) {
> +            key->inputs[i].interp = LP_INTERP_ZERO;
> +         }
> +      }
> +      else if (key->inputs[i].interp == LP_INTERP_COLOR) {
>            if (lp->rasterizer->flatshade)
>               key->inputs[i].interp = LP_INTERP_CONSTANT;
>            else
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h
> index 82af835..9ad2444 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_setup.h
> +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h
> @@ -18,7 +18,6 @@ struct lp_setup_variant_key {
>      unsigned size:16;
>      unsigned num_inputs:8;
>      int color_slot:8;
> -
>      int bcolor_slot:8;
>      int spec_slot:8;
>      int bspec_slot:8;
>

Looks great. Thanks for the update.

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>