[Mesa-dev] [PATCH 8/8] draw: fix draw pipeline stages wrt changing vertex sizes

Jose Fonseca jfonseca at vmware.com
Tue Dec 22 07:32:49 PST 2015


On 22/12/15 03:00, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> The vertex size can change in fetch_pipeline_prepare, if drivers use
> the draw_prepare_shader_outputs hook (similar to what the llvm path already
> does). Albeit this is hugely confusing and very error prone.
> Also sort-of prepare the wide point stage for using draw_prepare_shader_outputs
> as well, but don't hook it up, it might not make sense (will still crash if
> drivers use both draw_prepare_shader_outputs and widepoint stage).
> And add some comments explaining how this works (or rather, doesn't really).
> This needs some overhaul I'm just not quite sure yet how it should look...
> ---
>   src/gallium/auxiliary/draw/draw_context.c          | 14 ++++
>   src/gallium/auxiliary/draw/draw_pipe.h             |  2 +
>   src/gallium/auxiliary/draw/draw_pipe_unfilled.c    | 10 +--
>   src/gallium/auxiliary/draw/draw_pipe_wide_point.c  | 81 ++++++++++++----------
>   .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  | 47 +++++++------
>   5 files changed, 92 insertions(+), 62 deletions(-)
>
> diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
> index 16a261c..de5fd59 100644
> --- a/src/gallium/auxiliary/draw/draw_context.c
> +++ b/src/gallium/auxiliary/draw/draw_context.c
> @@ -603,13 +603,27 @@ draw_get_shader_info(const struct draw_context *draw)
>    * the face-side information, but using this method we can
>    * inject another shader output which passes the original
>    * face side information to the backend.

Let's add a new line here.

> + * FIXME: this mechanism is quite brittle. And because it is
> + * primitive-independent we're allocating outputs which we might never
> + * need (which is why it's not very feasible for wide-point here).
> + * A big problem is also that these prepare hooks are called both in
> + * advance (i.e. from here for _some_ drivers) and later (when pipeline
> + * is run first). This is incredibly error prone... unfilled/prim_assembler
> + * must be (and can only be) called in advance (because otherwise there
> + * wouldn't be enough memory allocated). The reason late alloc works for
> + * aapoint, aaline and widepoint is that these use new verts which are
> + * large enough (albeit valgrind still says invalid read in dup_vert,
> + * apparently reading past the old vert size as it'll copy in the new size).
>    */
>   void
>   draw_prepare_shader_outputs(struct draw_context *draw)
>   {
>      draw_remove_extra_vertex_attribs(draw);
> +   /* order matters (pipeline order)! */
>      draw_prim_assembler_prepare_outputs(draw->ia);
>      draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled);
> +/*   if (draw->pipeline.wide_point)
> +      draw_wide_point_prepare_outputs(draw, draw->pipeline.wide_point);*/
>      if (draw->pipeline.aapoint)
>         draw_aapoint_prepare_outputs(draw, draw->pipeline.aapoint);
>      if (draw->pipeline.aaline)
> diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h
> index e69dcbd..e7e7eef 100644
> --- a/src/gallium/auxiliary/draw/draw_pipe.h
> +++ b/src/gallium/auxiliary/draw/draw_pipe.h
> @@ -101,6 +101,8 @@ void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *hea
>   void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header);
>   void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header);
>
> +void draw_wide_point_prepare_outputs(struct draw_context *context,
> +                                     struct draw_stage *stage);
>   void draw_aapoint_prepare_outputs(struct draw_context *context,
>                                     struct draw_stage *stage);
>   void draw_aaline_prepare_outputs(struct draw_context *context,
> diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
> index 2517d61..0c6b3ba 100644
> --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
> +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
> @@ -189,7 +189,7 @@ static void unfilled_tri( struct draw_stage *stage,
>   }
>
>
> -static void unfilled_first_tri( struct draw_stage *stage,
> +static void unfilled_first_tri( struct draw_stage *stage,
>   				struct prim_header *header )
>   {
>      struct unfilled_stage *unfilled = unfilled_stage(stage);
> @@ -204,12 +204,14 @@ static void unfilled_first_tri( struct draw_stage *stage,
>
>
>
> -static void unfilled_flush( struct draw_stage *stage,
> -			    unsigned flags )
> +static void unfilled_flush(struct draw_stage *stage,
> +                           unsigned flags)
>   {
> -   stage->next->flush( stage->next, flags );
> +   stage->next->flush(stage->next, flags);
>
>      stage->tri = unfilled_first_tri;
> +
> +   draw_remove_extra_vertex_attribs(stage->draw);
>   }
>
>
> diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
> index adb6120..3271532 100644
> --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
> +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
> @@ -212,6 +212,8 @@ widepoint_first_point(struct draw_stage *stage,
>         wide->ybias = -0.125;
>      }
>
> +   draw_wide_point_prepare_outputs(draw, draw->pipeline.wide_point);
> +
>      /* Disable triangle culling, stippling, unfilled mode etc. */
>      r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
>      draw->suspend_flushing = TRUE;
> @@ -227,42 +229,6 @@ widepoint_first_point(struct draw_stage *stage,
>         stage->point = draw_pipe_passthrough_point;
>      }
>
> -   draw_remove_extra_vertex_attribs(draw);
> -
> -   if (rast->point_quad_rasterization) {
> -      const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
> -      uint i;
> -
> -      assert(fs);
> -
> -      wide->num_texcoord_gen = 0;
> -
> -      /* Loop over fragment shader inputs looking for the PCOORD input or inputs
> -       * for which bit 'k' in sprite_coord_enable is set.
> -       */
> -      for (i = 0; i < fs->info.num_inputs; i++) {
> -         int slot;
> -         const unsigned sn = fs->info.input_semantic_name[i];
> -         const unsigned si = fs->info.input_semantic_index[i];
> -
> -         if (sn == wide->sprite_coord_semantic) {
> -            /* Note that sprite_coord_enable is a bitfield of 32 bits. */
> -            if (si >= 32 || !(rast->sprite_coord_enable & (1 << si)))
> -               continue;
> -         } else if (sn != TGSI_SEMANTIC_PCOORD) {
> -            continue;
> -         }
> -
> -         /* OK, this generic attribute needs to be replaced with a
> -          * sprite coord (see above).
> -          */
> -         slot = draw_alloc_extra_vertex_attrib(draw, sn, si);
> -
> -         /* add this slot to the texcoord-gen list */
> -         wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot;
> -      }
> -   }
> -
>      wide->psize_slot = -1;
>      if (rast->point_size_per_vertex) {
>         /* find PSIZ vertex output */
> @@ -312,6 +278,49 @@ static void widepoint_destroy( struct draw_stage *stage )
>   }
>
>
> +void
> +draw_wide_point_prepare_outputs(struct draw_context *draw,
> +                                struct draw_stage *stage)
> +{
> +   struct widepoint_stage *wide = widepoint_stage(stage);
> +   const struct pipe_rasterizer_state *rast = draw->rasterizer;
> +
> +   if (rast->point_quad_rasterization) {
> +      const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
> +      uint i;
> +
> +      assert(fs);
> +
> +      wide->num_texcoord_gen = 0;
> +
> +      /* Loop over fragment shader inputs looking for the PCOORD input or inputs
> +       * for which bit 'k' in sprite_coord_enable is set.
> +       */
> +      for (i = 0; i < fs->info.num_inputs; i++) {
> +         int slot;
> +         const unsigned sn = fs->info.input_semantic_name[i];
> +         const unsigned si = fs->info.input_semantic_index[i];
> +
> +         if (sn == wide->sprite_coord_semantic) {
> +            /* Note that sprite_coord_enable is a bitfield of 32 bits. */
> +            if (si >= 32 || !(rast->sprite_coord_enable & (1 << si)))
> +               continue;
> +         } else if (sn != TGSI_SEMANTIC_PCOORD) {
> +            continue;
> +         }
> +
> +         /* OK, this generic attribute needs to be replaced with a
> +          * sprite coord (see above).
> +          */
> +         slot = draw_alloc_extra_vertex_attrib(draw, sn, si);
> +
> +         /* add this slot to the texcoord-gen list */
> +         wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot;
> +      }
> +   }
> +}
> +
> +
>   struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
>   {
>      struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage);
> diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> index aa20b91..8be9046 100644
> --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> @@ -80,14 +80,9 @@ fetch_pipeline_prepare(struct draw_pt_middle_end *middle,
>      unsigned instance_id_index = ~0;
>      const unsigned gs_out_prim = (gs ? gs->output_primitive :
>                                    u_assembled_prim(prim));
> -   unsigned nr_vs_outputs = draw_total_vs_outputs(draw);
> -   unsigned nr = MAX2(vs->info.num_inputs, nr_vs_outputs);
>      unsigned point_clip = draw->rasterizer->fill_front == PIPE_POLYGON_MODE_POINT ||
>                            gs_out_prim == PIPE_PRIM_POINTS;
> -
> -   if (gs) {
> -      nr = MAX2(nr, gs->info.num_outputs + 1);
> -   }
> +   unsigned nr;
>
>      /* Scan for instanceID system value.
>       */
> @@ -101,16 +96,6 @@ fetch_pipeline_prepare(struct draw_pt_middle_end *middle,
>      fpme->input_prim = prim;
>      fpme->opt = opt;
>
> -   /* Always leave room for the vertex header whether we need it or
> -    * not.  It's hard to get rid of it in particular because of the
> -    * viewport code in draw_pt_post_vs.c.
> -    */
> -   fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
> -
> -   draw_pt_fetch_prepare( fpme->fetch,
> -                          vs->info.num_inputs,
> -                          fpme->vertex_size,
> -                          instance_id_index );
>      draw_pt_post_vs_prepare( fpme->post_vs,
>                               draw->clip_xy,
>                               draw->clip_z,
> @@ -124,9 +109,9 @@ fetch_pipeline_prepare(struct draw_pt_middle_end *middle,
>      draw_pt_so_emit_prepare( fpme->so_emit, FALSE );
>
>      if (!(opt & PT_PIPELINE)) {
> -      draw_pt_emit_prepare( fpme->emit,
> -			    gs_out_prim,
> -                            max_vertices );
> +      draw_pt_emit_prepare(fpme->emit,
> +                           gs_out_prim,
> +                           max_vertices);
>
>         *max_vertices = MAX2( *max_vertices, 4096 );
>      }
> @@ -135,12 +120,30 @@ fetch_pipeline_prepare(struct draw_pt_middle_end *middle,
>         *max_vertices = 4096;
>      }
>
> +   /* Get the number of float[4] attributes per vertex.
> +    * Note: this must be done after draw_pt_emit_prepare() since that
> +    * can effect the vertex size.
> +    */
> +   nr = MAX2(vs->info.num_inputs, draw_total_vs_outputs(draw));
> +   /* XXX llvm path doesn't have that */


It might be worth elaborate here: is this a mere remark, or something 
that needs fixing?

> +   if (gs) {
> +      nr = MAX2(nr, gs->info.num_outputs + 1);
> +   }
> +
> +   /* Always leave room for the vertex header whether we need it or
> +    * not.  It's hard to get rid of it in particular because of the
> +    * viewport code in draw_pt_post_vs.c.
> +    */
> +   fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
> +
> +   draw_pt_fetch_prepare(fpme->fetch,
> +                         vs->info.num_inputs,
> +                         fpme->vertex_size,
> +                         instance_id_index);
> +
>      /* No need to prepare the shader.
>       */
>      vs->prepare(vs, draw);
> -
> -   /* Make sure that the vertex size didn't change at any point above */
> -   assert(nr_vs_outputs == draw_total_vs_outputs(draw));
>   }
>
>
>

Otherwise it looks alright, AFAICT, but I confess this is all a bit 
beyond me.

svga driver also uses this, so it might be worth waiting for Brian to 
take a look.


Jose



More information about the mesa-dev mailing list