[Mesa-dev] [PATCH 8/8] draw: fix draw pipeline stages wrt changing vertex sizes

Mon Dec 21 19:00:27 PST 2015

From: Roland Scheidegger <sroland at vmware.com>

The vertex size can change in fetch_pipeline_prepare, if drivers use
the draw_prepare_shader_outputs hook (similar to what the llvm path already
does). Albeit this is hugely confusing and very error prone.
Also sort-of prepare the wide point stage for using draw_prepare_shader_outputs
as well, but don't hook it up, it might not make sense (will still crash if
drivers use both draw_prepare_shader_outputs and widepoint stage).
And add some comments explaining how this works (or rather, doesn't really).
This needs some overhaul I'm just not quite sure yet how it should look...
---
 src/gallium/auxiliary/draw/draw_context.c          | 14 ++++
 src/gallium/auxiliary/draw/draw_pipe.h             |  2 +
 src/gallium/auxiliary/draw/draw_pipe_unfilled.c    | 10 +--
 src/gallium/auxiliary/draw/draw_pipe_wide_point.c  | 81 ++++++++++++----------
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  | 47 +++++++------
 5 files changed, 92 insertions(+), 62 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 16a261c..de5fd59 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -603,13 +603,27 @@ draw_get_shader_info(const struct draw_context *draw)
  * the face-side information, but using this method we can
  * inject another shader output which passes the original
  * face side information to the backend.
+ * FIXME: this mechanism is quite brittle. And because it is
+ * primitive-independent we're allocating outputs which we might never
+ * need (which is why it's not very feasible for wide-point here).
+ * A big problem is also that these prepare hooks are called both in
+ * advance (i.e. from here for _some_ drivers) and later (when pipeline
+ * is run first). This is incredibly error prone... unfilled/prim_assembler
+ * must be (and can only be) called in advance (because otherwise there
+ * wouldn't be enough memory allocated). The reason late alloc works for
+ * aapoint, aaline and widepoint is that these use new verts which are
+ * large enough (albeit valgrind still says invalid read in dup_vert,
+ * apparently reading past the old vert size as it'll copy in the new size).
  */
 void
 draw_prepare_shader_outputs(struct draw_context *draw)
 {
    draw_remove_extra_vertex_attribs(draw);
+   /* order matters (pipeline order)! */
    draw_prim_assembler_prepare_outputs(draw->ia);
    draw_unfilled_prepare_outputs(draw, draw->pipeline.unfilled);
+/*   if (draw->pipeline.wide_point)
+      draw_wide_point_prepare_outputs(draw, draw->pipeline.wide_point);*/
    if (draw->pipeline.aapoint)
       draw_aapoint_prepare_outputs(draw, draw->pipeline.aapoint);
    if (draw->pipeline.aaline)
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h
index e69dcbd..e7e7eef 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.h
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -101,6 +101,8 @@ void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *hea
 void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header);
 void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header);
 
+void draw_wide_point_prepare_outputs(struct draw_context *context,
+                                     struct draw_stage *stage);
 void draw_aapoint_prepare_outputs(struct draw_context *context,
                                   struct draw_stage *stage);
 void draw_aaline_prepare_outputs(struct draw_context *context,
diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index 2517d61..0c6b3ba 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -189,7 +189,7 @@ static void unfilled_tri( struct draw_stage *stage,
 }
 
 
-static void unfilled_first_tri( struct draw_stage *stage, 
+static void unfilled_first_tri( struct draw_stage *stage,
 				struct prim_header *header )
 {
    struct unfilled_stage *unfilled = unfilled_stage(stage);
@@ -204,12 +204,14 @@ static void unfilled_first_tri( struct draw_stage *stage,
 
 
 
-static void unfilled_flush( struct draw_stage *stage,
-			    unsigned flags )
+static void unfilled_flush(struct draw_stage *stage,
+                           unsigned flags)
 {
-   stage->next->flush( stage->next, flags );
+   stage->next->flush(stage->next, flags);
 
    stage->tri = unfilled_first_tri;
+
+   draw_remove_extra_vertex_attribs(stage->draw);
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index adb6120..3271532 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -212,6 +212,8 @@ widepoint_first_point(struct draw_stage *stage,
       wide->ybias = -0.125;
    }
 
+   draw_wide_point_prepare_outputs(draw, draw->pipeline.wide_point);
+
    /* Disable triangle culling, stippling, unfilled mode etc. */
    r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
    draw->suspend_flushing = TRUE;
@@ -227,42 +229,6 @@ widepoint_first_point(struct draw_stage *stage,
       stage->point = draw_pipe_passthrough_point;
    }
 
-   draw_remove_extra_vertex_attribs(draw);
-
-   if (rast->point_quad_rasterization) {
-      const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
-      uint i;
-
-      assert(fs);
-
-      wide->num_texcoord_gen = 0;
-
-      /* Loop over fragment shader inputs looking for the PCOORD input or inputs
-       * for which bit 'k' in sprite_coord_enable is set.
-       */
-      for (i = 0; i < fs->info.num_inputs; i++) {
-         int slot;
-         const unsigned sn = fs->info.input_semantic_name[i];
-         const unsigned si = fs->info.input_semantic_index[i];
-
-         if (sn == wide->sprite_coord_semantic) {
-            /* Note that sprite_coord_enable is a bitfield of 32 bits. */
-            if (si >= 32 || !(rast->sprite_coord_enable & (1 << si)))
-               continue;
-         } else if (sn != TGSI_SEMANTIC_PCOORD) {
-            continue;
-         }
-
-         /* OK, this generic attribute needs to be replaced with a
-          * sprite coord (see above).
-          */
-         slot = draw_alloc_extra_vertex_attrib(draw, sn, si);
-
-         /* add this slot to the texcoord-gen list */
-         wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot;
-      }
-   }
-
    wide->psize_slot = -1;
    if (rast->point_size_per_vertex) {
       /* find PSIZ vertex output */
@@ -312,6 +278,49 @@ static void widepoint_destroy( struct draw_stage *stage )
 }
 
 
+void
+draw_wide_point_prepare_outputs(struct draw_context *draw,
+                                struct draw_stage *stage)
+{
+   struct widepoint_stage *wide = widepoint_stage(stage);
+   const struct pipe_rasterizer_state *rast = draw->rasterizer;
+
+   if (rast->point_quad_rasterization) {
+      const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
+      uint i;
+
+      assert(fs);
+
+      wide->num_texcoord_gen = 0;
+
+      /* Loop over fragment shader inputs looking for the PCOORD input or inputs
+       * for which bit 'k' in sprite_coord_enable is set.
+       */
+      for (i = 0; i < fs->info.num_inputs; i++) {
+         int slot;
+         const unsigned sn = fs->info.input_semantic_name[i];
+         const unsigned si = fs->info.input_semantic_index[i];
+
+         if (sn == wide->sprite_coord_semantic) {
+            /* Note that sprite_coord_enable is a bitfield of 32 bits. */
+            if (si >= 32 || !(rast->sprite_coord_enable & (1 << si)))
+               continue;
+         } else if (sn != TGSI_SEMANTIC_PCOORD) {
+            continue;
+         }
+
+         /* OK, this generic attribute needs to be replaced with a
+          * sprite coord (see above).
+          */
+         slot = draw_alloc_extra_vertex_attrib(draw, sn, si);
+
+         /* add this slot to the texcoord-gen list */
+         wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot;
+      }
+   }
+}
+
+
 struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
 {
    struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index aa20b91..8be9046 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -80,14 +80,9 @@ fetch_pipeline_prepare(struct draw_pt_middle_end *middle,
    unsigned instance_id_index = ~0;
    const unsigned gs_out_prim = (gs ? gs->output_primitive :
                                  u_assembled_prim(prim));
-   unsigned nr_vs_outputs = draw_total_vs_outputs(draw);
-   unsigned nr = MAX2(vs->info.num_inputs, nr_vs_outputs);
    unsigned point_clip = draw->rasterizer->fill_front == PIPE_POLYGON_MODE_POINT ||
                          gs_out_prim == PIPE_PRIM_POINTS;
-
-   if (gs) {
-      nr = MAX2(nr, gs->info.num_outputs + 1);
-   }
+   unsigned nr;
 
    /* Scan for instanceID system value.
     */
@@ -101,16 +96,6 @@ fetch_pipeline_prepare(struct draw_pt_middle_end *middle,
    fpme->input_prim = prim;
    fpme->opt = opt;
 
-   /* Always leave room for the vertex header whether we need it or
-    * not.  It's hard to get rid of it in particular because of the
-    * viewport code in draw_pt_post_vs.c.
-    */
-   fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
-
-   draw_pt_fetch_prepare( fpme->fetch,
-                          vs->info.num_inputs,
-                          fpme->vertex_size,
-                          instance_id_index );
    draw_pt_post_vs_prepare( fpme->post_vs,
                             draw->clip_xy,
                             draw->clip_z,
@@ -124,9 +109,9 @@ fetch_pipeline_prepare(struct draw_pt_middle_end *middle,
    draw_pt_so_emit_prepare( fpme->so_emit, FALSE );
 
    if (!(opt & PT_PIPELINE)) {
-      draw_pt_emit_prepare( fpme->emit,
-			    gs_out_prim,
-                            max_vertices );
+      draw_pt_emit_prepare(fpme->emit,
+                           gs_out_prim,
+                           max_vertices);
 
       *max_vertices = MAX2( *max_vertices, 4096 );
    }
@@ -135,12 +120,30 @@ fetch_pipeline_prepare(struct draw_pt_middle_end *middle,
       *max_vertices = 4096;
    }
 
+   /* Get the number of float[4] attributes per vertex.
+    * Note: this must be done after draw_pt_emit_prepare() since that
+    * can effect the vertex size.
+    */
+   nr = MAX2(vs->info.num_inputs, draw_total_vs_outputs(draw));
+   /* XXX llvm path doesn't have that */
+   if (gs) {
+      nr = MAX2(nr, gs->info.num_outputs + 1);
+   }
+
+   /* Always leave room for the vertex header whether we need it or
+    * not.  It's hard to get rid of it in particular because of the
+    * viewport code in draw_pt_post_vs.c.
+    */
+   fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
+
+   draw_pt_fetch_prepare(fpme->fetch,
+                         vs->info.num_inputs,
+                         fpme->vertex_size,
+                         instance_id_index);
+
    /* No need to prepare the shader.
     */
    vs->prepare(vs, draw);
-
-   /* Make sure that the vertex size didn't change at any point above */
-   assert(nr_vs_outputs == draw_total_vs_outputs(draw));
 }
 
 
-- 
2.1.4