[Mesa-dev] [PATCH] i965 gen6: Fix transform feedback of triangle strips.

Fri Dec 23 13:02:56 PST 2011

On 12/23/2011 09:37 AM, Paul Berry wrote:
> When rendering triangle strips, vertices come down the pipeline in the
> order specified, even though this causes alternate triangles to have
> reversed winding order.  For example, if the vertices are ABCDE, then
> the GS is invoked on triangles ABC, BCD, and CDE, even though this
> means that triangle BCD is in the reverse of the normal winding order.
> The hardware automatically flags the triangles with reversed winding
> order as _3DPRIM_TRISTRIP_REVERSE, so that face culling and two-sided
> coloring can be adjusted to account for the reversed order.
> 
> In order to ensure that winding order is correct when streaming
> vertices out to a transform feedback buffer, we need to alter the
> ordering of BCD to BDC when the first provoking vertex convention is
> in use, and to CBD when the last provoking vertex convention is in
> use.
> 
> To do this, we precompute an array of indices indicating where each
> vertex will be placed in the transform feedback buffer; normally this
> is SVBI[0] + (0, 1, 2), indicating that vertex order should be
> preserved.  When the primitive type is _3DPRIM_TRISTRIP_REVERSE, we
> change this order to either SVBI[0] + (0, 2, 1) or SVBI[0] + (1, 0,
> 2), depending on the provoking vertex convention.
> 
> Fixes piglit tests "EXT_transform_feedback/tessellation
> triangle_strip" on Gen6.

It's really too bad we can't use the 3DSTATE_GS "Reorder Enable" bit.
It's supposed to take care of this, but it looks like it only does PV
first mode on Sandybridge.  Turning it on makes triangle_strip wireframe
fail, and nothing else pass.

I had a bit of trouble following this patch, but it looks okay to me.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

> ---
>  src/mesa/drivers/dri/i965/brw_gs.h      |    6 ++
>  src/mesa/drivers/dri/i965/brw_gs_emit.c |   84 ++++++++++++++++++++++++-------
>  2 files changed, 72 insertions(+), 18 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
> index 7bf2248..2ab8b72 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.h
> +++ b/src/mesa/drivers/dri/i965/brw_gs.h
> @@ -83,6 +83,12 @@ struct brw_gs_compile {
>        struct brw_reg vertex[MAX_GS_VERTS];
>        struct brw_reg header;
>        struct brw_reg temp;
> +
> +      /**
> +       * Register holding destination indices for streamed buffer writes.
> +       * Only used for SOL programs.
> +       */
> +      struct brw_reg destination_indices;
>     } reg;
>  
>     /* Number of registers used to store vertex data */
> diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
> index 1f96a16..607ee75 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
> @@ -45,13 +45,16 @@
>  /**
>   * Allocate registers for GS.
>   *
> - * If svbi_payload_enable is true, then the thread will be spawned with the
> - * "SVBI Payload Enable" bit set, so GRF 1 needs to be set aside to hold the
> - * streamed vertex buffer indices.
> + * If sol_program is true, then:
> + *
> + * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF
> + *   1 needs to be set aside to hold the streamed vertex buffer indices.
> + *
> + * - The thread will need to use the destination_indices register.
>   */
>  static void brw_gs_alloc_regs( struct brw_gs_compile *c,
>  			       GLuint nr_verts,
> -                               bool svbi_payload_enable )
> +                               bool sol_program )
>  {
>     GLuint i = 0,j;
>  
> @@ -60,7 +63,7 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c,
>     c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
>  
>     /* Streamed vertex buffer indices */
> -   if (svbi_payload_enable)
> +   if (sol_program)
>        c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
>  
>     /* Payload vertices plus space for more generated vertices:
> @@ -73,6 +76,11 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c,
>     c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
>     c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
>  
> +   if (sol_program) {
> +      c->reg.destination_indices =
> +         retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD);
> +   }
> +
>     c->prog_data.urb_read_length = c->nr_regs; 
>     c->prog_data.total_grf = i;
>  }
> @@ -351,16 +359,17 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
>  
>     if (key->num_transform_feedback_bindings > 0) {
>        unsigned vertex, binding;
> +      struct brw_reg destination_indices_uw =
> +         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));
> +
>        /* Note: since we use the binding table to keep track of buffer offsets
>         * and stride, the GS doesn't need to keep track of a separate pointer
>         * into each buffer; it uses a single pointer which increments by 1 for
>         * each vertex.  So we use SVBI0 for this pointer, regardless of whether
>         * transform feedback is in interleaved or separate attribs mode.
> +       *
> +       * Make sure that the buffers have enough room for all the vertices.
>         */
> -      brw_MOV(p, get_element_ud(c->reg.header, 5),
> -              get_element_ud(c->reg.SVBI, 0));
> -
> -      /* Make sure that the buffers have enough room for all the vertices. */
>        brw_ADD(p, get_element_ud(c->reg.temp, 0),
>  	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
>        brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
> @@ -368,10 +377,58 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
>  	         get_element_ud(c->reg.SVBI, 4));
>        brw_IF(p, BRW_EXECUTE_1);
>  
> +      /* Compute the destination indices to write to.  Usually we use SVBI[0]
> +       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
> +       * vertices come down the pipeline in reversed winding order, so we need
> +       * to flip the order when writing to the transform feedback buffer.  To
> +       * ensure that flatshading accuracy is preserved, we need to write them
> +       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
> +       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
> +       * the last provoking vertex convention.
> +       *
> +       * Note: since brw_imm_v can only be used in instructions in
> +       * packed-word execution mode, and SVBI is a double-word, we need to
> +       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
> +       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
> +       * using a separate instruction.  Also, since the immediate constant is
> +       * expressed as packed words, and we need to load double-words into
> +       * destination_indices, we need to intersperse zeros to fill the upper
> +       * halves of each double-word.
> +       */
> +      brw_MOV(p, destination_indices_uw,
> +              brw_imm_v(0x00020100)); /* (0, 1, 2) */
> +      if (num_verts == 3) {
> +         /* Get primitive type into temp register. */
> +         brw_AND(p, get_element_ud(c->reg.temp, 0),
> +                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));
> +
> +         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
> +          * an 8-wide comparison so that the conditional MOV that follows
> +          * moves all 8 words correctly.
> +          */
> +         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
> +                 get_element_ud(c->reg.temp, 0),
> +                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
> +
> +         /* If so, then overwrite destination_indices_uw with the appropriate
> +          * reordering.
> +          */
> +         brw_MOV(p, destination_indices_uw,
> +                 brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
> +                                         : 0x00020001)); /* (1, 0, 2) */
> +         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
> +      }
> +      brw_ADD(p, c->reg.destination_indices,
> +              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));
> +
>        /* For each vertex, generate code to output each varying using the
>         * appropriate binding table entry.
>         */
>        for (vertex = 0; vertex < num_verts; ++vertex) {
> +         /* Set up the correct destination index for this vertex */
> +         brw_MOV(p, get_element_ud(c->reg.header, 5),
> +                 get_element_ud(c->reg.destination_indices, vertex));
> +
>           for (binding = 0; binding < key->num_transform_feedback_bindings;
>                ++binding) {
>              unsigned char vert_result =
> @@ -398,15 +455,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
>                            SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
>                            final_write); /* send_commit_msg */
>           }
> -
> -         /* If there are more vertices to output, increment the pointer so
> -          * that we will start outputting to the next location in the
> -          * transform feedback buffers.
> -          */
> -         if (vertex != num_verts - 1) {
> -            brw_ADD(p, get_element_ud(c->reg.header, 5),
> -                    get_element_ud(c->reg.header, 5), brw_imm_ud(1));
> -         }
>        }
>        brw_ENDIF(p);
>