[Mesa-dev] [PATCH] i965 gen6: Fix transform feedback of triangle strips.

Paul Berry stereotype441 at gmail.com
Fri Dec 23 13:28:47 PST 2011


On 23 December 2011 13:02, Kenneth Graunke <kenneth at whitecape.org> wrote:

> On 12/23/2011 09:37 AM, Paul Berry wrote:
> > When rendering triangle strips, vertices come down the pipeline in the
> > order specified, even though this causes alternate triangles to have
> > reversed winding order.  For example, if the vertices are ABCDE, then
> > the GS is invoked on triangles ABC, BCD, and CDE, even though this
> > means that triangle BCD is in the reverse of the normal winding order.
> > The hardware automatically flags the triangles with reversed winding
> > order as _3DPRIM_TRISTRIP_REVERSE, so that face culling and two-sided
> > coloring can be adjusted to account for the reversed order.
> >
> > In order to ensure that winding order is correct when streaming
> > vertices out to a transform feedback buffer, we need to alter the
> > ordering of BCD to BDC when the first provoking vertex convention is
> > in use, and to CBD when the last provoking vertex convention is in
> > use.
> >
> > To do this, we precompute an array of indices indicating where each
> > vertex will be placed in the transform feedback buffer; normally this
> > is SVBI[0] + (0, 1, 2), indicating that vertex order should be
> > preserved.  When the primitive type is _3DPRIM_TRISTRIP_REVERSE, we
> > change this order to either SVBI[0] + (0, 2, 1) or SVBI[0] + (1, 0,
> > 2), depending on the provoking vertex convention.
> >
> > Fixes piglit tests "EXT_transform_feedback/tessellation
> > triangle_strip" on Gen6.
>
> It's really too bad we can't use the 3DSTATE_GS "Reorder Enable" bit.
> It's supposed to take care of this, but it looks like it only does PV
> first mode on Sandybridge.  Turning it on makes triangle_strip wireframe
> fail, and nothing else pass.
>

Yeah, I concur.  That bit is *so* close to what we need, but not quite
close enough.


>
> I had a bit of trouble following this patch, but it looks okay to me.
>

Ok.  I won't push it until this evening, so let me know if you can think of
a way to improve the readability (I'm at a loss for how to improve it).


>
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
>
> > ---
> >  src/mesa/drivers/dri/i965/brw_gs.h      |    6 ++
> >  src/mesa/drivers/dri/i965/brw_gs_emit.c |   84
> ++++++++++++++++++++++++-------
> >  2 files changed, 72 insertions(+), 18 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_gs.h
> b/src/mesa/drivers/dri/i965/brw_gs.h
> > index 7bf2248..2ab8b72 100644
> > --- a/src/mesa/drivers/dri/i965/brw_gs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_gs.h
> > @@ -83,6 +83,12 @@ struct brw_gs_compile {
> >        struct brw_reg vertex[MAX_GS_VERTS];
> >        struct brw_reg header;
> >        struct brw_reg temp;
> > +
> > +      /**
> > +       * Register holding destination indices for streamed buffer
> writes.
> > +       * Only used for SOL programs.
> > +       */
> > +      struct brw_reg destination_indices;
> >     } reg;
> >
> >     /* Number of registers used to store vertex data */
> > diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c
> b/src/mesa/drivers/dri/i965/brw_gs_emit.c
> > index 1f96a16..607ee75 100644
> > --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
> > +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
> > @@ -45,13 +45,16 @@
> >  /**
> >   * Allocate registers for GS.
> >   *
> > - * If svbi_payload_enable is true, then the thread will be spawned with
> the
> > - * "SVBI Payload Enable" bit set, so GRF 1 needs to be set aside to
> hold the
> > - * streamed vertex buffer indices.
> > + * If sol_program is true, then:
> > + *
> > + * - The thread will be spawned with the "SVBI Payload Enable" bit set,
> so GRF
> > + *   1 needs to be set aside to hold the streamed vertex buffer indices.
> > + *
> > + * - The thread will need to use the destination_indices register.
> >   */
> >  static void brw_gs_alloc_regs( struct brw_gs_compile *c,
> >                              GLuint nr_verts,
> > -                               bool svbi_payload_enable )
> > +                               bool sol_program )
> >  {
> >     GLuint i = 0,j;
> >
> > @@ -60,7 +63,7 @@ static void brw_gs_alloc_regs( struct brw_gs_compile
> *c,
> >     c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
> >
> >     /* Streamed vertex buffer indices */
> > -   if (svbi_payload_enable)
> > +   if (sol_program)
> >        c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
> >
> >     /* Payload vertices plus space for more generated vertices:
> > @@ -73,6 +76,11 @@ static void brw_gs_alloc_regs( struct brw_gs_compile
> *c,
> >     c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
> >     c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
> >
> > +   if (sol_program) {
> > +      c->reg.destination_indices =
> > +         retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD);
> > +   }
> > +
> >     c->prog_data.urb_read_length = c->nr_regs;
> >     c->prog_data.total_grf = i;
> >  }
> > @@ -351,16 +359,17 @@ gen6_sol_program(struct brw_gs_compile *c, struct
> brw_gs_prog_key *key,
> >
> >     if (key->num_transform_feedback_bindings > 0) {
> >        unsigned vertex, binding;
> > +      struct brw_reg destination_indices_uw =
> > +         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));
> > +
> >        /* Note: since we use the binding table to keep track of buffer
> offsets
> >         * and stride, the GS doesn't need to keep track of a separate
> pointer
> >         * into each buffer; it uses a single pointer which increments by
> 1 for
> >         * each vertex.  So we use SVBI0 for this pointer, regardless of
> whether
> >         * transform feedback is in interleaved or separate attribs mode.
> > +       *
> > +       * Make sure that the buffers have enough room for all the
> vertices.
> >         */
> > -      brw_MOV(p, get_element_ud(c->reg.header, 5),
> > -              get_element_ud(c->reg.SVBI, 0));
> > -
> > -      /* Make sure that the buffers have enough room for all the
> vertices. */
> >        brw_ADD(p, get_element_ud(c->reg.temp, 0),
> >                get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
> >        brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
> > @@ -368,10 +377,58 @@ gen6_sol_program(struct brw_gs_compile *c, struct
> brw_gs_prog_key *key,
> >                get_element_ud(c->reg.SVBI, 4));
> >        brw_IF(p, BRW_EXECUTE_1);
> >
> > +      /* Compute the destination indices to write to.  Usually we use
> SVBI[0]
> > +       * + (0, 1, 2).  However, for odd-numbered triangles in
> tristrips, the
> > +       * vertices come down the pipeline in reversed winding order, so
> we need
> > +       * to flip the order when writing to the transform feedback
> buffer.  To
> > +       * ensure that flatshading accuracy is preserved, we need to
> write them
> > +       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
> > +       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're
> using
> > +       * the last provoking vertex convention.
> > +       *
> > +       * Note: since brw_imm_v can only be used in instructions in
> > +       * packed-word execution mode, and SVBI is a double-word, we need
> to
> > +       * first move the appropriate immediate constant ((0, 1, 2), (0,
> 2, 1),
> > +       * or (1, 0, 2)) to the destination_indices register, and then
> add SVBI
> > +       * using a separate instruction.  Also, since the immediate
> constant is
> > +       * expressed as packed words, and we need to load double-words
> into
> > +       * destination_indices, we need to intersperse zeros to fill the
> upper
> > +       * halves of each double-word.
> > +       */
> > +      brw_MOV(p, destination_indices_uw,
> > +              brw_imm_v(0x00020100)); /* (0, 1, 2) */
> > +      if (num_verts == 3) {
> > +         /* Get primitive type into temp register. */
> > +         brw_AND(p, get_element_ud(c->reg.temp, 0),
> > +                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));
> > +
> > +         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do
> this as
> > +          * an 8-wide comparison so that the conditional MOV that
> follows
> > +          * moves all 8 words correctly.
> > +          */
> > +         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
> > +                 get_element_ud(c->reg.temp, 0),
> > +                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
> > +
> > +         /* If so, then overwrite destination_indices_uw with the
> appropriate
> > +          * reordering.
> > +          */
> > +         brw_MOV(p, destination_indices_uw,
> > +                 brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
> > +                                         : 0x00020001)); /* (1, 0, 2) */
> > +         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
> > +      }
> > +      brw_ADD(p, c->reg.destination_indices,
> > +              c->reg.destination_indices, get_element_ud(c->reg.SVBI,
> 0));
> > +
> >        /* For each vertex, generate code to output each varying using the
> >         * appropriate binding table entry.
> >         */
> >        for (vertex = 0; vertex < num_verts; ++vertex) {
> > +         /* Set up the correct destination index for this vertex */
> > +         brw_MOV(p, get_element_ud(c->reg.header, 5),
> > +                 get_element_ud(c->reg.destination_indices, vertex));
> > +
> >           for (binding = 0; binding <
> key->num_transform_feedback_bindings;
> >                ++binding) {
> >              unsigned char vert_result =
> > @@ -398,15 +455,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct
> brw_gs_prog_key *key,
> >                            SURF_INDEX_SOL_BINDING(binding), /*
> binding_table_index */
> >                            final_write); /* send_commit_msg */
> >           }
> > -
> > -         /* If there are more vertices to output, increment the pointer
> so
> > -          * that we will start outputting to the next location in the
> > -          * transform feedback buffers.
> > -          */
> > -         if (vertex != num_verts - 1) {
> > -            brw_ADD(p, get_element_ud(c->reg.header, 5),
> > -                    get_element_ud(c->reg.header, 5), brw_imm_ud(1));
> > -         }
> >        }
> >        brw_ENDIF(p);
> >
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20111223/843699da/attachment.htm>


More information about the mesa-dev mailing list