[Mesa-dev] [PATCH] i965 gen6: Fix transform feedback of triangle strips.
Paul Berry
stereotype441 at gmail.com
Fri Dec 23 13:28:47 PST 2011
On 23 December 2011 13:02, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On 12/23/2011 09:37 AM, Paul Berry wrote:
> > When rendering triangle strips, vertices come down the pipeline in the
> > order specified, even though this causes alternate triangles to have
> > reversed winding order. For example, if the vertices are ABCDE, then
> > the GS is invoked on triangles ABC, BCD, and CDE, even though this
> > means that triangle BCD is in the reverse of the normal winding order.
> > The hardware automatically flags the triangles with reversed winding
> > order as _3DPRIM_TRISTRIP_REVERSE, so that face culling and two-sided
> > coloring can be adjusted to account for the reversed order.
> >
> > In order to ensure that winding order is correct when streaming
> > vertices out to a transform feedback buffer, we need to alter the
> > ordering of BCD to BDC when the first provoking vertex convention is
> > in use, and to CBD when the last provoking vertex convention is in
> > use.
> >
> > To do this, we precompute an array of indices indicating where each
> > vertex will be placed in the transform feedback buffer; normally this
> > is SVBI[0] + (0, 1, 2), indicating that vertex order should be
> > preserved. When the primitive type is _3DPRIM_TRISTRIP_REVERSE, we
> > change this order to either SVBI[0] + (0, 2, 1) or SVBI[0] + (1, 0,
> > 2), depending on the provoking vertex convention.
> >
> > Fixes piglit tests "EXT_transform_feedback/tessellation
> > triangle_strip" on Gen6.
>
> It's really too bad we can't use the 3DSTATE_GS "Reorder Enable" bit.
> It's supposed to take care of this, but it looks like it only does PV
> first mode on Sandybridge. Turning it on makes triangle_strip wireframe
> fail, and nothing else pass.
>
Yeah, I concur. That bit is *so* close to what we need, but not quite
close enough.
>
> I had a bit of trouble following this patch, but it looks okay to me.
>
Ok. I won't push it until this evening, so let me know if you can think of
a way to improve the readability (I'm at a loss for how to improve it).
>
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
>
> > ---
> > src/mesa/drivers/dri/i965/brw_gs.h | 6 ++
> > src/mesa/drivers/dri/i965/brw_gs_emit.c | 84
> ++++++++++++++++++++++++-------
> > 2 files changed, 72 insertions(+), 18 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_gs.h
> b/src/mesa/drivers/dri/i965/brw_gs.h
> > index 7bf2248..2ab8b72 100644
> > --- a/src/mesa/drivers/dri/i965/brw_gs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_gs.h
> > @@ -83,6 +83,12 @@ struct brw_gs_compile {
> > struct brw_reg vertex[MAX_GS_VERTS];
> > struct brw_reg header;
> > struct brw_reg temp;
> > +
> > + /**
> > + * Register holding destination indices for streamed buffer
> writes.
> > + * Only used for SOL programs.
> > + */
> > + struct brw_reg destination_indices;
> > } reg;
> >
> > /* Number of registers used to store vertex data */
> > diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c
> b/src/mesa/drivers/dri/i965/brw_gs_emit.c
> > index 1f96a16..607ee75 100644
> > --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
> > +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
> > @@ -45,13 +45,16 @@
> > /**
> > * Allocate registers for GS.
> > *
> > - * If svbi_payload_enable is true, then the thread will be spawned with
> the
> > - * "SVBI Payload Enable" bit set, so GRF 1 needs to be set aside to
> hold the
> > - * streamed vertex buffer indices.
> > + * If sol_program is true, then:
> > + *
> > + * - The thread will be spawned with the "SVBI Payload Enable" bit set,
> so GRF
> > + * 1 needs to be set aside to hold the streamed vertex buffer indices.
> > + *
> > + * - The thread will need to use the destination_indices register.
> > */
> > static void brw_gs_alloc_regs( struct brw_gs_compile *c,
> > GLuint nr_verts,
> > - bool svbi_payload_enable )
> > + bool sol_program )
> > {
> > GLuint i = 0,j;
> >
> > @@ -60,7 +63,7 @@ static void brw_gs_alloc_regs( struct brw_gs_compile
> *c,
> > c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
> >
> > /* Streamed vertex buffer indices */
> > - if (svbi_payload_enable)
> > + if (sol_program)
> > c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
> >
> > /* Payload vertices plus space for more generated vertices:
> > @@ -73,6 +76,11 @@ static void brw_gs_alloc_regs( struct brw_gs_compile
> *c,
> > c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
> > c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
> >
> > + if (sol_program) {
> > + c->reg.destination_indices =
> > + retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD);
> > + }
> > +
> > c->prog_data.urb_read_length = c->nr_regs;
> > c->prog_data.total_grf = i;
> > }
> > @@ -351,16 +359,17 @@ gen6_sol_program(struct brw_gs_compile *c, struct
> brw_gs_prog_key *key,
> >
> > if (key->num_transform_feedback_bindings > 0) {
> > unsigned vertex, binding;
> > + struct brw_reg destination_indices_uw =
> > + vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));
> > +
> > /* Note: since we use the binding table to keep track of buffer
> offsets
> > * and stride, the GS doesn't need to keep track of a separate
> pointer
> > * into each buffer; it uses a single pointer which increments by
> 1 for
> > * each vertex. So we use SVBI0 for this pointer, regardless of
> whether
> > * transform feedback is in interleaved or separate attribs mode.
> > + *
> > + * Make sure that the buffers have enough room for all the
> vertices.
> > */
> > - brw_MOV(p, get_element_ud(c->reg.header, 5),
> > - get_element_ud(c->reg.SVBI, 0));
> > -
> > - /* Make sure that the buffers have enough room for all the
> vertices. */
> > brw_ADD(p, get_element_ud(c->reg.temp, 0),
> > get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
> > brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
> > @@ -368,10 +377,58 @@ gen6_sol_program(struct brw_gs_compile *c, struct
> brw_gs_prog_key *key,
> > get_element_ud(c->reg.SVBI, 4));
> > brw_IF(p, BRW_EXECUTE_1);
> >
> > + /* Compute the destination indices to write to. Usually we use
> SVBI[0]
> > + * + (0, 1, 2). However, for odd-numbered triangles in
> tristrips, the
> > + * vertices come down the pipeline in reversed winding order, so
> we need
> > + * to flip the order when writing to the transform feedback
> buffer. To
> > + * ensure that flatshading accuracy is preserved, we need to
> write them
> > + * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
> > + * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're
> using
> > + * the last provoking vertex convention.
> > + *
> > + * Note: since brw_imm_v can only be used in instructions in
> > + * packed-word execution mode, and SVBI is a double-word, we need
> to
> > + * first move the appropriate immediate constant ((0, 1, 2), (0,
> 2, 1),
> > + * or (1, 0, 2)) to the destination_indices register, and then
> add SVBI
> > + * using a separate instruction. Also, since the immediate
> constant is
> > + * expressed as packed words, and we need to load double-words
> into
> > + * destination_indices, we need to intersperse zeros to fill the
> upper
> > + * halves of each double-word.
> > + */
> > + brw_MOV(p, destination_indices_uw,
> > + brw_imm_v(0x00020100)); /* (0, 1, 2) */
> > + if (num_verts == 3) {
> > + /* Get primitive type into temp register. */
> > + brw_AND(p, get_element_ud(c->reg.temp, 0),
> > + get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));
> > +
> > + /* Test if primitive type is TRISTRIP_REVERSE. We need to do
> this as
> > + * an 8-wide comparison so that the conditional MOV that
> follows
> > + * moves all 8 words correctly.
> > + */
> > + brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
> > + get_element_ud(c->reg.temp, 0),
> > + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
> > +
> > + /* If so, then overwrite destination_indices_uw with the
> appropriate
> > + * reordering.
> > + */
> > + brw_MOV(p, destination_indices_uw,
> > + brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */
> > + : 0x00020001)); /* (1, 0, 2) */
> > + brw_set_predicate_control(p, BRW_PREDICATE_NONE);
> > + }
> > + brw_ADD(p, c->reg.destination_indices,
> > + c->reg.destination_indices, get_element_ud(c->reg.SVBI,
> 0));
> > +
> > /* For each vertex, generate code to output each varying using the
> > * appropriate binding table entry.
> > */
> > for (vertex = 0; vertex < num_verts; ++vertex) {
> > + /* Set up the correct destination index for this vertex */
> > + brw_MOV(p, get_element_ud(c->reg.header, 5),
> > + get_element_ud(c->reg.destination_indices, vertex));
> > +
> > for (binding = 0; binding <
> key->num_transform_feedback_bindings;
> > ++binding) {
> > unsigned char vert_result =
> > @@ -398,15 +455,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct
> brw_gs_prog_key *key,
> > SURF_INDEX_SOL_BINDING(binding), /*
> binding_table_index */
> > final_write); /* send_commit_msg */
> > }
> > -
> > - /* If there are more vertices to output, increment the pointer
> so
> > - * that we will start outputting to the next location in the
> > - * transform feedback buffers.
> > - */
> > - if (vertex != num_verts - 1) {
> > - brw_ADD(p, get_element_ud(c->reg.header, 5),
> > - get_element_ud(c->reg.header, 5), brw_imm_ud(1));
> > - }
> > }
> > brw_ENDIF(p);
> >
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20111223/843699da/attachment.htm>
More information about the mesa-dev
mailing list