[Mesa-stable] [Mesa-dev] [PATCH 2/3 v2] i965: Swap the order of the vertex ID and edge flag attributes

Ben Widawsky ben at bwidawsk.net
Tue Aug 18 18:27:42 PDT 2015


On Mon, Jul 13, 2015 at 06:01:14PM +0100, Neil Roberts wrote:
> The edge flag data on Gen6+ is passed through the fixed function hardware as
> an extra attribute. According to the PRM it must be the last valid
> VERTEX_ELEMENT structure. However if the vertex ID is also used then another
> extra element is added to source the VID. This made it so the vertex ID is in
> the wrong register in the vertex shader and the edge attribute is no longer in
> the last element.

It took me forever to find the docs which stated the edge flag must be last...
could you maybe add a PRM location to the code comment, and/or commit message?

> 
> v2: Also implement for BDW+
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=84677
> Cc: "10.6 10.5" <mesa-stable at lists.freedesktop.org>
> ---
>  src/mesa/drivers/dri/i965/brw_draw_upload.c  | 30 +++++++--------
>  src/mesa/drivers/dri/i965/gen8_draw_upload.c | 56 +++++++++++++++++++++-------
>  2 files changed, 57 insertions(+), 29 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> index 320e40e..67a8dfd 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> @@ -787,21 +787,6 @@ static void brw_emit_vertices(struct brw_context *brw)
>                      ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
>     }
>  
> -   if (brw->gen >= 6 && gen6_edgeflag_input) {
> -      uint32_t format =
> -         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
> -
> -      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
> -                GEN6_VE0_VALID |
> -                GEN6_VE0_EDGE_FLAG_ENABLE |
> -                (format << BRW_VE0_FORMAT_SHIFT) |
> -                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
> -      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> -                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> -                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> -                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> -   }
> -
>     if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
>        uint32_t dw0 = 0, dw1 = 0;
>        uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
> @@ -842,6 +827,21 @@ static void brw_emit_vertices(struct brw_context *brw)
>        OUT_BATCH(dw1);
>     }
>  
> +   if (brw->gen >= 6 && gen6_edgeflag_input) {
> +      uint32_t format =
> +         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
> +
> +      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
> +                GEN6_VE0_VALID |
> +                GEN6_VE0_EDGE_FLAG_ENABLE |
> +                (format << BRW_VE0_FORMAT_SHIFT) |
> +                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
> +      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> +                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> +                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> +                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> +   }
> +
>     ADVANCE_BATCH();
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> index f7d9952..2bac5ff 100644
> --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> @@ -40,16 +40,25 @@ gen8_emit_vertices(struct brw_context *brw)
>  {
>     struct gl_context *ctx = &brw->ctx;
>     uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
> +   bool uses_edge_flag;
>  
>     brw_prepare_vertices(brw);
>     brw_prepare_shader_draw_parameters(brw);
>  
> +   uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
> +                     ctx->Polygon.BackMode != GL_FILL);
> +
>     if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
>        unsigned vue = brw->vb.nr_enabled;
>  
> -      WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
> -                "Using VID/IID with edgeflags, need to reorder the "
> -                "vertex attributes");
> +      /* The element for the edge flags must always be last, so we have to
> +       * insert the SGVS before it in that case.
> +       */
> +      if (uses_edge_flag) {
> +         assert(vue > 0);
> +         vue--;
> +      }
> +

I don't get this. Shouldn't we be pushing the element with the edge flag to the
end? As an example, suppose nr_enabled = 3. Won't this make us write the SGVs
out to vue 2, which presumable had something of value before?

Also, I just noticed:
"It is INVALID to store both the VertexID and InstanceID in the same
element/component location within the VUE."

Aren't we potentially doing that today (and still with your patch)?

>        WARN_ONCE(vue >= 33,
>                  "Trying to insert VID/IID past 33rd vertex element, "
>                  "need to reorder the vertex attrbutes.");
> @@ -138,7 +147,18 @@ gen8_emit_vertices(struct brw_context *brw)
>        ADVANCE_BATCH();
>     }
>  
> -   unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
> +   /* Normally we don't need an element for the SGVS attribute because the
> +    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
> +    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
> +    * vertex ID is used then it needs an element for the base vertex buffer.
> +    * Additionally if there is an edge flag element then the SGVS can't be
> +    * inserted past that so we need a dummy element to ensure that the edge
> +    * flag is the last one.
> +    */
> +   bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
> +                              (brw->vs.prog_data->uses_instanceid &&
> +                               uses_edge_flag));
> +   unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
>  
>     /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
>      * presumably for VertexID/InstanceID.
> @@ -192,6 +212,24 @@ gen8_emit_vertices(struct brw_context *brw)
>                  (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
>     }
>  
> +   if (needs_sgvs_element) {
> +      if (brw->vs.prog_data->uses_vertexid) {
> +         OUT_BATCH(GEN6_VE0_VALID |
> +                   brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
> +                   BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
> +         OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> +                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> +                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> +                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> +      } else {
> +         OUT_BATCH(GEN6_VE0_VALID);
> +         OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
> +                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> +                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> +                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> +      }
> +   }
> +
>     if (gen6_edgeflag_input) {
>        uint32_t format =
>           brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
> @@ -206,16 +244,6 @@ gen8_emit_vertices(struct brw_context *brw)
>                  (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
>                  (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
>     }
> -
> -   if (brw->vs.prog_data->uses_vertexid) {
> -      OUT_BATCH(GEN6_VE0_VALID |
> -                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
> -                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
> -      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> -                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> -                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> -                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> -   }
>     ADVANCE_BATCH();
>  
>     for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {

Empirically, this patch is needed, since without it, I cannot get the test to
pass. However, since I don't understand it well enough, I am going to push 1/3,
and wait until you get back from vacation to continue with this.


More information about the mesa-stable mailing list