[Mesa-stable] [Mesa-dev] [PATCH 2/3 v2] i965: Swap the order of the vertex ID and edge flag attributes
Ben Widawsky
ben at bwidawsk.net
Tue Aug 18 18:27:42 PDT 2015
On Mon, Jul 13, 2015 at 06:01:14PM +0100, Neil Roberts wrote:
> The edge flag data on Gen6+ is passed through the fixed function hardware as
> an extra attribute. According to the PRM it must be the last valid
> VERTEX_ELEMENT structure. However if the vertex ID is also used then another
> extra element is added to source the VID. This made it so the vertex ID is in
> the wrong register in the vertex shader and the edge attribute is no longer in
> the last element.
It took me forever to find the docs which stated the edge flag must be last...
could you maybe add a PRM location to the code comment, and/or commit message?
>
> v2: Also implement for BDW+
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=84677
> Cc: "10.6 10.5" <mesa-stable at lists.freedesktop.org>
> ---
> src/mesa/drivers/dri/i965/brw_draw_upload.c | 30 +++++++--------
> src/mesa/drivers/dri/i965/gen8_draw_upload.c | 56 +++++++++++++++++++++-------
> 2 files changed, 57 insertions(+), 29 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> index 320e40e..67a8dfd 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> @@ -787,21 +787,6 @@ static void brw_emit_vertices(struct brw_context *brw)
> ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
> }
>
> - if (brw->gen >= 6 && gen6_edgeflag_input) {
> - uint32_t format =
> - brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
> -
> - OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
> - GEN6_VE0_VALID |
> - GEN6_VE0_EDGE_FLAG_ENABLE |
> - (format << BRW_VE0_FORMAT_SHIFT) |
> - (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
> - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> - }
> -
> if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
> uint32_t dw0 = 0, dw1 = 0;
> uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
> @@ -842,6 +827,21 @@ static void brw_emit_vertices(struct brw_context *brw)
> OUT_BATCH(dw1);
> }
>
> + if (brw->gen >= 6 && gen6_edgeflag_input) {
> + uint32_t format =
> + brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
> +
> + OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
> + GEN6_VE0_VALID |
> + GEN6_VE0_EDGE_FLAG_ENABLE |
> + (format << BRW_VE0_FORMAT_SHIFT) |
> + (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
> + OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> + }
> +
> ADVANCE_BATCH();
> }
>
> diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> index f7d9952..2bac5ff 100644
> --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> @@ -40,16 +40,25 @@ gen8_emit_vertices(struct brw_context *brw)
> {
> struct gl_context *ctx = &brw->ctx;
> uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
> + bool uses_edge_flag;
>
> brw_prepare_vertices(brw);
> brw_prepare_shader_draw_parameters(brw);
>
> + uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
> + ctx->Polygon.BackMode != GL_FILL);
> +
> if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
> unsigned vue = brw->vb.nr_enabled;
>
> - WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
> - "Using VID/IID with edgeflags, need to reorder the "
> - "vertex attributes");
> + /* The element for the edge flags must always be last, so we have to
> + * insert the SGVS before it in that case.
> + */
> + if (uses_edge_flag) {
> + assert(vue > 0);
> + vue--;
> + }
> +
I don't get this. Shouldn't we be pushing the element with the edge flag to the
end? As an example, suppose nr_enabled = 3. Won't this make us write the SGVs
out to vue 2, which presumable had something of value before?
Also, I just noticed:
"It is INVALID to store both the VertexID and InstanceID in the same
element/component location within the VUE."
Aren't we potentially doing that today (and still with your patch)?
> WARN_ONCE(vue >= 33,
> "Trying to insert VID/IID past 33rd vertex element, "
> "need to reorder the vertex attrbutes.");
> @@ -138,7 +147,18 @@ gen8_emit_vertices(struct brw_context *brw)
> ADVANCE_BATCH();
> }
>
> - unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
> + /* Normally we don't need an element for the SGVS attribute because the
> + * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
> + * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
> + * vertex ID is used then it needs an element for the base vertex buffer.
> + * Additionally if there is an edge flag element then the SGVS can't be
> + * inserted past that so we need a dummy element to ensure that the edge
> + * flag is the last one.
> + */
> + bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
> + (brw->vs.prog_data->uses_instanceid &&
> + uses_edge_flag));
> + unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
>
> /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
> * presumably for VertexID/InstanceID.
> @@ -192,6 +212,24 @@ gen8_emit_vertices(struct brw_context *brw)
> (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
> }
>
> + if (needs_sgvs_element) {
> + if (brw->vs.prog_data->uses_vertexid) {
> + OUT_BATCH(GEN6_VE0_VALID |
> + brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
> + BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
> + OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> + } else {
> + OUT_BATCH(GEN6_VE0_VALID);
> + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> + }
> + }
> +
> if (gen6_edgeflag_input) {
> uint32_t format =
> brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
> @@ -206,16 +244,6 @@ gen8_emit_vertices(struct brw_context *brw)
> (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> }
> -
> - if (brw->vs.prog_data->uses_vertexid) {
> - OUT_BATCH(GEN6_VE0_VALID |
> - brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
> - BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
> - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
> - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
> - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
> - }
> ADVANCE_BATCH();
>
> for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
Empirically, this patch is needed, since without it, I cannot get the test to
pass. However, since I don't understand it well enough, I am going to push 1/3,
and wait until you get back from vacation to continue with this.
More information about the mesa-stable
mailing list