[Mesa-dev] [PATCH v02 34/37] i965: Port gen4+ emit vertices code to genxml.

Fri Apr 28 22:23:38 UTC 2017

On Monday, April 24, 2017 3:19:29 PM PDT Rafael Antognolli wrote:
> Some code that was placed in brw_draw_upload.c and exported to be used
> by gen8+ was also moved to genX_state_upload, and the respective symbols
> are not exported anymore.
> 
> v2:
>    - Remove code from brw_draw_upload too
>    - Emit vertices for gen4-5 too.
>    - Use helper to setup brw_address (Kristian)
>    - Use macros for MOCS values.
>    - Do not use #ifndef NDEBUG on code that is actually used (Ken)
> 
> Signed-off-by: Rafael Antognolli <rafael.antognolli at intel.com>

There's a lot of code here that isn't generation specific, and isn't
taking advantage of conditional-compilation.  Can we leave most of it
in brw_draw_upload.c?  Basically, just have the vertices atom and the
VERTEX_BUFFER_STATE function in genX_state_upload.c.

The VERTEX_BUFFER_STATE code looks good.

> +static void
> +genX(emit_vertices)(struct brw_context *brw)
> +{
> +   uint32_t *dw;
> +
> +   brw_prepare_vertices(brw);
> +   brw_prepare_shader_draw_parameters(brw);
> +
> +#if GEN_GEN < 8
> +   brw_emit_query_begin(brw);
> +#endif

This function is a no-op (early returns) on Gen6+ (brw->hw_ctx != NULL).
We should either make it GEN_GEN < 6 or just call it everywhere.

> +
> +   const struct brw_vs_prog_data *vs_prog_data =
> +      brw_vs_prog_data(brw->vs.base.prog_data);
> +
> +#if GEN_GEN >= 8
> +   struct gl_context *ctx = &brw->ctx;
> +   bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
> +                          ctx->Polygon.BackMode != GL_FILL);
> +
> +   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
> +      unsigned vue = brw->vb.nr_enabled;
> +
> +      /* The element for the edge flags must always be last, so we have to
> +       * insert the SGVS before it in that case.
> +       */
> +      if (uses_edge_flag) {
> +         assert(vue > 0);
> +         vue--;
> +      }
> +
> +      WARN_ONCE(vue >= 33,
> +                "Trying to insert VID/IID past 33rd vertex element, "
> +                "need to reorder the vertex attrbutes.");
> +
> +      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) {
> +         if (vs_prog_data->uses_vertexid) {
> +            vfs.VertexIDEnable = true;
> +            vfs.VertexIDComponentNumber = 2;
> +            vfs.VertexIDElementOffset = vue;
> +         }
> +
> +         if (vs_prog_data->uses_instanceid) {
> +            vfs.InstanceIDEnable = true;
> +            vfs.InstanceIDComponentNumber = 3;
> +            vfs.InstanceIDElementOffset = vue;
> +         }
> +      }
> +
> +      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
> +         vfi.InstancingEnable = true;
> +         vfi.VertexElementIndex = vue;
> +      }
> +   } else {
> +      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs);
> +   }
> +
> +   /* Normally we don't need an element for the SGVS attribute because the
> +    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
> +    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
> +    * we're using draw parameters then we need an element for the those
> +    * values.  Additionally if there is an edge flag element then the SGVS
> +    * can't be inserted past that so we need a dummy element to ensure that
> +    * the edge flag is the last one.
> +    */
> +   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
> +                                    vs_prog_data->uses_baseinstance ||
> +                                    ((vs_prog_data->uses_instanceid ||
> +                                      vs_prog_data->uses_vertexid)
> +                                     && uses_edge_flag));
> +#else
> +   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
> +                                    vs_prog_data->uses_baseinstance ||
> +                                    vs_prog_data->uses_instanceid ||
> +                                    vs_prog_data->uses_vertexid);
> +#endif
> +   unsigned nr_elements =
> +      brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
> +
> +#if GEN_GEN < 8
> +   /* If any of the formats of vb.enabled needs more that one upload, we need
> +    * to add it to nr_elements */

  */ goes on its own line.

> +   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
> +      struct brw_vertex_element *input = brw->vb.enabled[i];
> +      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
> +
> +      if (genX(uploads_needed(format)) > 1)
> +         nr_elements++;
> +   }
> +#endif
> +
> +   /* If the VS doesn't read any inputs (calculating vertex position from
> +    * a state variable for some reason, for example), emit a single pad
> +    * VERTEX_ELEMENT struct and bail.
> +    *
> +    * The stale VB state stays in place, but they don't do anything unless
> +    * a VE loads from them.
> +    */
> +   if (nr_elements == 0) {
> +      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), 3 + GENX(VERTEX_ELEMENT_STATE_length));

Shouldn't this be 1 + GENX(VERTEX_ELEMENT_STATE_length)?  One struct
VERTEX_ELEMENT_STATE struct plus 1 header DWord for
3DSTATE_VERTEX_ELEMENTS?  Which would be 3 DWords.

> +      struct GENX(VERTEX_ELEMENT_STATE) elem = {
> +         .Valid = true,
> +         .SourceElementFormat = SF_R32G32B32A32_FLOAT,

I've not heard of SF_R32G32B32A32_FLOAT - let's just use
ISL_FORMAT_R32G32B32A32_FLOAT.  That way we only have one set of format
enums in the driver.  If genxml is generating these, it might be nice to
change the XML so it stops doing so...

> +         .Component0Control = VFCOMP_STORE_0,
> +         .Component1Control = VFCOMP_STORE_0,
> +         .Component2Control = VFCOMP_STORE_0,
> +         .Component3Control = VFCOMP_STORE_1_FP,
> +      };
> +      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem);
> +      return;
> +   }
> +
> +   /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
> +   const bool uses_draw_params =
> +      vs_prog_data->uses_basevertex ||
> +      vs_prog_data->uses_baseinstance;
> +   const unsigned nr_buffers = brw->vb.nr_buffers +
> +      uses_draw_params + vs_prog_data->uses_drawid;
> +
> +   if (nr_buffers) {
> +#if GEN_GEN >= 6
> +      assert(nr_buffers <= 33);
> +#else
> +      assert(nr_buffers <= 17);
> +#endif

One idea:

    assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));

(up to you though)

> +
> +      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS),
> +                           1 + GENX(VERTEX_BUFFER_STATE_length) * nr_buffers);
> +
> +      for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
> +         const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
> +         /* Prior to Haswell and Bay Trail we have to use 4-component formats
> +          * to fake 3-component ones.  In particular, we do this for
> +          * half-float and 8 and 16-bit integer formats.  This means that the
> +          * vertex element may poke over the end of the buffer by 2 bytes.
> +          */
> +         unsigned padding =
> +            (brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;

GEN_GEN <= 7 instead of brw->gen <= 7

> +         dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
> +                                             buffer->offset,
> +                                             buffer->offset + buffer->size + padding,
> +                                             buffer->stride,
> +                                             buffer->step_rate);
> +      }
> +
> +      if (uses_draw_params) {
> +         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers,
> +                                             brw->draw.draw_params_bo,
> +                                             brw->draw.draw_params_offset,
> +                                             brw->draw.draw_params_bo->size,
> +                                             0 /* stride */,
> +                                             0 /* step rate */);
> +      }
> +
> +      if (vs_prog_data->uses_drawid) {
> +         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
> +                                             brw->draw.draw_id_bo,
> +                                             brw->draw.draw_id_offset,
> +                                             brw->draw.draw_id_bo->size,
> +                                             0 /* stride */,
> +                                             0 /* step rate */);
> +      }
> +   }
> +
> +   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
> +    * presumably for VertexID/InstanceID.
> +    */
> +#if GEN_GEN >= 6
> +   assert(nr_elements <= 34);
> +   struct brw_vertex_element *gen6_edgeflag_input = NULL;
> +#else
> +   assert(nr_elements <= 18);
> +#endif
> +
> +   dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
> +                        1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements);
> +   unsigned i;
> +   for (i = 0; i < brw->vb.nr_enabled; i++) {

   for (unsigned i = 0 ... ?

> +      struct brw_vertex_element *input = brw->vb.enabled[i];
> +      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
> +      uint32_t comp0 = VFCOMP_STORE_SRC;
> +      uint32_t comp1 = VFCOMP_STORE_SRC;
> +      uint32_t comp2 = VFCOMP_STORE_SRC;
> +      uint32_t comp3 = VFCOMP_STORE_SRC;
> +      unsigned num_uploads = 1;
> +
> +#if GEN_GEN >= 8
> +      /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
> +       * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an
> +       * element which has edge flag enabled."
> +       */
> +      assert(!(is_passthru_format(format) && uses_edge_flag));
> +#endif
> +
> +      /* The gen4 driver expects edgeflag to come in as a float, and passes
> +       * that float on to the tests in the clipper.  Mesa's current vertex
> +       * attribute value for EdgeFlag is stored as a float, which works out.
> +       * glEdgeFlagPointer, on the other hand, gives us an unnormalized
> +       * integer ubyte.  Just rewrite that to convert to a float.
> +       */
> +      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
> +#if GEN_GEN >= 6
> +         /* Gen6+ passes edgeflag as sideband along with the vertex, instead
> +          * of in the VUE.  We have to upload it sideband as the last vertex
> +          * element according to the B-Spec.
> +          */
> +         gen6_edgeflag_input = input;
> +         continue;
> +#endif

Let's organize this like:

      /* The gen4 driver expects ...
       *
       * Gen6+ passes edgeflag as sideband ...
       */
#if GEN_GEN >= 6
      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
         gen6_edgeflag_input = input;
         continue;
      }
#endif

> +      }
> +
> +#if GEN_GEN < 8
> +      num_uploads = genX(uploads_needed(format));
> +#endif
> +
> +      for (unsigned c = 0; c < num_uploads; c++) {
> +#if GEN_GEN < 8
> +         uint32_t upload_format = downsize_format_if_needed(format, c);
> +#endif

Let's do:

         uint32_t upload_format = GEN_GEN >= 8 ? format :
            downsize_format_if_needed(format, c);

then we can always use upload_format and avoid ifdefs.

> +         /* If we need more that one upload, the offset stride would be 128
> +          * bits (16 bytes), as for previous uploads we are using the full
> +          * entry. */
> +         unsigned int offset = input->offset + c * 16;
> +         int size = input->glarray->Size;
> +
> +#if GEN_GEN < 8
> +         if (is_passthru_format(format))
> +            size = upload_format_size(upload_format);
> +#endif

We can probably just do if (GEN_GEN < 8 && is_passthru_format(format))
rather than #if...#endif.  Either way is fine.

> +
> +         switch (size) {
> +            case 0: comp0 = VFCOMP_STORE_0;
> +            case 1: comp1 = VFCOMP_STORE_0;
> +            case 2: comp2 = VFCOMP_STORE_0;
> +            case 3:
> +#if GEN_GEN >= 8
> +               if (input->glarray->Doubles) {

Let's make this if (GEN_GEN >= 8 && input->glarray->Doubles).
Then we can avoid the ifdefs and awkward } else ... if.

> +                  comp3 = VFCOMP_STORE_0;
> +               } else
> +#endif
> +               if (input->glarray->Integer) {
> +                  comp3 = VFCOMP_STORE_1_INT;
> +               } else {
> +                  comp3 = VFCOMP_STORE_1_FP;
> +               }
> +
> +               break;
> +         }
> +
> +#if GEN_GEN >= 8
> +         /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE):
> +          *
> +          *     "When SourceElementFormat is set to one of the *64*_PASSTHRU
> +          *     formats, 64-bit components are stored in the URB without any
> +          *     conversion. In this case, vertex elements must be written as 128
> +          *     or 256 bits, with VFCOMP_STORE_0 being used to pad the output as
> +          *     required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red
> +          *     component into the URB, Component 1 must be specified as
> +          *     VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) in
> +          *     order to output a 128-bit vertex element, or Components 1-3 must
> +          *     be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
> +          *     element. Likewise, use of R64G64B64_PASSTHRU requires Component 3
> +          *     to be specified as VFCOMP_STORE_0 in order to output a 256-bit
> +          *     vertex element."
> +          */
> +         if (input->glarray->Doubles && !input->is_dual_slot) {
> +            /* Store vertex elements which correspond to double and dvec2 vertex
> +             * shader inputs as 128-bit vertex elements, instead of 256-bits.
> +             */
> +            comp2 = VFCOMP_NOSTORE;
> +            comp3 = VFCOMP_NOSTORE;
> +         }
> +#endif
> +
> +         struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
> +            .VertexBufferIndex = input->buffer,
> +            .Valid = true,
> +#if GEN_GEN < 8
> +            .SourceElementFormat = upload_format,
> +#else
> +            .SourceElementFormat = format,
> +#endif

If you take my earlier suggestion upload_format will exist, so we can
just use it and avoid the preprocessor conditionals.

> +            .SourceElementOffset = offset,
> +            .Component0Control = comp0,
> +            .Component1Control = comp1,
> +            .Component2Control = comp2,
> +            .Component3Control = comp3,
> +#if GEN_GEN < 6

I believe this should be GEN_GEN < 5, not 6, to match the existing
behavior.

> +            .DestinationElementOffset = i * 4,
> +#endif
> +         };
> +
> +         GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
> +         dw += GENX(VERTEX_ELEMENT_STATE_length);
> +      }
> +   }
> +
> +   if (needs_sgvs_element) {
> +      struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
> +         .Valid = true,
> +         .Component0Control = VFCOMP_STORE_0,
> +         .Component1Control = VFCOMP_STORE_0,
> +         .Component2Control = VFCOMP_STORE_0,
> +         .Component3Control = VFCOMP_STORE_0,
> +#if GEN_GEN < 6

Let's make this GEN_GEN < 5 to match the main case above.
(I'll send a patch to do that prior to your series.)

> +         .DestinationElementOffset = i * 4,
> +#endif
> +      };
> +
> +#if GEN_GEN >= 8
> +      if (vs_prog_data->uses_basevertex ||
> +          vs_prog_data->uses_baseinstance) {
> +         elem_state.VertexBufferIndex = brw->vb.nr_buffers;
> +         elem_state.SourceElementFormat = SF_R32G32_UINT;

ISL_FORMAT_R32G32_UINT

> +         elem_state.Component0Control = VFCOMP_STORE_SRC;
> +         elem_state.Component1Control = VFCOMP_STORE_SRC;
> +      }
> +#else
> +      elem_state.VertexBufferIndex = brw->vb.nr_buffers;
> +      elem_state.SourceElementFormat = SF_R32G32_UINT;

ISL_FORMAT_R32G32_UINT

> +      if (vs_prog_data->uses_basevertex)
> +         elem_state.Component0Control = VFCOMP_STORE_SRC;
> +
> +      if (vs_prog_data->uses_baseinstance)
> +         elem_state.Component1Control = VFCOMP_STORE_SRC;
> +
> +      if (vs_prog_data->uses_vertexid)
> +         elem_state.Component2Control = VFCOMP_STORE_VID;
> +
> +      if (vs_prog_data->uses_instanceid)
> +         elem_state.Component3Control = VFCOMP_STORE_IID;
> +#endif
> +
> +      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
> +      dw += GENX(VERTEX_ELEMENT_STATE_length);
> +   }
> +
> +   if (vs_prog_data->uses_drawid) {
> +      struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
> +         .Valid = true,
> +         .VertexBufferIndex = brw->vb.nr_buffers + 1,
> +         .SourceElementFormat = SF_R32_UINT,

ISL_FORMAT_R32_UINT

> +         .Component0Control = VFCOMP_STORE_SRC,
> +         .Component1Control = VFCOMP_STORE_0,
> +         .Component2Control = VFCOMP_STORE_0,
> +         .Component3Control = VFCOMP_STORE_0,
> +#if GEN_GEN < 6

Let's make this GEN_GEN < 5 to match the main case.

> +         .DestinationElementOffset = i * 4,
> +#endif
> +      };
> +
> +      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
> +      dw += GENX(VERTEX_ELEMENT_STATE_length);
> +   }
> +
> +#if GEN_GEN >= 6
> +   if (gen6_edgeflag_input) {
> +      uint32_t format =
> +         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
> +
> +      struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
> +         .Valid = true,
> +         .VertexBufferIndex = gen6_edgeflag_input->buffer,
> +         .EdgeFlagEnable = true,
> +         .SourceElementFormat = format,
> +         .SourceElementOffset = gen6_edgeflag_input->offset,
> +         .Component0Control = VFCOMP_STORE_SRC,
> +         .Component1Control = VFCOMP_STORE_0,
> +         .Component2Control = VFCOMP_STORE_0,
> +         .Component3Control = VFCOMP_STORE_0,
> +      };
> +
> +      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
> +      dw += GENX(VERTEX_ELEMENT_STATE_length);
> +   }
> +#endif
> +
> +#if GEN_GEN >= 8
> +   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
> +      const struct brw_vertex_element *input = brw->vb.enabled[i];
> +      const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
> +      unsigned element_index;
> +
> +      /* The edge flag element is reordered to be the last one in the code
> +       * above so we need to compensate for that in the element indices used
> +       * below.
> +       */
> +      if (input == gen6_edgeflag_input)
> +         element_index = nr_elements - 1;
> +      else
> +         element_index = j++;
> +
> +      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
> +         vfi.VertexElementIndex = element_index;
> +         vfi.InstancingEnable = buffer->step_rate ? true : false;

buffer->step_rate != 0

> +         vfi.InstanceDataStepRate = buffer->step_rate;
> +      }
> +   }
> +
> +   if (vs_prog_data->uses_drawid) {
> +      const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
> +
> +      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
> +         vfi.VertexElementIndex = element;
> +      }
> +   }
> +#endif
> +}
> +
> +static const struct brw_tracked_state genX(vertices) = {
> +   .dirty = {
> +      .mesa = _NEW_POLYGON,
> +      .brw = BRW_NEW_BATCH |
> +             BRW_NEW_BLORP |
> +             BRW_NEW_VERTICES |
> +             BRW_NEW_VS_PROG_DATA,
> +   },
> +   .emit = genX(emit_vertices),
> +};
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170428/8725456e/attachment-0001.sig>