[Mesa-dev] [PATCH v2 3/7] intel: emit first_vertex and reorder the VE' components

Kenneth Graunke kenneth at whitecape.org
Tue Dec 5 23:23:03 UTC 2017


On Monday, December 4, 2017 12:12:49 PM PST Antia Puentes wrote:
> The new order is:
> * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
> * VE 2: <Draw ID, BaseVertex, 0, 0>
> 
> Previously it was:
> * VE 1: <BaseVertex, BaseInstance, VertexID, InstanceID>
> * VE 2: <Draw ID, 0, 0, 0>
> 
> The gl_BaseVertex is in a new location now, and firstvertex occupies
> the old gl_BaseVertex place. This way we can keep pointing to the
> indirect buffer for indirect draw calls.
> 
> Reviewed-by: Neil Roberts <nroberts at igalia.com>
> ---
>  src/intel/compiler/brw_nir.c                  | 11 +++++---
>  src/intel/compiler/brw_vec4.cpp               | 13 +++++----
>  src/mesa/drivers/dri/i965/brw_context.h       | 36 ++++++++++++++++++-------
>  src/mesa/drivers/dri/i965/brw_draw.c          | 26 +++++++++++-------
>  src/mesa/drivers/dri/i965/brw_draw_upload.c   | 13 ++++-----
>  src/mesa/drivers/dri/i965/genX_state_upload.c | 39 +++++++++++++++------------
>  6 files changed, 88 insertions(+), 50 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index 8f3f77f89ae..f702f5b8534 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -240,7 +240,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>      */
>     const bool has_sgvs =
>        nir->info.system_values_read &
> -      (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> +      (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>         BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>         BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>         BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
> @@ -262,6 +262,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>              nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>  
>              switch (intrin->intrinsic) {
> +            case nir_intrinsic_load_first_vertex:
>              case nir_intrinsic_load_base_vertex:
>              case nir_intrinsic_load_base_instance:
>              case nir_intrinsic_load_vertex_id_zero_base:
> @@ -279,7 +280,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>  
>                 nir_intrinsic_set_base(load, num_inputs);
>                 switch (intrin->intrinsic) {
> -               case nir_intrinsic_load_base_vertex:
> +               case nir_intrinsic_load_first_vertex:
>                    nir_intrinsic_set_component(load, 0);
>                    break;
>                 case nir_intrinsic_load_base_instance:
> @@ -292,11 +293,15 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>                    nir_intrinsic_set_component(load, 3);
>                    break;
>                 case nir_intrinsic_load_draw_id:
> +               case nir_intrinsic_load_base_vertex:
>                    /* gl_DrawID is stored right after gl_VertexID and friends
>                     * if any of them exist.
>                     */
>                    nir_intrinsic_set_base(load, num_inputs + has_sgvs);
> -                  nir_intrinsic_set_component(load, 0);
> +                  if (intrin->intrinsic ==  nir_intrinsic_load_draw_id)
> +                     nir_intrinsic_set_component(load, 0);
> +                  else
> +                     nir_intrinsic_set_component(load, 1);
>                    break;
>                 default:
>                    unreachable("Invalid system value intrinsic");
> diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
> index 14f930e0264..70a197a9fa0 100644
> --- a/src/intel/compiler/brw_vec4.cpp
> +++ b/src/intel/compiler/brw_vec4.cpp
> @@ -2789,13 +2789,19 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
>      * incoming vertex attribute.  So, add an extra slot.
>      */
>     if (shader->info.system_values_read &
> -       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> +       (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>          BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>          BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>          BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
>        nr_attribute_slots++;
>     }
>  
> +   if (shader->info.system_values_read &
> +       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> +        BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))) {
> +      nr_attribute_slots++;
> +   }
> +
>     if (shader->info.system_values_read &
>         BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX))
>        prog_data->uses_basevertex = true;
> @@ -2816,12 +2822,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
>         BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))
>        prog_data->uses_instanceid = true;
>  
> -   /* gl_DrawID has its very own vec4 */
>     if (shader->info.system_values_read &
> -       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
> +       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))
>        prog_data->uses_drawid = true;
> -      nr_attribute_slots++;
> -   }
>  
>     /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
>      * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode.  Empirically, in
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 06704838061..c6d176bc243 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -843,28 +843,44 @@ struct brw_context
>  
>     struct {
>        struct {
> -         /** The value of gl_BaseVertex for the current _mesa_prim. */
> -         int gl_basevertex;
> +         /**
> +          * Either the value of gl_BaseVertex for indexed draw calls or the
> +          * value of the argument <first> for non-indexed draw calls, for the
> +          * current _mesa_prim.
> +          */
> +         int firstvertex;
>  
>           /** The value of gl_BaseInstance for the current _mesa_prim. */
>           int gl_baseinstance;
>        } params;
>  
>        /**
> -       * Buffer and offset used for GL_ARB_shader_draw_parameters
> -       * (for now, only gl_BaseVertex).
> +       * Buffer and offset used for GL_ARB_shader_draw_parameters. For indirect
> +       * draw calls it will point to the indirect buffer.
>         */
>        struct brw_bo *draw_params_bo;
>        uint32_t draw_params_offset;
>  
> +      struct {
> +         /** The value of gl_DrawID for the current _mesa_prim. */
> +         int gl_drawid;
> +
> +         /**
> +          * The value of gl_BaseVertex for the current _mesa_prim. It must be
> +          * zero for non-indexed calls.
> +          */
> +         int gl_basevertex;
> +      } derived_params;
> +
>        /**
> -       * The value of gl_DrawID for the current _mesa_prim. This always comes
> -       * in from it's own vertex buffer since it's not part of the indirect
> -       * draw parameters.
> +       * Buffer and offset also used for GL_ARB_shader_draw_parameters. As they
> +       * are not part of the indirect buffer they will go in their own vertex
> +       * element. Note that although gl_BaseVertex is part of the indirect
> +       * buffer for indexed draw calls, that is not longer the case for
> +       * non-indexed.
>         */
> -      int gl_drawid;
> -      struct brw_bo *draw_id_bo;
> -      uint32_t draw_id_offset;
> +      struct brw_bo *derived_draw_params_bo;
> +      uint32_t derived_draw_params_offset;
>  
>        /**
>         * Pointer to the the buffer storing the indirect draw parameters. It
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index 7e29dcfd4e8..18d26cfafca 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -760,25 +760,25 @@ brw_draw_single_prim(struct gl_context *ctx,
>      * always flag if the shader uses one of the values. For direct draws,
>      * we only flag if the values change.
>      */
> -   const int new_basevertex =
> +   const int new_firstvertex =
>        prim->indexed ? prim->basevertex : prim->start;
>     const int new_baseinstance = prim->base_instance;
>     const struct brw_vs_prog_data *vs_prog_data =
>        brw_vs_prog_data(brw->vs.base.prog_data);
>     if (prim_id > 0) {
>        const bool uses_draw_parameters =
> -         vs_prog_data->uses_basevertex ||
> +         vs_prog_data->uses_firstvertex ||
>           vs_prog_data->uses_baseinstance;
>  
>        if ((uses_draw_parameters && prim->is_indirect) ||
> -          (vs_prog_data->uses_basevertex &&
> -           brw->draw.params.gl_basevertex != new_basevertex) ||
> +          (vs_prog_data->uses_firstvertex &&
> +           brw->draw.params.firstvertex != new_firstvertex) ||
>            (vs_prog_data->uses_baseinstance &&
>             brw->draw.params.gl_baseinstance != new_baseinstance))
>           brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
>     }
>  
> -   brw->draw.params.gl_basevertex = new_basevertex;
> +   brw->draw.params.firstvertex = new_firstvertex;
>     brw->draw.params.gl_baseinstance = new_baseinstance;
>     brw_bo_unreference(brw->draw.draw_params_bo);
>  
> @@ -803,12 +803,20 @@ brw_draw_single_prim(struct gl_context *ctx,
>      * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
>      * the loop.
>      */
> -   brw->draw.gl_drawid = prim->draw_id;
> -   brw_bo_unreference(brw->draw.draw_id_bo);
> -   brw->draw.draw_id_bo = NULL;
> -   if (prim_id > 0 && vs_prog_data->uses_drawid)
> +   const int new_basevertex = prim->indexed ? prim->basevertex : prim->start;
> +
> +   if (prim_id > 0 &&
> +       (vs_prog_data->uses_drawid ||
> +        (vs_prog_data->uses_basevertex &&
> +         brw->draw.derived_params.gl_basevertex != new_basevertex)))
>        brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
>  
> +   brw->draw.derived_params.gl_drawid = prim->draw_id;
> +   brw->draw.derived_params.gl_basevertex = new_basevertex;
> +
> +   brw_bo_unreference(brw->draw.derived_draw_params_bo);
> +   brw->draw.derived_draw_params_bo = NULL;
> +
>     if (devinfo->gen < 6)
>        brw_set_prim(brw, prim);
>     else
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> index 9b81999ea05..da3a0569ccb 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> @@ -696,18 +696,19 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw)
>     const struct brw_vs_prog_data *vs_prog_data =
>        brw_vs_prog_data(brw->vs.base.prog_data);
>  
> -   /* For non-indirect draws, upload gl_BaseVertex. */
> -   if ((vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) &&
> +   /* For non-indirect draws, upload the parameters. */
> +   if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
>         brw->draw.draw_params_bo == NULL) {
>        intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
>  			&brw->draw.draw_params_bo,
>                          &brw->draw.draw_params_offset);
>     }
>  
> -   if (vs_prog_data->uses_drawid) {
> -      intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
> -                        &brw->draw.draw_id_bo,
> -                        &brw->draw.draw_id_offset);
> +   if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
> +      intel_upload_data(brw, &brw->draw.derived_params,
> +                        sizeof(brw->draw.derived_params), 4,
> +                        &brw->draw.derived_draw_params_bo,
> +                        &brw->draw.derived_draw_params_offset);
>     }
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index dcf497c9183..db68d89aa66 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -498,19 +498,20 @@ genX(emit_vertices)(struct brw_context *brw)
>      * can't be inserted past that so we need a dummy element to ensure that
>      * the edge flag is the last one.
>      */
> -   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
> +   const bool needs_sgvs_element = (vs_prog_data->uses_firstvertex ||
>                                      vs_prog_data->uses_baseinstance ||
>                                      ((vs_prog_data->uses_instanceid ||
>                                        vs_prog_data->uses_vertexid)
>                                       && uses_edge_flag));
>  #else
> -   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
> +   const bool needs_sgvs_element = (vs_prog_data->uses_firstvertex ||
>                                      vs_prog_data->uses_baseinstance ||
>                                      vs_prog_data->uses_instanceid ||
>                                      vs_prog_data->uses_vertexid);
>  #endif
>     unsigned nr_elements =
> -      brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
> +      brw->vb.nr_enabled + needs_sgvs_element +
> +      (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex);
>  
>  #if GEN_GEN < 8
>     /* If any of the formats of vb.enabled needs more that one upload, we need
> @@ -549,10 +550,15 @@ genX(emit_vertices)(struct brw_context *brw)
>  
>     /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
>     const bool uses_draw_params =
> -      vs_prog_data->uses_basevertex ||
> +      vs_prog_data->uses_firstvertex ||
>        vs_prog_data->uses_baseinstance;
> +
> +   const bool uses_derived_draw_params =
> +      vs_prog_data->uses_basevertex ||
> +      vs_prog_data->uses_drawid;
> +
>     const unsigned nr_buffers = brw->vb.nr_buffers +
> -      uses_draw_params + vs_prog_data->uses_drawid;
> +      uses_draw_params + uses_derived_draw_params;
>  
>     if (nr_buffers) {
>        assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
> @@ -586,11 +592,11 @@ genX(emit_vertices)(struct brw_context *brw)
>                                               0 /* step rate */);
>        }
>  
> -      if (vs_prog_data->uses_drawid) {
> +      if (uses_derived_draw_params) {
>           dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
> -                                             brw->draw.draw_id_bo,
> -                                             brw->draw.draw_id_offset,
> -                                             brw->draw.draw_id_bo->size,
> +                                             brw->draw.derived_draw_params_bo,
> +                                             brw->draw.derived_draw_params_offset,
> +                                             brw->draw.derived_draw_params_bo->size,
>                                               0 /* stride */,
>                                               0 /* step rate */);
>        }
> @@ -727,7 +733,7 @@ genX(emit_vertices)(struct brw_context *brw)
>        };
>  
>  #if GEN_GEN >= 8
> -      if (vs_prog_data->uses_basevertex ||
> +      if (vs_prog_data->uses_firstvertex ||
>            vs_prog_data->uses_baseinstance) {
>           elem_state.VertexBufferIndex = brw->vb.nr_buffers;
>           elem_state.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT;
> @@ -737,13 +743,12 @@ genX(emit_vertices)(struct brw_context *brw)
>  #else
>        elem_state.VertexBufferIndex = brw->vb.nr_buffers;
>        elem_state.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT;
> -      if (vs_prog_data->uses_basevertex)
> +      if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) {
>           elem_state.Component0Control = VFCOMP_STORE_SRC;
> -
> -      if (vs_prog_data->uses_baseinstance)
>           elem_state.Component1Control = VFCOMP_STORE_SRC;
> +      }
>  
> -      if (vs_prog_data->uses_vertexid)
> +      if (vs_prog_data->uses_firstvertex)

Seems like this should stay uses_vertexid?  I suppose in reality,
uses_firstvertex == uses_vertexid, because we'll always use the two
together today.  So it doesn't really matter.  Still, it's probably
clearer to have both flags, and keep VID tied to uses_vertexid.

With that changed,
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

>           elem_state.Component2Control = VFCOMP_STORE_VID;
>  
>        if (vs_prog_data->uses_instanceid)
> @@ -754,13 +759,13 @@ genX(emit_vertices)(struct brw_context *brw)
>        dw += GENX(VERTEX_ELEMENT_STATE_length);
>     }
>  
> -   if (vs_prog_data->uses_drawid) {
> +   if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
>        struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
>           .Valid = true,
>           .VertexBufferIndex = brw->vb.nr_buffers + 1,
> -         .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32_UINT,
> +         .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT,
>           .Component0Control = VFCOMP_STORE_SRC,
> -         .Component1Control = VFCOMP_STORE_0,
> +         .Component1Control = VFCOMP_STORE_SRC,
>           .Component2Control = VFCOMP_STORE_0,
>           .Component3Control = VFCOMP_STORE_0,
>  #if GEN_GEN < 5
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171205/2df04143/attachment-0001.sig>


More information about the mesa-dev mailing list