[Mesa-dev] [PATCH v4 6/6] i965: gl_BaseVertex must be zero for non-indexed draw calls

Jason Ekstrand jason at jlekstrand.net
Sat Apr 7 06:21:38 UTC 2018


On Fri, Apr 6, 2018 at 2:53 PM, Ian Romanick <idr at freedesktop.org> wrote:

> From: Antia Puentes <apuentes at igalia.com>
>
> We keep 'firstvertex' as it is and move gl_BaseVertex to the drawID
> vertex element. The previous Vertex Elements order was:
>
>   * VE 1: <BaseVertex/firstvertex, BaseInstance, VertexID, InstanceID>
>   * VE 2: <Draw ID, 0, 0, 0>
>
> and now it is:
>
>   * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
>   * VE 2: <Draw ID, BaseVertex, 0, 0>
>
> To move the BaseVertex keeping VE1 as it is, allows to keep pointing the
> vertex buffer associated to VE 1 to the indirect buffer for indirect
> draw calls.
>
> From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification:
>
>   "gl_BaseVertex holds the integer value passed to the baseVertex
>   parameter to the command that resulted in the current shader
>   invocation. In the case where the command has no baseVertex parameter,
>   the value of gl_BaseVertex is zero."
>
> Fixes CTS tests:
>
>   * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysParameters
>   * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysInstancedParam
> eters
>   * KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysParameters
>   * KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysIndirectP
> arameters
>   * KHR-GL45.shader_draw_parameters_tests.MultiDrawArraysIndirectCountPa
> rameters
>
> v2 (idr): Make changes to brw_prepare_shader_draw_parameters matching
> those in genX(emit_vertices).  Reformat commit message to 72 columns.
>
> Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102678
> ---
>  src/intel/compiler/brw_nir.c                  | 14 +++++----
>  src/intel/compiler/brw_vec4.cpp               | 14 +++++----
>  src/mesa/drivers/dri/i965/brw_context.h       | 32 ++++++++++++++-----
>  src/mesa/drivers/dri/i965/brw_draw.c          | 45
> ++++++++++++++++++---------
>  src/mesa/drivers/dri/i965/brw_draw_upload.c   | 14 ++++-----
>  src/mesa/drivers/dri/i965/genX_state_upload.c | 38 +++++++++++-----------
>  6 files changed, 97 insertions(+), 60 deletions(-)
>
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index 16b0d86814f..16ab529737b 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -238,8 +238,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>      */
>     const bool has_sgvs =
>        nir->info.system_values_read &
> -      (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> -       BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
> +      (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>         BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>         BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>         BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
> @@ -279,7 +278,6 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>
>                 nir_intrinsic_set_base(load, num_inputs);
>                 switch (intrin->intrinsic) {
> -               case nir_intrinsic_load_base_vertex:
>                 case nir_intrinsic_load_first_vertex:
>                    nir_intrinsic_set_component(load, 0);
>                    break;
> @@ -293,11 +291,15 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>                    nir_intrinsic_set_component(load, 3);
>                    break;
>                 case nir_intrinsic_load_draw_id:
> -                  /* gl_DrawID is stored right after gl_VertexID and
> friends
> -                   * if any of them exist.
> +               case nir_intrinsic_load_base_vertex:
> +                  /* gl_DrawID and gl_BaseVertex are stored right after
> +                     gl_VertexID and friends if any of them exist.
>                     */
>                    nir_intrinsic_set_base(load, num_inputs + has_sgvs);
> -                  nir_intrinsic_set_component(load, 0);
> +                  if (intrin->intrinsic == nir_intrinsic_load_draw_id)
> +                     nir_intrinsic_set_component(load, 0);
> +                  else
> +                     nir_intrinsic_set_component(load, 1);
>                    break;
>                 default:
>                    unreachable("Invalid system value intrinsic");
> diff --git a/src/intel/compiler/brw_vec4.cpp
> b/src/intel/compiler/brw_vec4.cpp
> index 1e384f5bf4d..d33caefdea9 100644
> --- a/src/intel/compiler/brw_vec4.cpp
> +++ b/src/intel/compiler/brw_vec4.cpp
> @@ -2825,14 +2825,19 @@ brw_compile_vs(const struct brw_compiler
> *compiler, void *log_data,
>      * incoming vertex attribute.  So, add an extra slot.
>      */
>     if (shader->info.system_values_read &
> -       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> -        BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
> +       (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>          BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>          BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>          BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
>        nr_attribute_slots++;
>     }
>
> +   if (shader->info.system_values_read &
> +       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> +        BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))) {
> +      nr_attribute_slots++;
> +   }
> +
>     if (shader->info.system_values_read &
>         BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX))
>        prog_data->uses_basevertex = true;
> @@ -2853,12 +2858,9 @@ brw_compile_vs(const struct brw_compiler *compiler,
> void *log_data,
>         BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))
>        prog_data->uses_instanceid = true;
>
> -   /* gl_DrawID has its very own vec4 */
>     if (shader->info.system_values_read &
> -       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
> +       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))
>        prog_data->uses_drawid = true;
> -      nr_attribute_slots++;
> -   }
>
>     /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB
> Entry
>      * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode.  Empirically,
> in
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
> b/src/mesa/drivers/dri/i965/brw_context.h
> index c65a22c38bb..6758b34f47e 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -898,20 +898,36 @@ struct brw_context
>        } params;
>
>        /**
> -       * Buffer and offset used for GL_ARB_shader_draw_parameters
> -       * (for now, only gl_BaseVertex).
> +       * Buffer and offset used for GL_ARB_shader_draw_parameters which
> will
> +       * point to the indirect buffer for indirect draw calls.
>         */
>        struct brw_bo *draw_params_bo;
>        uint32_t draw_params_offset;
>
> +      struct {
> +         /**
> +          * The value of gl_DrawID for the current _mesa_prim. This
> always comes
> +          * in from it's own vertex buffer since it's not part of the
> indirect
> +          * draw parameters.
> +          */
> +         int gl_drawid;
> +
> +         /**
> +          * The value of gl_BaseVertex for the current _mesa_prim.
> Although
> +          * gl_BaseVertex is part of the indirect buffer for indexed draw
> calls,
> +          * that is not longer the case for non-indexed draw calls, where
> it must
> +          * be zero, so we store it in a different buffer.
> +          */
> +         int gl_basevertex;
> +      } derived_params;
> +
>        /**
> -       * The value of gl_DrawID for the current _mesa_prim. This always
> comes
> -       * in from it's own vertex buffer since it's not part of the
> indirect
> -       * draw parameters.
> +       * Buffer and offset used for GL_ARB_shader_draw_parameters which
> contains
> +       * parameters that are not present in the indirect buffer. They
> will go in
> +       * their own vertex element.
>         */
> -      int gl_drawid;
> -      struct brw_bo *draw_id_bo;
> -      uint32_t draw_id_offset;
> +      struct brw_bo *derived_draw_params_bo;
> +      uint32_t derived_draw_params_offset;
>
>        /**
>         * Pointer to the the buffer storing the indirect draw parameters.
> It
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c
> b/src/mesa/drivers/dri/i965/brw_draw.c
> index f51f083178e..e9ec8d585d2 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -241,6 +241,14 @@ brw_emit_prim(struct brw_context *brw,
>                                 prim->indirect_offset + 12);
>           brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
>                                 prim->indirect_offset + 16);
> +
> +         /* Store the gl_BaseVertex value in its vertex buffer */
> +         if (brw->draw.derived_draw_params_bo != NULL) {
> +            brw_store_register_mem32(brw, brw->draw.derived_draw_params_
> bo,
> +                                     GEN7_3DPRIM_BASE_VERTEX,
> +                                     brw->draw.derived_draw_params_offset
> + 4);
> +            brw_emit_mi_flush(brw);
> +         }
>

Oh, boy, this is tricky... First of all, it's a bit of a bummer that we
can't just load the indirect buffer again for this.  Not too much to do
about it, I guess.

Second, there be very scary dragons here.  It turns out that, at least on
Haswell (and possibly other platforms), reading from state registers while
rendering is in-flight can lead to GPU hangs.  Yes, I said "reading".  We
found this out the hard way while working on Vulkan indirect clear colors.
The better thing to do here would be to use GPRs when available (I think
they're safe but I'm not sure) or to do a MI_COPY_MEM_MEM which, of course,
is only available on gen8+.  On Ivy Bridge (and haswell if we're going to
do a store_register_mem from a state register), we need to do a mi_flush
*before* the store as well.


>        } else {
>           brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
>                                 prim->indirect_offset + 12);
> @@ -815,7 +823,7 @@ brw_draw_single_prim(struct gl_context *ctx,
>     }
>
>     /* Determine if we need to flag BRW_NEW_VERTICES for updating the
> -    * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we
> +    * firstvertex or gl_BaseInstanceARB values. For indirect draw, we
>      * always flag if the shader uses one of the values. For direct draws,
>      * we only flag if the values change.
>      */
> @@ -825,16 +833,12 @@ brw_draw_single_prim(struct gl_context *ctx,
>     const struct brw_vs_prog_data *vs_prog_data =
>        brw_vs_prog_data(brw->vs.base.prog_data);
>     if (prim_id > 0) {
> -      const bool uses_firstvertex =
> -         vs_prog_data->uses_basevertex ||
> -         vs_prog_data->uses_firstvertex;
> -
>        const bool uses_draw_parameters =
> -         uses_firstvertex ||
> +         vs_prog_data->uses_firstvertex ||
>           vs_prog_data->uses_baseinstance;
>
>        if ((uses_draw_parameters && prim->is_indirect) ||
> -          (uses_firstvertex &&
> +          (vs_prog_data->uses_firstvertex &&
>             brw->draw.params.firstvertex != new_firstvertex) ||
>            (vs_prog_data->uses_baseinstance &&
>             brw->draw.params.gl_baseinstance != new_baseinstance))
> @@ -861,17 +865,28 @@ brw_draw_single_prim(struct gl_context *ctx,
>     }
>
>     /* gl_DrawID always needs its own vertex buffer since it's not part of
> -    * the indirect parameter buffer. If the program uses gl_DrawID we need
> -    * to flag BRW_NEW_VERTICES. For the first iteration, we don't have
> -    * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
> -    * the loop.
> +    * the indirect parameter buffer. This is the same for gl_BaseVertex,
> which
> +    * is not part of the indirect parameter buffer for non-indexed draw
> calls.
> +    * If the program uses gl_DrawID or, uses gl_BaseVertex and it is an
> indirect
> +    * draw call or the value has changed, we need to flag
> BRW_NEW_VERTICES.
> +    * For the first iteration, we don't have valid vs_prog_data, but we
> always
> +    * flag BRW_NEW_VERTICES before the loop.
>      */
> -   brw->draw.gl_drawid = prim->draw_id;
> -   brw_bo_unreference(brw->draw.draw_id_bo);
> -   brw->draw.draw_id_bo = NULL;
> -   if (prim_id > 0 && vs_prog_data->uses_drawid)
> +   const int new_basevertex = prim->indexed ? prim->basevertex : 0;
> +   if (prim_id > 0 &&
> +       (vs_prog_data->uses_drawid ||
> +        (vs_prog_data->uses_basevertex &&
> +         (prim->is_indirect ||
> +          brw->draw.derived_params.gl_basevertex != new_basevertex))))
>        brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
>
> +   brw->draw.derived_params.gl_drawid = prim->draw_id;
> +   brw->draw.derived_params.gl_basevertex = new_basevertex;
> +
> +   brw_bo_unreference(brw->draw.derived_draw_params_bo);
> +   brw->draw.derived_draw_params_bo = NULL;
> +   brw->draw.derived_draw_params_offset = 0;
> +
>     if (devinfo->gen < 6)
>        brw_set_prim(brw, prim);
>     else
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> index 7573f780f23..54ba951d6fd 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> @@ -704,11 +704,11 @@ brw_prepare_shader_draw_parameters(struct
> brw_context *brw)
>     const struct brw_vs_prog_data *vs_prog_data =
>        brw_vs_prog_data(brw->vs.base.prog_data);
>
> -   const bool uses_firstvertex =
> -      vs_prog_data->uses_basevertex || vs_prog_data->uses_firstvertex;
> +   const bool uses_derived_draw_params =
> +      vs_prog_data->uses_basevertex || vs_prog_data->uses_drawid;
>
>     /* For non-indirect draws, upload the shader draw parameters */
> -   if ((uses_firstvertex || vs_prog_data->uses_baseinstance) &&
> +   if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance)
> &&
>         brw->draw.draw_params_bo == NULL) {
>        brw_upload_data(&brw->upload,
>                        &brw->draw.params, sizeof(brw->draw.params), 4,
> @@ -716,11 +716,11 @@ brw_prepare_shader_draw_parameters(struct
> brw_context *brw)
>                        &brw->draw.draw_params_offset);
>     }
>
> -   if (vs_prog_data->uses_drawid) {
> +   if (uses_derived_draw_params) {
>        brw_upload_data(&brw->upload,
> -                      &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid),
> 4,
> -                      &brw->draw.draw_id_bo,
> -                      &brw->draw.draw_id_offset);
> +                      &brw->draw.derived_params, sizeof(brw->draw.derived_params),
> 4,
> +                      &brw->draw.derived_draw_params_bo,
> +                      &brw->draw.derived_draw_params_offset);
>     }
>  }
>
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c
> b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 1a32c60ae34..8323446b4ac 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -539,16 +539,14 @@ genX(emit_vertices)(struct brw_context *brw)
>     }
>  #endif
>
> -   const bool uses_firstvertex =
> -      vs_prog_data->uses_basevertex || vs_prog_data->uses_firstvertex;
> -
> -   const bool needs_sgvs_element = (uses_firstvertex ||
> +   const bool needs_sgvs_element = (vs_prog_data->uses_firstvertex ||
>                                      vs_prog_data->uses_baseinstance ||
>                                      vs_prog_data->uses_instanceid ||
>                                      vs_prog_data->uses_vertexid);
>
>     unsigned nr_elements =
> -      brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
> +      brw->vb.nr_enabled + needs_sgvs_element +
> +      (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex);
>
>  #if GEN_GEN < 8
>     /* If any of the formats of vb.enabled needs more that one upload, we
> need
> @@ -589,10 +587,15 @@ genX(emit_vertices)(struct brw_context *brw)
>
>     /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS
> packets. */
>     const bool uses_draw_params =
> -      uses_firstvertex ||
> +      vs_prog_data->uses_firstvertex ||
>        vs_prog_data->uses_baseinstance;
> +
> +   const bool uses_derived_draw_params =
> +      vs_prog_data->uses_drawid ||
> +      vs_prog_data->uses_basevertex;
> +
>     const unsigned nr_buffers = brw->vb.nr_buffers +
> -      uses_draw_params + vs_prog_data->uses_drawid;
> +      uses_draw_params + uses_derived_draw_params;
>
>     if (nr_buffers) {
>        assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
> @@ -626,11 +629,11 @@ genX(emit_vertices)(struct brw_context *brw)
>                                               0 /* step rate */);
>        }
>
> -      if (vs_prog_data->uses_drawid) {
> +      if (uses_derived_draw_params) {
>           dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers
> + 1,
> -                                             brw->draw.draw_id_bo,
> -                                             brw->draw.draw_id_offset,
> -                                             brw->draw.draw_id_bo->size,
> +
>  brw->draw.derived_draw_params_bo,
> +
>  brw->draw.derived_draw_params_offset,
> +
>  brw->draw.derived_draw_params_bo->size,
>                                               0 /* stride */,
>                                               0 /* step rate */);
>        }
> @@ -772,7 +775,7 @@ genX(emit_vertices)(struct brw_context *brw)
>        };
>
>  #if GEN_GEN >= 8
> -      if (uses_firstvertex ||
> +      if (vs_prog_data->uses_firstvertex ||
>            vs_prog_data->uses_baseinstance) {
>           elem_state.VertexBufferIndex = brw->vb.nr_buffers;
>           elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
> @@ -782,11 +785,10 @@ genX(emit_vertices)(struct brw_context *brw)
>  #else
>        elem_state.VertexBufferIndex = brw->vb.nr_buffers;
>        elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
> -      if (uses_firstvertex)
> +      if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance)
> {
>           elem_state.Component0Control = VFCOMP_STORE_SRC;
> -
> -      if (vs_prog_data->uses_baseinstance)
>           elem_state.Component1Control = VFCOMP_STORE_SRC;
> +      }
>
>        if (vs_prog_data->uses_vertexid)
>           elem_state.Component2Control = VFCOMP_STORE_VID;
> @@ -799,13 +801,13 @@ genX(emit_vertices)(struct brw_context *brw)
>        dw += GENX(VERTEX_ELEMENT_STATE_length);
>     }
>
> -   if (vs_prog_data->uses_drawid) {
> +   if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
>        struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
>           .Valid = true,
>           .VertexBufferIndex = brw->vb.nr_buffers + 1,
> -         .SourceElementFormat = ISL_FORMAT_R32_UINT,
> +         .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
>           .Component0Control = VFCOMP_STORE_SRC,
> -         .Component1Control = VFCOMP_STORE_0,
> +         .Component1Control = VFCOMP_STORE_SRC,
>           .Component2Control = VFCOMP_STORE_0,
>           .Component3Control = VFCOMP_STORE_0,
>  #if GEN_GEN < 5
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180406/4c025746/attachment-0001.html>


More information about the mesa-dev mailing list