[Mesa-dev] [PATCH v3 8/8] i965: gl_BaseVertex must be zero for non-indexed draw calls

Antia Puentes apuentes at igalia.com
Thu Jan 25 18:15:44 UTC 2018


We keep 'firstvertex' as it is and move gl_BaseVertex to the drawID vertex
element. The previous Vertex Elements order was:

  * VE 1: <BaseVertex/firstvertex, BaseInstance, VertexID, InstanceID>
  * VE 2: <Draw ID, 0, 0, 0>

and now it is:

  * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
  * VE 2: <Draw ID, BaseVertex, 0, 0>

To move the BaseVertex keeping VE1 as it is, allows to keep pointing the vertex
buffer associated to VE 1 to the indirect buffer for indirect draw calls.

>From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification:

  "gl_BaseVertex holds the integer value passed to the baseVertex parameter to the
  command that resulted in the current shader invocation. In the case where the
  command has no baseVertex parameter, the value of gl_BaseVertex is zero."

Fixes CTS tests:

  * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysParameters
  * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysInstancedParameters
  * KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysParameters
  * KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysIndirectParameters
  * KHR-GL45.shader_draw_parameters_tests.MultiDrawArraysIndirectCountParameters

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102678
---
 src/intel/compiler/brw_nir.c                  | 14 +++++----
 src/intel/compiler/brw_vec4.cpp               | 14 +++++----
 src/mesa/drivers/dri/i965/brw_context.h       | 32 ++++++++++++++-----
 src/mesa/drivers/dri/i965/brw_draw.c          | 45 ++++++++++++++++++---------
 src/mesa/drivers/dri/i965/brw_draw_upload.c   | 24 ++++++++------
 src/mesa/drivers/dri/i965/genX_state_upload.c | 38 +++++++++++-----------
 6 files changed, 105 insertions(+), 62 deletions(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 34b1e44adf0..c10fa73f4fc 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -238,8 +238,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
     */
    const bool has_sgvs =
       nir->info.system_values_read &
-      (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
-       BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
+      (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
        BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
        BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
        BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
@@ -279,7 +278,6 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
 
                nir_intrinsic_set_base(load, num_inputs);
                switch (intrin->intrinsic) {
-               case nir_intrinsic_load_base_vertex:
                case nir_intrinsic_load_first_vertex:
                   nir_intrinsic_set_component(load, 0);
                   break;
@@ -293,11 +291,15 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
                   nir_intrinsic_set_component(load, 3);
                   break;
                case nir_intrinsic_load_draw_id:
-                  /* gl_DrawID is stored right after gl_VertexID and friends
-                   * if any of them exist.
+               case nir_intrinsic_load_base_vertex:
+                  /* gl_DrawID and gl_BaseVertex are stored right after
+                     gl_VertexID and friends if any of them exist.
                    */
                   nir_intrinsic_set_base(load, num_inputs + has_sgvs);
-                  nir_intrinsic_set_component(load, 0);
+                  if (intrin->intrinsic == nir_intrinsic_load_draw_id)
+                     nir_intrinsic_set_component(load, 0);
+                  else
+                     nir_intrinsic_set_component(load, 1);
                   break;
                default:
                   unreachable("Invalid system value intrinsic");
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 06c79630119..3b4b3c01b57 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -2787,14 +2787,19 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
     * incoming vertex attribute.  So, add an extra slot.
     */
    if (shader->info.system_values_read &
-       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
-        BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
+       (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
         BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
         BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
         BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
       nr_attribute_slots++;
    }
 
+   if (shader->info.system_values_read &
+       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
+        BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))) {
+      nr_attribute_slots++;
+   }
+
    if (shader->info.system_values_read &
        BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX))
       prog_data->uses_basevertex = true;
@@ -2815,12 +2820,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
        BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))
       prog_data->uses_instanceid = true;
 
-   /* gl_DrawID has its very own vec4 */
    if (shader->info.system_values_read &
-       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
+       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))
       prog_data->uses_drawid = true;
-      nr_attribute_slots++;
-   }
 
    /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
     * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode.  Empirically, in
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0a20706567e..8091b0c1caf 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -893,20 +893,36 @@ struct brw_context
       } params;
 
       /**
-       * Buffer and offset used for GL_ARB_shader_draw_parameters
-       * (for now, only gl_BaseVertex).
+       * Buffer and offset used for GL_ARB_shader_draw_parameters which will
+       * point to the indirect buffer for indirect draw calls.
        */
       struct brw_bo *draw_params_bo;
       uint32_t draw_params_offset;
 
+      struct {
+         /**
+          * The value of gl_DrawID for the current _mesa_prim. This always comes
+          * in from it's own vertex buffer since it's not part of the indirect
+          * draw parameters.
+          */
+         int gl_drawid;
+
+         /**
+          * The value of gl_BaseVertex for the current _mesa_prim. Although
+          * gl_BaseVertex is part of the indirect buffer for indexed draw calls,
+          * that is not longer the case for non-indexed draw calls, where it must
+          * be zero, so we store it in a different buffer.
+          */
+         int gl_basevertex;
+      } derived_params;
+
       /**
-       * The value of gl_DrawID for the current _mesa_prim. This always comes
-       * in from it's own vertex buffer since it's not part of the indirect
-       * draw parameters.
+       * Buffer and offset used for GL_ARB_shader_draw_parameters which contains
+       * parameters that are not present in the indirect buffer. They will go in
+       * their own vertex element.
        */
-      int gl_drawid;
-      struct brw_bo *draw_id_bo;
-      uint32_t draw_id_offset;
+      struct brw_bo *derived_draw_params_bo;
+      uint32_t derived_draw_params_offset;
 
       /**
        * Pointer to the the buffer storing the indirect draw parameters. It
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index a1a5161fd35..a24b76f4cc7 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -240,6 +240,14 @@ brw_emit_prim(struct brw_context *brw,
                                prim->indirect_offset + 12);
          brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                                prim->indirect_offset + 16);
+
+         /* Store the gl_BaseVertex value in its vertex buffer */
+         if (brw->draw.derived_draw_params_bo != NULL) {
+            brw_store_register_mem32(brw, brw->draw.derived_draw_params_bo,
+                                     GEN7_3DPRIM_BASE_VERTEX,
+                                     brw->draw.derived_draw_params_offset + 4);
+            brw_emit_mi_flush(brw);
+         }
       } else {
          brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
                                prim->indirect_offset + 12);
@@ -812,7 +820,7 @@ brw_draw_single_prim(struct gl_context *ctx,
    }
 
    /* Determine if we need to flag BRW_NEW_VERTICES for updating the
-    * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we
+    * firstvertex or gl_BaseInstanceARB values. For indirect draw, we
     * always flag if the shader uses one of the values. For direct draws,
     * we only flag if the values change.
     */
@@ -822,16 +830,12 @@ brw_draw_single_prim(struct gl_context *ctx,
    const struct brw_vs_prog_data *vs_prog_data =
       brw_vs_prog_data(brw->vs.base.prog_data);
    if (prim_id > 0) {
-      const bool uses_firstvertex =
-         vs_prog_data->uses_basevertex ||
-         vs_prog_data->uses_firstvertex;
-
       const bool uses_draw_parameters =
-         uses_firstvertex ||
+         vs_prog_data->uses_firstvertex ||
          vs_prog_data->uses_baseinstance;
 
       if ((uses_draw_parameters && prim->is_indirect) ||
-          (uses_firstvertex &&
+          (vs_prog_data->uses_firstvertex &&
            brw->draw.params.firstvertex != new_firstvertex) ||
           (vs_prog_data->uses_baseinstance &&
            brw->draw.params.gl_baseinstance != new_baseinstance))
@@ -858,17 +862,28 @@ brw_draw_single_prim(struct gl_context *ctx,
    }
 
    /* gl_DrawID always needs its own vertex buffer since it's not part of
-    * the indirect parameter buffer. If the program uses gl_DrawID we need
-    * to flag BRW_NEW_VERTICES. For the first iteration, we don't have
-    * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
-    * the loop.
+    * the indirect parameter buffer. This is the same for gl_BaseVertex, which
+    * is not part of the indirect parameter buffer for non-indexed draw calls.
+    * If the program uses gl_DrawID or, uses gl_BaseVertex and it is an indirect
+    * draw call or the value has changed, we need to flag BRW_NEW_VERTICES.
+    * For the first iteration, we don't have valid vs_prog_data, but we always
+    * flag BRW_NEW_VERTICES before the loop.
     */
-   brw->draw.gl_drawid = prim->draw_id;
-   brw_bo_unreference(brw->draw.draw_id_bo);
-   brw->draw.draw_id_bo = NULL;
-   if (prim_id > 0 && vs_prog_data->uses_drawid)
+   const int new_basevertex = prim->indexed ? prim->basevertex : 0;
+   if (prim_id > 0 &&
+       (vs_prog_data->uses_drawid ||
+        (vs_prog_data->uses_basevertex &&
+         (prim->is_indirect ||
+          brw->draw.derived_params.gl_basevertex != new_basevertex))))
       brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
 
+   brw->draw.derived_params.gl_drawid = prim->draw_id;
+   brw->draw.derived_params.gl_basevertex = new_basevertex;
+
+   brw_bo_unreference(brw->draw.derived_draw_params_bo);
+   brw->draw.derived_draw_params_bo = NULL;
+   brw->draw.derived_draw_params_offset = 0;
+
    if (devinfo->gen < 6)
       brw_set_prim(brw, prim);
    else
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 52bf752bff9..f0ac79a7c05 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -696,21 +696,27 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw)
    const struct brw_vs_prog_data *vs_prog_data =
       brw_vs_prog_data(brw->vs.base.prog_data);
 
-   const bool uses_firstvertex =
-      vs_prog_data->uses_basevertex || vs_prog_data->uses_firstvertex;
-
-   /* For non-indirect draws, upload the shader draw parameters */
-   if ((uses_firstvertex || vs_prog_data->uses_baseinstance) &&
+   /**
+    * For non-indirect draws, upload the shader draw parameters. For indirect
+    * draws there is no need, the buffer points to the indirect buffer.
+    */
+   if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
        brw->draw.draw_params_bo == NULL) {
       intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
 			&brw->draw.draw_params_bo,
                         &brw->draw.draw_params_offset);
    }
 
-   if (vs_prog_data->uses_drawid) {
-      intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
-                        &brw->draw.draw_id_bo,
-                        &brw->draw.draw_id_offset);
+   /* Always upload the derived shader draw parameters. For indirect draws, if
+    * the draw call is indexed, the value of the gl_BaseVertex will be
+    * overwritten by the one contained in the indirect buffer, for non-indexed
+    * draws it should be zero, there is no need to overwrite.
+    */
+   if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
+      intel_upload_data(brw, &brw->draw.derived_params,
+                        sizeof(brw->draw.derived_params), 4,
+                        &brw->draw.derived_draw_params_bo,
+                        &brw->draw.derived_draw_params_offset);
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 2f2a573fd05..2d45bab678d 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -488,16 +488,14 @@ genX(emit_vertices)(struct brw_context *brw)
    }
 #endif
 
-   const bool uses_firstvertex =
-      vs_prog_data->uses_basevertex || vs_prog_data->uses_firstvertex;
-
-   const bool needs_sgvs_element = (uses_firstvertex ||
+   const bool needs_sgvs_element = (vs_prog_data->uses_firstvertex ||
                                     vs_prog_data->uses_baseinstance ||
                                     vs_prog_data->uses_instanceid ||
                                     vs_prog_data->uses_vertexid);
 
    unsigned nr_elements =
-      brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
+      brw->vb.nr_enabled + needs_sgvs_element +
+      (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex);
 
 #if GEN_GEN < 8
    /* If any of the formats of vb.enabled needs more that one upload, we need
@@ -536,10 +534,15 @@ genX(emit_vertices)(struct brw_context *brw)
 
    /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
    const bool uses_draw_params =
-      uses_firstvertex ||
+      vs_prog_data->uses_firstvertex ||
       vs_prog_data->uses_baseinstance;
+
+   const bool uses_derived_draw_params =
+      vs_prog_data->uses_drawid ||
+      vs_prog_data->uses_basevertex;
+
    const unsigned nr_buffers = brw->vb.nr_buffers +
-      uses_draw_params + vs_prog_data->uses_drawid;
+      uses_draw_params + uses_derived_draw_params;
 
    if (nr_buffers) {
       assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
@@ -573,11 +576,11 @@ genX(emit_vertices)(struct brw_context *brw)
                                              0 /* step rate */);
       }
 
-      if (vs_prog_data->uses_drawid) {
+      if (uses_derived_draw_params) {
          dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
-                                             brw->draw.draw_id_bo,
-                                             brw->draw.draw_id_offset,
-                                             brw->draw.draw_id_bo->size,
+                                             brw->draw.derived_draw_params_bo,
+                                             brw->draw.derived_draw_params_offset,
+                                             brw->draw.derived_draw_params_bo->size,
                                              0 /* stride */,
                                              0 /* step rate */);
       }
@@ -714,7 +717,7 @@ genX(emit_vertices)(struct brw_context *brw)
       };
 
 #if GEN_GEN >= 8
-      if (uses_firstvertex ||
+      if (vs_prog_data->uses_firstvertex ||
           vs_prog_data->uses_baseinstance) {
          elem_state.VertexBufferIndex = brw->vb.nr_buffers;
          elem_state.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT;
@@ -724,11 +727,10 @@ genX(emit_vertices)(struct brw_context *brw)
 #else
       elem_state.VertexBufferIndex = brw->vb.nr_buffers;
       elem_state.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT;
-      if (uses_firstvertex)
+      if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) {
          elem_state.Component0Control = VFCOMP_STORE_SRC;
-
-      if (vs_prog_data->uses_baseinstance)
          elem_state.Component1Control = VFCOMP_STORE_SRC;
+      }
 
       if (vs_prog_data->uses_vertexid)
          elem_state.Component2Control = VFCOMP_STORE_VID;
@@ -741,13 +743,13 @@ genX(emit_vertices)(struct brw_context *brw)
       dw += GENX(VERTEX_ELEMENT_STATE_length);
    }
 
-   if (vs_prog_data->uses_drawid) {
+   if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
       struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
          .Valid = true,
          .VertexBufferIndex = brw->vb.nr_buffers + 1,
-         .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32_UINT,
+         .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT,
          .Component0Control = VFCOMP_STORE_SRC,
-         .Component1Control = VFCOMP_STORE_0,
+         .Component1Control = VFCOMP_STORE_SRC,
          .Component2Control = VFCOMP_STORE_0,
          .Component3Control = VFCOMP_STORE_0,
 #if GEN_GEN < 5
-- 
2.14.1



More information about the mesa-dev mailing list