[Mesa-dev] [PATCH v2 3/7] intel: emit first_vertex and reorder the VE' components

Antia Puentes apuentes at igalia.com
Mon Dec 4 20:12:49 UTC 2017


The new order is:
* VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
* VE 2: <Draw ID, BaseVertex, 0, 0>

Previously it was:
* VE 1: <BaseVertex, BaseInstance, VertexID, InstanceID>
* VE 2: <Draw ID, 0, 0, 0>

The gl_BaseVertex is in a new location now, and firstvertex occupies
the old gl_BaseVertex place. This way we can keep pointing to the
indirect buffer for indirect draw calls.

Reviewed-by: Neil Roberts <nroberts at igalia.com>
---
 src/intel/compiler/brw_nir.c                  | 11 +++++---
 src/intel/compiler/brw_vec4.cpp               | 13 +++++----
 src/mesa/drivers/dri/i965/brw_context.h       | 36 ++++++++++++++++++-------
 src/mesa/drivers/dri/i965/brw_draw.c          | 26 +++++++++++-------
 src/mesa/drivers/dri/i965/brw_draw_upload.c   | 13 ++++-----
 src/mesa/drivers/dri/i965/genX_state_upload.c | 39 +++++++++++++++------------
 6 files changed, 88 insertions(+), 50 deletions(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 8f3f77f89ae..f702f5b8534 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -240,7 +240,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
     */
    const bool has_sgvs =
       nir->info.system_values_read &
-      (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
+      (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
        BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
        BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
        BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
@@ -262,6 +262,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
             switch (intrin->intrinsic) {
+            case nir_intrinsic_load_first_vertex:
             case nir_intrinsic_load_base_vertex:
             case nir_intrinsic_load_base_instance:
             case nir_intrinsic_load_vertex_id_zero_base:
@@ -279,7 +280,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
 
                nir_intrinsic_set_base(load, num_inputs);
                switch (intrin->intrinsic) {
-               case nir_intrinsic_load_base_vertex:
+               case nir_intrinsic_load_first_vertex:
                   nir_intrinsic_set_component(load, 0);
                   break;
                case nir_intrinsic_load_base_instance:
@@ -292,11 +293,15 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
                   nir_intrinsic_set_component(load, 3);
                   break;
                case nir_intrinsic_load_draw_id:
+               case nir_intrinsic_load_base_vertex:
                   /* gl_DrawID is stored right after gl_VertexID and friends
                    * if any of them exist.
                    */
                   nir_intrinsic_set_base(load, num_inputs + has_sgvs);
-                  nir_intrinsic_set_component(load, 0);
+                  if (intrin->intrinsic ==  nir_intrinsic_load_draw_id)
+                     nir_intrinsic_set_component(load, 0);
+                  else
+                     nir_intrinsic_set_component(load, 1);
                   break;
                default:
                   unreachable("Invalid system value intrinsic");
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 14f930e0264..70a197a9fa0 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -2789,13 +2789,19 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
     * incoming vertex attribute.  So, add an extra slot.
     */
    if (shader->info.system_values_read &
-       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
+       (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
         BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
         BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
         BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
       nr_attribute_slots++;
    }
 
+   if (shader->info.system_values_read &
+       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
+        BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))) {
+      nr_attribute_slots++;
+   }
+
    if (shader->info.system_values_read &
        BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX))
       prog_data->uses_basevertex = true;
@@ -2816,12 +2822,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
        BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))
       prog_data->uses_instanceid = true;
 
-   /* gl_DrawID has its very own vec4 */
    if (shader->info.system_values_read &
-       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
+       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))
       prog_data->uses_drawid = true;
-      nr_attribute_slots++;
-   }
 
    /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
     * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode.  Empirically, in
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 06704838061..c6d176bc243 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -843,28 +843,44 @@ struct brw_context
 
    struct {
       struct {
-         /** The value of gl_BaseVertex for the current _mesa_prim. */
-         int gl_basevertex;
+         /**
+          * Either the value of gl_BaseVertex for indexed draw calls or the
+          * value of the argument <first> for non-indexed draw calls, for the
+          * current _mesa_prim.
+          */
+         int firstvertex;
 
          /** The value of gl_BaseInstance for the current _mesa_prim. */
          int gl_baseinstance;
       } params;
 
       /**
-       * Buffer and offset used for GL_ARB_shader_draw_parameters
-       * (for now, only gl_BaseVertex).
+       * Buffer and offset used for GL_ARB_shader_draw_parameters. For indirect
+       * draw calls it will point to the indirect buffer.
        */
       struct brw_bo *draw_params_bo;
       uint32_t draw_params_offset;
 
+      struct {
+         /** The value of gl_DrawID for the current _mesa_prim. */
+         int gl_drawid;
+
+         /**
+          * The value of gl_BaseVertex for the current _mesa_prim. It must be
+          * zero for non-indexed calls.
+          */
+         int gl_basevertex;
+      } derived_params;
+
       /**
-       * The value of gl_DrawID for the current _mesa_prim. This always comes
-       * in from it's own vertex buffer since it's not part of the indirect
-       * draw parameters.
+       * Buffer and offset also used for GL_ARB_shader_draw_parameters. As they
+       * are not part of the indirect buffer they will go in their own vertex
+       * element. Note that although gl_BaseVertex is part of the indirect
+       * buffer for indexed draw calls, that is not longer the case for
+       * non-indexed.
        */
-      int gl_drawid;
-      struct brw_bo *draw_id_bo;
-      uint32_t draw_id_offset;
+      struct brw_bo *derived_draw_params_bo;
+      uint32_t derived_draw_params_offset;
 
       /**
        * Pointer to the the buffer storing the indirect draw parameters. It
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 7e29dcfd4e8..18d26cfafca 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -760,25 +760,25 @@ brw_draw_single_prim(struct gl_context *ctx,
     * always flag if the shader uses one of the values. For direct draws,
     * we only flag if the values change.
     */
-   const int new_basevertex =
+   const int new_firstvertex =
       prim->indexed ? prim->basevertex : prim->start;
    const int new_baseinstance = prim->base_instance;
    const struct brw_vs_prog_data *vs_prog_data =
       brw_vs_prog_data(brw->vs.base.prog_data);
    if (prim_id > 0) {
       const bool uses_draw_parameters =
-         vs_prog_data->uses_basevertex ||
+         vs_prog_data->uses_firstvertex ||
          vs_prog_data->uses_baseinstance;
 
       if ((uses_draw_parameters && prim->is_indirect) ||
-          (vs_prog_data->uses_basevertex &&
-           brw->draw.params.gl_basevertex != new_basevertex) ||
+          (vs_prog_data->uses_firstvertex &&
+           brw->draw.params.firstvertex != new_firstvertex) ||
           (vs_prog_data->uses_baseinstance &&
            brw->draw.params.gl_baseinstance != new_baseinstance))
          brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
    }
 
-   brw->draw.params.gl_basevertex = new_basevertex;
+   brw->draw.params.firstvertex = new_firstvertex;
    brw->draw.params.gl_baseinstance = new_baseinstance;
    brw_bo_unreference(brw->draw.draw_params_bo);
 
@@ -803,12 +803,20 @@ brw_draw_single_prim(struct gl_context *ctx,
     * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
     * the loop.
     */
-   brw->draw.gl_drawid = prim->draw_id;
-   brw_bo_unreference(brw->draw.draw_id_bo);
-   brw->draw.draw_id_bo = NULL;
-   if (prim_id > 0 && vs_prog_data->uses_drawid)
+   const int new_basevertex = prim->indexed ? prim->basevertex : prim->start;
+
+   if (prim_id > 0 &&
+       (vs_prog_data->uses_drawid ||
+        (vs_prog_data->uses_basevertex &&
+         brw->draw.derived_params.gl_basevertex != new_basevertex)))
       brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
 
+   brw->draw.derived_params.gl_drawid = prim->draw_id;
+   brw->draw.derived_params.gl_basevertex = new_basevertex;
+
+   brw_bo_unreference(brw->draw.derived_draw_params_bo);
+   brw->draw.derived_draw_params_bo = NULL;
+
    if (devinfo->gen < 6)
       brw_set_prim(brw, prim);
    else
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 9b81999ea05..da3a0569ccb 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -696,18 +696,19 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw)
    const struct brw_vs_prog_data *vs_prog_data =
       brw_vs_prog_data(brw->vs.base.prog_data);
 
-   /* For non-indirect draws, upload gl_BaseVertex. */
-   if ((vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) &&
+   /* For non-indirect draws, upload the parameters. */
+   if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
        brw->draw.draw_params_bo == NULL) {
       intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
 			&brw->draw.draw_params_bo,
                         &brw->draw.draw_params_offset);
    }
 
-   if (vs_prog_data->uses_drawid) {
-      intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
-                        &brw->draw.draw_id_bo,
-                        &brw->draw.draw_id_offset);
+   if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
+      intel_upload_data(brw, &brw->draw.derived_params,
+                        sizeof(brw->draw.derived_params), 4,
+                        &brw->draw.derived_draw_params_bo,
+                        &brw->draw.derived_draw_params_offset);
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index dcf497c9183..db68d89aa66 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -498,19 +498,20 @@ genX(emit_vertices)(struct brw_context *brw)
     * can't be inserted past that so we need a dummy element to ensure that
     * the edge flag is the last one.
     */
-   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
+   const bool needs_sgvs_element = (vs_prog_data->uses_firstvertex ||
                                     vs_prog_data->uses_baseinstance ||
                                     ((vs_prog_data->uses_instanceid ||
                                       vs_prog_data->uses_vertexid)
                                      && uses_edge_flag));
 #else
-   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
+   const bool needs_sgvs_element = (vs_prog_data->uses_firstvertex ||
                                     vs_prog_data->uses_baseinstance ||
                                     vs_prog_data->uses_instanceid ||
                                     vs_prog_data->uses_vertexid);
 #endif
    unsigned nr_elements =
-      brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
+      brw->vb.nr_enabled + needs_sgvs_element +
+      (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex);
 
 #if GEN_GEN < 8
    /* If any of the formats of vb.enabled needs more that one upload, we need
@@ -549,10 +550,15 @@ genX(emit_vertices)(struct brw_context *brw)
 
    /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
    const bool uses_draw_params =
-      vs_prog_data->uses_basevertex ||
+      vs_prog_data->uses_firstvertex ||
       vs_prog_data->uses_baseinstance;
+
+   const bool uses_derived_draw_params =
+      vs_prog_data->uses_basevertex ||
+      vs_prog_data->uses_drawid;
+
    const unsigned nr_buffers = brw->vb.nr_buffers +
-      uses_draw_params + vs_prog_data->uses_drawid;
+      uses_draw_params + uses_derived_draw_params;
 
    if (nr_buffers) {
       assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
@@ -586,11 +592,11 @@ genX(emit_vertices)(struct brw_context *brw)
                                              0 /* step rate */);
       }
 
-      if (vs_prog_data->uses_drawid) {
+      if (uses_derived_draw_params) {
          dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
-                                             brw->draw.draw_id_bo,
-                                             brw->draw.draw_id_offset,
-                                             brw->draw.draw_id_bo->size,
+                                             brw->draw.derived_draw_params_bo,
+                                             brw->draw.derived_draw_params_offset,
+                                             brw->draw.derived_draw_params_bo->size,
                                              0 /* stride */,
                                              0 /* step rate */);
       }
@@ -727,7 +733,7 @@ genX(emit_vertices)(struct brw_context *brw)
       };
 
 #if GEN_GEN >= 8
-      if (vs_prog_data->uses_basevertex ||
+      if (vs_prog_data->uses_firstvertex ||
           vs_prog_data->uses_baseinstance) {
          elem_state.VertexBufferIndex = brw->vb.nr_buffers;
          elem_state.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT;
@@ -737,13 +743,12 @@ genX(emit_vertices)(struct brw_context *brw)
 #else
       elem_state.VertexBufferIndex = brw->vb.nr_buffers;
       elem_state.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT;
-      if (vs_prog_data->uses_basevertex)
+      if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) {
          elem_state.Component0Control = VFCOMP_STORE_SRC;
-
-      if (vs_prog_data->uses_baseinstance)
          elem_state.Component1Control = VFCOMP_STORE_SRC;
+      }
 
-      if (vs_prog_data->uses_vertexid)
+      if (vs_prog_data->uses_firstvertex)
          elem_state.Component2Control = VFCOMP_STORE_VID;
 
       if (vs_prog_data->uses_instanceid)
@@ -754,13 +759,13 @@ genX(emit_vertices)(struct brw_context *brw)
       dw += GENX(VERTEX_ELEMENT_STATE_length);
    }
 
-   if (vs_prog_data->uses_drawid) {
+   if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
       struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
          .Valid = true,
          .VertexBufferIndex = brw->vb.nr_buffers + 1,
-         .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32_UINT,
+         .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT,
          .Component0Control = VFCOMP_STORE_SRC,
-         .Component1Control = VFCOMP_STORE_0,
+         .Component1Control = VFCOMP_STORE_SRC,
          .Component2Control = VFCOMP_STORE_0,
          .Component3Control = VFCOMP_STORE_0,
 #if GEN_GEN < 5
-- 
2.14.1



More information about the mesa-dev mailing list