[Mesa-dev] [PATCH 2/2] i965: Separate gl_InstanceID and gl_VertexID uploading.

Ian Romanick idr at freedesktop.org
Fri Sep 12 08:37:11 PDT 2014


On 09/10/2014 03:41 PM, Kenneth Graunke wrote:
> We always uploaded them together, mostly out of laziness - both required
> an additional vertex element.  However, gl_VertexID now also requires an
> additional vertex buffer for storing gl_BaseVertex; for non-indirect
> draws this also means uploading (a small amount of) data.  This is extra
> overhead we don't need if the shader only uses gl_InstanceID.
> 
> In particular, our clear shaders currently use gl_InstanceID for doing
> layered clears, but don't need gl_VertexID.
> 
> XXX: Needs testing on Broadwell before pushing.

Has this happened yet?

> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> Cc: "10.3" <mesa-stable at lists.freedesktop.org>

The rest of the patch looks okay, but I don't know that I'm the best judge.

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>

> ---
>  src/mesa/drivers/dri/i965/brw_context.h           |  1 +
>  src/mesa/drivers/dri/i965/brw_draw_upload.c       | 29 +++++++++++++++++------
>  src/mesa/drivers/dri/i965/brw_vec4.cpp            |  2 +-
>  src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  4 +++-
>  src/mesa/drivers/dri/i965/gen8_draw_upload.c      | 22 +++++++++++------
>  5 files changed, 42 insertions(+), 16 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 39cb856..5830aa99 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -553,6 +553,7 @@ struct brw_vs_prog_data {
>     GLbitfield64 inputs_read;
>  
>     bool uses_vertexid;
> +   bool uses_instanceid;
>  };
>  
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> index d59ca8b..2162624 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> @@ -671,14 +671,16 @@ emit_vertex_buffer_state(struct brw_context *brw,
>  
>  static void brw_emit_vertices(struct brw_context *brw)
>  {
> -   GLuint i, nr_elements;
> +   GLuint i;
>  
>     brw_prepare_vertices(brw);
>     brw_prepare_shader_draw_parameters(brw);
>  
>     brw_emit_query_begin(brw);
>  
> -   nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
> +   unsigned nr_elements = brw->vb.nr_enabled;
> +   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid)
> +      ++nr_elements;
>  
>     /* If the VS doesn't read any inputs (calculating vertex position from
>      * a state variable for some reason, for example), emit a single pad
> @@ -824,13 +826,26 @@ static void brw_emit_vertices(struct brw_context *brw)
>                  (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
>     }
>  
> -   if (brw->vs.prog_data->uses_vertexid) {
> +   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
>        uint32_t dw0 = 0, dw1 = 0;
> +      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
> +      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
> +      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
> +      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;
>  
> -      dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
> -            (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_1_SHIFT) |
> -            (BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_2_SHIFT) |
> -            (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_3_SHIFT);
> +      if (brw->vs.prog_data->uses_vertexid) {
> +         comp0 = BRW_VE1_COMPONENT_STORE_SRC;
> +         comp2 = BRW_VE1_COMPONENT_STORE_VID;
> +      }
> +
> +      if (brw->vs.prog_data->uses_instanceid) {
> +         comp3 = BRW_VE1_COMPONENT_STORE_IID;
> +      }
> +
> +      dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
> +            (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
> +            (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
> +            (comp3 << BRW_VE1_COMPONENT_3_SHIFT);
>  
>        if (brw->gen >= 6) {
>           dw0 |= GEN6_VE0_VALID |
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index cda097f..0f13c0d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -1528,7 +1528,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
>      * don't represent it with a flag in inputs_read, so we call it
>      * VERT_ATTRIB_MAX.
>      */
> -   if (vs_prog_data->uses_vertexid) {
> +   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
>        attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
>        nr_attributes++;
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
> index 667ed68..72b6ef0 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
> @@ -151,18 +151,20 @@ vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
>      * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
>      */
>     dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
> -   vs_prog_data->uses_vertexid = true;
>  
>     switch (ir->data.location) {
>     case SYSTEM_VALUE_BASE_VERTEX:
>        reg->writemask = WRITEMASK_X;
> +      vs_prog_data->uses_vertexid = true;
>        break;
>     case SYSTEM_VALUE_VERTEX_ID:
>     case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
>        reg->writemask = WRITEMASK_Z;
> +      vs_prog_data->uses_vertexid = true;
>        break;
>     case SYSTEM_VALUE_INSTANCE_ID:
>        reg->writemask = WRITEMASK_W;
> +      vs_prog_data->uses_instanceid = true;
>        break;
>     default:
>        unreachable("not reached");
> diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> index 7e4c1eb..8f0e515 100644
> --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> @@ -43,7 +43,7 @@ gen8_emit_vertices(struct brw_context *brw)
>     brw_prepare_vertices(brw);
>     brw_prepare_shader_draw_parameters(brw);
>  
> -   if (brw->vs.prog_data->uses_vertexid) {
> +   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
>        unsigned vue = brw->vb.nr_enabled;
>  
>        WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
> @@ -53,14 +53,22 @@ gen8_emit_vertices(struct brw_context *brw)
>                  "Trying to insert VID/IID past 33rd vertex element, "
>                  "need to reorder the vertex attrbutes.");
>  
> +      unsigned dw1 = 0;
> +      if (brw->vs.prog_data->uses_vertexid) {
> +         dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID |
> +                (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) |  /* .z channel */
> +                (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT);
> +      }
> +
> +      if (brw->vs.prog_data->uses_instanceid) {
> +         dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID |
> +                (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */
> +                (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT);
> +      }
> +
>        BEGIN_BATCH(2);
>        OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
> -      OUT_BATCH(GEN8_SGVS_ENABLE_VERTEX_ID |
> -                (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) |   /* .z channel */
> -                (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT) |
> -                GEN8_SGVS_ENABLE_INSTANCE_ID |
> -                (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */
> -                (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT));
> +      OUT_BATCH(dw1);
>        ADVANCE_BATCH();
>  
>        BEGIN_BATCH(3);
> 



More information about the mesa-dev mailing list