[Mesa-dev] [PATCH 3/3] i965: Implement "Static Vertex Count" geometry shader optimization.

Jordan Justen jordan.l.justen at intel.com
Thu Sep 24 23:59:04 PDT 2015


On 2015-09-24 22:20:40, Kenneth Graunke wrote:
> Broadwell's 3DSTATE_GS contains new "Static Output" and "Static Vertex
> Count" fields, which control a new optimization.  Normally, geometry
> shaders can output arbitrary numbers of vertices, which means that
> resource allocation has to be done on the fly.  However, if the number
> of vertices is statically known, the hardware can pre-allocate resources
> up front, which is more efficient.
> 
> Thanks to the new NIR GS intrinsics, this is easy.  We just call the
> function introduced in the previous commit to get the vertex count.
> If it obtains a count, we stop emitting the extra 32-bit "Vertex Count"
> field in the VUE, and instead fill out the 3DSTATE_GS fields.
> 
> Improves performance of Gl32GSCloth by 5.16347% +/- 0.12611% (n=91)
> on my Lenovo X250 laptop (Broadwell GT2) at 1024x768.
> 
> shader-db statistics for geometry shaders only:
> 
> total instructions in shared programs: 3227 -> 3207 (-0.62%)
> instructions in affected programs:     242 -> 222 (-8.26%)
> helped:                                10
> 
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_context.h           |  5 +++++
>  src/mesa/drivers/dri/i965/brw_defines.h           |  5 +++++
>  src/mesa/drivers/dri/i965/brw_gs.c                |  5 +++++
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 11 +++++++----
>  src/mesa/drivers/dri/i965/gen8_gs_state.c         |  6 ++++++
>  5 files changed, 28 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index b05b8bd..5c31ba4 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -782,6 +782,11 @@ struct brw_gs_prog_data
>  
>     bool include_primitive_id;
>  
> +   /**
> +    * The number of vertices emitted, if constant - otherwise -1.
> +    */
> +   int static_vertex_count;
> +
>     int invocations;
>  
>     /**
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index f9d8d1b..6d94a6f 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1960,6 +1960,11 @@ enum brw_message_target {
>  # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK         INTEL_MASK(25, 16)
>  # define GEN6_GS_ENABLE                                        (1 << 15)
>  
> +/* Gen8+ DW8 */
> +# define GEN8_GS_STATIC_OUTPUT                          (1 << 30)
> +# define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT              16
> +# define GEN8_GS_STATIC_VERTEX_COUNT_MASK               INTEL_MASK(26, 16)
> +
>  /* Gen8+ DW9 */
>  # define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
>  # define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT                16
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
> index 16ea684..111cf93 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -73,6 +73,11 @@ brw_codegen_gs_prog(struct brw_context *brw,
>     c.prog_data.base.base.nr_params = param_count;
>     c.prog_data.base.base.nr_image_params = gs->NumImages;
>  
> +   if (brw->gen >= 8) {
> +      c.prog_data.static_vertex_count =
> +         nir_gs_count_vertices(gp->program.Base.nir);
> +   }

It looks like static_vertex_count will always be 0 for gen < 8, which I
guess is an invalid value. It looks like the code that uses it always
checks gen >= 8, so it should be fine.

Series Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>

>     if (brw->gen >= 7) {
>        if (gp->program.OutputType == GL_POINTS) {
>           /* When the output type is points, the geometry shader may output data
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> index ff5bd98..acf0501 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> @@ -234,17 +234,20 @@ vec4_gs_visitor::emit_thread_end()
>      */
>     int base_mrf = 1;
>  
> +   bool static_vertex_count = c->prog_data.static_vertex_count != -1;
> +
>     current_annotation = "thread end";
>     dst_reg mrf_reg(MRF, base_mrf);
>     src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
>     vec4_instruction *inst = emit(MOV(mrf_reg, r0));
>     inst->force_writemask_all = true;
> -   emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
> +   if (devinfo->gen < 8 || !static_vertex_count)
> +      emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
>     if (INTEL_DEBUG & DEBUG_SHADER_TIME)
>        emit_shader_time_end();
>     inst = emit(GS_OPCODE_THREAD_END);
>     inst->base_mrf = base_mrf;
> -   inst->mlen = devinfo->gen >= 8 ? 2 : 1;
> +   inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
>  }
>  
>  
> @@ -284,7 +287,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
>     /* We need to increment Global Offset by 1 to make room for Broadwell's
>      * extra "Vertex Count" payload at the beginning of the URB entry.
>      */
> -   if (devinfo->gen >= 8)
> +   if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
>        inst->offset++;
>  
>     inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> @@ -421,7 +424,7 @@ vec4_gs_visitor::emit_control_data_bits()
>      * URB entry.  Since this is an OWord message, Global Offset is counted
>      * in 128-bit units, so we must set it to 2.
>      */
> -   if (devinfo->gen >= 8)
> +   if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
>        inst->offset = 2;
>     inst->base_mrf = base_mrf;
>     inst->mlen = 2;
> diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
> index 81bd3b2..4195f4c 100644
> --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
> @@ -90,6 +90,12 @@ gen8_upload_gs_state(struct brw_context *brw)
>        uint32_t dw8 = brw->gs.prog_data->control_data_format <<
>                       HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
>  
> +      if (brw->gs.prog_data->static_vertex_count != -1) {
> +         dw8 |= GEN8_GS_STATIC_OUTPUT |
> +                SET_FIELD(brw->gs.prog_data->static_vertex_count,
> +                          GEN8_GS_STATIC_VERTEX_COUNT);
> +      }
> +
>        if (brw->gen < 9)
>           dw7 |= (brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT;
>        else
> -- 
> 2.5.3
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list