[Mesa-dev] [PATCH v3] i965: Implement GL_PRIMITIVES_GENERATED with non-zero streams.

Steven Newbury steve at snewbury.org.uk
Tue Jul 1 06:53:28 PDT 2014


On Thu, 2014-06-26 at 08:24 +0200, Iago Toral Quiroga wrote:
> So far we have been using CL_INVOCATION_COUNT to resolve this query 
> but this
> is no good with streams, as only stream 0 reaches the clipping 
> stage. Instead
> we will use SO_PRIM_STORAGE_NEEDED which can keep track of the 
> primitives sent
> to each individual stream.
>  
> SO_PRIM_STORAGE_NEEDED is tied to the SOL stage, and according to
> ARB_transform_feedback3 we need to be able to query the primitives
> generated in each stream whether transform feedback is active or
> not. To satisfy both, we enable the SOL unit even if transform
> feedback is not active, but disable all output buffers in that
> case. This effectively disables transform feedback while still
> permitting statistics to be enabled, so SO_PRIM_STORAGE_NEEDED
> keeps counting even when transform feedback is not active.
> ---
>  src/mesa/drivers/dri/i965/gen6_queryobj.c  | 13 +++++++++----
>  src/mesa/drivers/dri/i965/gen7_sol_state.c | 20 +++++++++++++++++---
>  2 files changed, 26 insertions(+), 7 deletions(-)
>  
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
> b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index 0cb64ca..b4b1509 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -84,11 +84,16 @@ brw_store_register_mem64(struct brw_context *brw,
>  static void
>  write_primitives_generated(struct brw_context *brw,
> -                           drm_intel_bo *query_bo, int idx)
> +                           drm_intel_bo *query_bo, int stream, int 
> idx)
>  {
>     intel_batchbuffer_emit_mi_flush(brw);
> -   brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, 
> idx);
> +   if (brw->gen >= 7) {
> +      brw_store_register_mem64(brw, query_bo,
> +                               GEN7_SO_PRIM_STORAGE_NEEDED(stream), 
> idx);
> +   } else {
> +      brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, 
> idx);
> +   }
>  }
>  static void
> @@ -240,7 +245,7 @@ gen6_begin_query(struct gl_context *ctx, struct 
> gl_query_object *q)
>        break;
>     case GL_PRIMITIVES_GENERATED:
> -      write_primitives_generated(brw, query->bo, 0);
> +      write_primitives_generated(brw, query->bo, 
> query->Base.Stream, 0);
>        break;
>     case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> @@ -279,7 +284,7 @@ gen6_end_query(struct gl_context *ctx, struct 
> gl_query_object *q)
>        break;
>     case GL_PRIMITIVES_GENERATED:
> -      write_primitives_generated(brw, query->bo, 1);
> +      write_primitives_generated(brw, query->bo, 
> query->Base.Stream, 1);
>        break;
>     case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c 
> b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> index 11b2e2e..d2c3ae3 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> @@ -223,14 +223,28 @@ upload_3dstate_streamout(struct brw_context 
> *brw, bool active,
>     uint32_t dw1 = 0, dw2 = 0;
>     int i;
> +   /*
> +    * From ARB_transform_feedback3:
> +    *
> +    *   "When a generated primitive query for a vertex stream is 
> active, the
> +    *   primitives-generated count is incremented every time a 
> primitive
> +    *   emitted to that stream reaches the Discarding Rasterization 
> stage
> +    *   (see Section 3.x) right before rasterization. This counter 
> is
> +    *   incremented whether or not transform feedback is active."
> +    *
> +    * Since we can only keep track of generated primitives for each 
> stream
> +    * in the SOL stage, we need to make sure it is always active 
> even if
> +    * transform feedback is not. This way we can track primitives 
> generated
> +    * in each stream via SO_PRIM_STORAGE_NEEDED.
> +    */
> +   dw1 |= SO_FUNCTION_ENABLE;
> +   dw1 |= SO_STATISTICS_ENABLE;
> +
>     if (active) {
>        int urb_entry_read_offset = 0;
>        int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
>           urb_entry_read_offset;
> -      dw1 |= SO_FUNCTION_ENABLE;
> -      dw1 |= SO_STATISTICS_ENABLE;
> -
>        /* _NEW_LIGHT */
>        if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
>           dw1 |= SO_REORDER_TRAILING;
I'm getting an instant GPU lockup on Ivy Bridge (3840QM) which I've 
bisected to this commit in mesa/mesa git master.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: This is a digitally signed message part
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140701/07dbfcf2/attachment.sig>


More information about the mesa-dev mailing list