[Mesa-dev] [PATCH v2 7/8] i965: Implement glDrawTransformFeedback().

Pohjolainen, Topi topi.pohjolainen at intel.com
Mon Oct 28 12:14:55 CET 2013


On Fri, Oct 25, 2013 at 10:35:48PM -0700, Kenneth Graunke wrote:
> Implementing the GetTransformFeedbackVertexCount() driver hook allows
> the VBO module to call us with the right number of vertices.
> 
> The hardware doesn't directly count the number of vertices written by
> SOL, so we instead use the SO_NUM_PRIMS_WRITTEN(n) counters and multiply
> by the number of vertices per primitive.
> 
> Unfortunately, counting the number of primitives generated is tricky:
> a program might pause a transform feedback operation, start a second one
> with a different object, then switch back and resume.  Both transform
> feedback operations share the SO_NUM_PRIMS_WRITTEN counters.
> 
> To work around this, we save the counter values at Begin, Pause, Resume,
> and End.  This "bookends" each section where transform feedback is
> active for the current object.  Adding up differences of pairs gives
> us the number of primitives generated.  (This is similar to what we
> do for occlusion queries on platforms without hardware contexts.)
> 
> v2: Fix missing parenthesis in assertion (caught by Eric Anholt).
> 
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
> Reviewed-by: Eric Anholt <eric at anholt.net>
> ---
>  src/mesa/drivers/dri/i965/brw_context.c    |   2 +
>  src/mesa/drivers/dri/i965/brw_context.h    |  26 ++++
>  src/mesa/drivers/dri/i965/gen6_sol.c       |   1 +
>  src/mesa/drivers/dri/i965/gen7_sol_state.c | 190 ++++++++++++++++++++++++++++-
>  4 files changed, 218 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 623273c..f4e04b6 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -252,6 +252,8 @@ brw_init_driver_functions(struct brw_context *brw,
>  
>     functions->NewTransformFeedback = brw_new_transform_feedback;
>     functions->DeleteTransformFeedback = brw_delete_transform_feedback;
> +   functions->GetTransformFeedbackVertexCount =
> +      brw_get_transform_feedback_vertex_count;
>     if (brw->gen >= 7) {
>        functions->BeginTransformFeedback = gen7_begin_transform_feedback;
>        functions->EndTransformFeedback = gen7_end_transform_feedback;
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 48aa4c1..c72bad1 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -880,11 +880,33 @@ struct intel_batchbuffer {
>     } saved;
>  };
>  
> +#define BRW_MAX_XFB_STREAMS 4
> +
>  struct brw_transform_feedback_object {
>     struct gl_transform_feedback_object base;
>  
>     /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */
>     drm_intel_bo *offset_bo;
> +
> +   /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */
> +   GLenum primitive_mode;
> +
> +   /**
> +    * Count of primitives generated during this transform feedback operation.
> +    *  @{
> +    */
> +   uint64_t prims_generated[BRW_MAX_XFB_STREAMS];
> +   drm_intel_bo *prim_count_bo;
> +   unsigned prim_count_buffer_index; /**< in number of uint64_t units */
> +   /** @} */
> +
> +   /**
> +    * Number of vertices written between last Begin/EndTransformFeedback().
> +    *
> +    * Used to implement DrawTransformFeedback().
> +    */
> +   uint64_t vertices_written[BRW_MAX_XFB_STREAMS];
> +   bool vertices_written_valid;
>  };
>  
>  /**
> @@ -1574,6 +1596,10 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
>  void
>  brw_end_transform_feedback(struct gl_context *ctx,
>                             struct gl_transform_feedback_object *obj);
> +GLsizei
> +brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
> +                                        struct gl_transform_feedback_object *obj,
> +                                        GLuint stream);
>  
>  /* gen7_sol_state.c */
>  void
> diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
> index 2e6c86a..af5bed9 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sol.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
> @@ -162,6 +162,7 @@ brw_delete_transform_feedback(struct gl_context *ctx,
>     }
>  
>     drm_intel_bo_unreference(brw_obj->offset_bo);
> +   drm_intel_bo_unreference(brw_obj->prim_count_bo);
>  
>     free(brw_obj);
>  }
> diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> index 27421da..7cac8fe 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> @@ -249,14 +249,179 @@ const struct brw_tracked_state gen7_sol_state = {
>     .emit = upload_sol_state,
>  };
>  
> +/**
> + * Tally the number of primitives generated so far.
> + *
> + * The buffer contains a series of pairs:
> + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
> + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
> + *
> + * For each stream, we subtract the pair of values (end - start) to get the
> + * number of primitives generated during one section.  We accumulate these
> + * values, adding them up to get the total number of primitives generated.
> + */
> +static void
> +gen7_tally_prims_generated(struct brw_context *brw,
> +                           struct brw_transform_feedback_object *obj)
> +{
> +   /* If the current batch is still contributing to the number of primitives
> +    * generated, flush it now so the results will be present when mapped.
> +    */
> +   if (drm_intel_bo_references(brw->batch.bo, obj->prim_count_bo))
> +      intel_batchbuffer_flush(brw);
> +
> +   if (unlikely(brw->perf_debug && drm_intel_bo_busy(obj->prim_count_bo)))
> +      perf_debug("Stalling for # of transform feedback primitives written.\n");
> +
> +   drm_intel_bo_map(obj->prim_count_bo, false);
> +   uint64_t *prim_counts = obj->prim_count_bo->virtual;
> +
> +   assert(obj->prim_count_buffer_index % (2 * BRW_MAX_XFB_STREAMS) == 0);
> +   int pairs = obj->prim_count_buffer_index / (2 * BRW_MAX_XFB_STREAMS);
> +
> +   for (int i = 0; i < pairs; i++) {
> +      for (int s = 0; s < BRW_MAX_XFB_STREAMS; s++) {
> +         obj->prims_generated[s] +=
> +            prim_counts[BRW_MAX_XFB_STREAMS + s] - prim_counts[s];
> +      }
> +      prim_counts += 2 * BRW_MAX_XFB_STREAMS; /* move to the next pair */
> +   }
> +
> +   drm_intel_bo_unmap(obj->prim_count_bo);
> +
> +   /* Release the BO; we've already tallied all the data it contained. */
> +   drm_intel_bo_unreference(obj->prim_count_bo);
> +   obj->prim_count_bo = NULL;
> +}
> +
> +/**
> + * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
> + * to prim_count_bo.
> + *
> + * If prim_count_bo is out of space, gather up the results so far into
> + * prims_generated[] and allocate a new buffer with enough space.
> + *
> + * The number of primitives written is used to compute the number of vertices
> + * written to a transform feedback stream, which is required to implement
> + * DrawTransformFeedback().
> + */
> +static void
> +gen7_save_primitives_written_counters(struct brw_context *brw,
> +                                struct brw_transform_feedback_object *obj)
> +{
> +   const int streams = BRW_MAX_XFB_STREAMS;
> +
> +   /* Check if there's enough space for a new pair of four values. */
> +   if (obj->prim_count_bo != NULL &&
> +       obj->prim_count_buffer_index + 2 * streams >= 4096 / sizeof(uint64_t)) {
> +      /* Gather up the results so far and release the BO. */
> +      gen7_tally_prims_generated(brw, obj);
> +   }
> +
> +   /* Allocate a new buffer if needed.  A page should be plenty. */
> +   if (obj->prim_count_bo == NULL) {
> +      obj->prim_count_buffer_index = 0;
> +      obj->prim_count_bo =
> +         drm_intel_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 4096);

I was wondering why 'gen7_tally_prims_generated()' needs to dispose of the
buffer object, and why the logic here then has to allocate another one.
Couldn't we reuse the old BO and simply let 'gen7_tally_prims_generated()'
reset 'prim_count_buffer_index' to zero?

Below, 'brw_compute_xfb_vertices_written()' also wants to tally, but it guards
itself against adding the counters multiple times by using the flag
'vertices_written_valid'. In fact, if it didn't, 'gen7_tally_prims_generated()'
would call 'drm_intel_bo_map()' on a NULL pointer, right? Or did I
misunderstand the logic?

> +   }
> +
> +   /* Flush any drawing so that the counters have the right values. */
> +   intel_batchbuffer_emit_mi_flush(brw);
> +
> +   /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
> +   for (int i = 0; i < streams; i++) {
> +      brw_store_register_mem64(brw, obj->prim_count_bo,
> +                               GEN7_SO_NUM_PRIMS_WRITTEN(i),
> +                               obj->prim_count_buffer_index + i);
> +   }
> +
> +   /* Update where to write data to. */
> +   obj->prim_count_buffer_index += streams;
> +}
> +
> +/**
> + * Compute the number of vertices written by this transform feedback operation.
> + */
> +static void
> +brw_compute_xfb_vertices_written(struct brw_context *brw,
> +                                 struct brw_transform_feedback_object *obj)
> +{
> +   if (obj->vertices_written_valid || !obj->base.EndedAnytime)
> +      return;
> +
> +   unsigned vertices_per_prim = 0;
> +
> +   switch (obj->primitive_mode) {
> +   case GL_POINTS:
> +      vertices_per_prim = 1;
> +      break;
> +   case GL_LINES:
> +      vertices_per_prim = 2;
> +      break;
> +   case GL_TRIANGLES:
> +      vertices_per_prim = 3;
> +      break;
> +   default:
> +      assert(!"Invalid transform feedback primitive mode.");
> +   }
> +
> +   /* Get the number of primitives generated. */
> +   gen7_tally_prims_generated(brw, obj);
> +
> +   for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
> +      obj->vertices_written[i] = vertices_per_prim * obj->prims_generated[i];
> +   }
> +   obj->vertices_written_valid = true;
> +}
> +
> +/**
> + * GetTransformFeedbackVertexCount() driver hook.
> + *
> + * Returns the number of vertices written to a particular stream by the last
> + * Begin/EndTransformFeedback block.  Used to implement DrawTransformFeedback().
> + */
> +GLsizei
> +brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
> +                                        struct gl_transform_feedback_object *obj,
> +                                        GLuint stream)
> +{
> +   struct brw_context *brw = brw_context(ctx);
> +   struct brw_transform_feedback_object *brw_obj =
> +      (struct brw_transform_feedback_object *) obj;
> +
> +   assert(obj->EndedAnytime);
> +   assert(stream < BRW_MAX_XFB_STREAMS);
> +
> +   brw_compute_xfb_vertices_written(brw, brw_obj);
> +   return brw_obj->vertices_written[stream];
> +}
> +
>  void
>  gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
>                                struct gl_transform_feedback_object *obj)
>  {
>     struct brw_context *brw = brw_context(ctx);
> +   struct brw_transform_feedback_object *brw_obj =
> +      (struct brw_transform_feedback_object *) obj;
>  
>     intel_batchbuffer_flush(brw);
>     brw->batch.needs_sol_reset = true;
> +
> +   /* We're about to lose the information needed to compute the number of
> +    * vertices written during the last Begin/EndTransformFeedback section,
> +    * so we can't delay it any further.
> +    */
> +   brw_compute_xfb_vertices_written(brw, brw_obj);
> +
> +   /* No primitives have been generated yet. */
> +   for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
> +      brw_obj->prims_generated[i] = 0;
> +   }
> +
> +   /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
> +   gen7_save_primitives_written_counters(brw, brw_obj);
> +
> +   brw_obj->primitive_mode = mode;
>  }
>  
>  void
> @@ -270,8 +435,18 @@ gen7_end_transform_feedback(struct gl_context *ctx,
>      * simplicity, just do a full flush.
>      */
>     struct brw_context *brw = brw_context(ctx);
> +   struct brw_transform_feedback_object *brw_obj =
> +      (struct brw_transform_feedback_object *) obj;
>  
> -   intel_batchbuffer_emit_mi_flush(brw);
> +   /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */
> +   gen7_save_primitives_written_counters(brw, brw_obj);
> +
> +   /* EndTransformFeedback() means that we need to update the number of
> +    * vertices written.  Since it's only necessary if DrawTransformFeedback()
> +    * is called and it means mapping a buffer object, we delay computing it
> +    * until it's absolutely necessary to try and avoid stalls.
> +    */
> +   brw_obj->vertices_written_valid = false;
>  }
>  
>  void
> @@ -282,6 +457,9 @@ gen7_pause_transform_feedback(struct gl_context *ctx,
>     struct brw_transform_feedback_object *brw_obj =
>        (struct brw_transform_feedback_object *) obj;
>  
> +   /* Flush any drawing so that the counters have the right values. */
> +   intel_batchbuffer_emit_mi_flush(brw);
> +
>     /* Save the SOL buffer offset register values. */
>     for (int i = 0; i < 4; i++) {
>        BEGIN_BATCH(3);
> @@ -292,6 +470,13 @@ gen7_pause_transform_feedback(struct gl_context *ctx,
>                  i * sizeof(uint32_t));
>        ADVANCE_BATCH();
>     }
> +
> +   /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
> +    * While this operation is paused, other transform feedback actions may
> +    * occur, which will contribute to the counters.  We need to exclude that
> +    * from our counts.
> +    */
> +   gen7_save_primitives_written_counters(brw, brw_obj);
>  }
>  
>  void
> @@ -312,4 +497,7 @@ gen7_resume_transform_feedback(struct gl_context *ctx,
>                  i * sizeof(uint32_t));
>        ADVANCE_BATCH();
>     }
> +
> +   /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
> +   gen7_save_primitives_written_counters(brw, brw_obj);
>  }
> -- 
> 1.8.3.2
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list