[Mesa-dev] [PATCH v2 7/8] i965: Implement glDrawTransformFeedback().

Kenneth Graunke kenneth at whitecape.org
Mon Oct 28 23:05:50 CET 2013


On 10/28/2013 04:14 AM, Pohjolainen, Topi wrote:
> On Fri, Oct 25, 2013 at 10:35:48PM -0700, Kenneth Graunke wrote:
>> Implementing the GetTransformFeedbackVertexCount() driver hook allows
>> the VBO module to call us with the right number of vertices.
>>
>> The hardware doesn't directly count the number of vertices written by
>> SOL, so we instead use the SO_NUM_PRIMS_WRITTEN(n) counters and multiply
>> by the number of vertices per primitive.
>>
>> Unfortunately, counting the number of primitives generated is tricky:
>> a program might pause a transform feedback operation, start a second one
>> with a different object, then switch back and resume.  Both transform
>> feedback operations share the SO_NUM_PRIMS_WRITTEN counters.
>>
>> To work around this, we save the counter values at Begin, Pause, Resume,
>> and End.  This "bookends" each section where transform feedback is
>> active for the current object.  Adding up differences of pairs gives
>> us the number of primitives generated.  (This is similar to what we
>> do for occlusion queries on platforms without hardware contexts.)
>>
>> v2: Fix missing parenthesis in assertion (caught by Eric Anholt).
>>
>> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
>> Reviewed-by: Eric Anholt <eric at anholt.net>
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.c    |   2 +
>>  src/mesa/drivers/dri/i965/brw_context.h    |  26 ++++
>>  src/mesa/drivers/dri/i965/gen6_sol.c       |   1 +
>>  src/mesa/drivers/dri/i965/gen7_sol_state.c | 190 ++++++++++++++++++++++++++++-
>>  4 files changed, 218 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
>> index 623273c..f4e04b6 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.c
>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
>> @@ -252,6 +252,8 @@ brw_init_driver_functions(struct brw_context *brw,
>>  
>>     functions->NewTransformFeedback = brw_new_transform_feedback;
>>     functions->DeleteTransformFeedback = brw_delete_transform_feedback;
>> +   functions->GetTransformFeedbackVertexCount =
>> +      brw_get_transform_feedback_vertex_count;
>>     if (brw->gen >= 7) {
>>        functions->BeginTransformFeedback = gen7_begin_transform_feedback;
>>        functions->EndTransformFeedback = gen7_end_transform_feedback;
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
>> index 48aa4c1..c72bad1 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -880,11 +880,33 @@ struct intel_batchbuffer {
>>     } saved;
>>  };
>>  
>> +#define BRW_MAX_XFB_STREAMS 4
>> +
>>  struct brw_transform_feedback_object {
>>     struct gl_transform_feedback_object base;
>>  
>>     /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */
>>     drm_intel_bo *offset_bo;
>> +
>> +   /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */
>> +   GLenum primitive_mode;
>> +
>> +   /**
>> +    * Count of primitives generated during this transform feedback operation.
>> +    *  @{
>> +    */
>> +   uint64_t prims_generated[BRW_MAX_XFB_STREAMS];
>> +   drm_intel_bo *prim_count_bo;
>> +   unsigned prim_count_buffer_index; /**< in number of uint64_t units */
>> +   /** @} */
>> +
>> +   /**
>> +    * Number of vertices written between last Begin/EndTransformFeedback().
>> +    *
>> +    * Used to implement DrawTransformFeedback().
>> +    */
>> +   uint64_t vertices_written[BRW_MAX_XFB_STREAMS];
>> +   bool vertices_written_valid;
>>  };
>>  
>>  /**
>> @@ -1574,6 +1596,10 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
>>  void
>>  brw_end_transform_feedback(struct gl_context *ctx,
>>                             struct gl_transform_feedback_object *obj);
>> +GLsizei
>> +brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
>> +                                        struct gl_transform_feedback_object *obj,
>> +                                        GLuint stream);
>>  
>>  /* gen7_sol_state.c */
>>  void
>> diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
>> index 2e6c86a..af5bed9 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_sol.c
>> +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
>> @@ -162,6 +162,7 @@ brw_delete_transform_feedback(struct gl_context *ctx,
>>     }
>>  
>>     drm_intel_bo_unreference(brw_obj->offset_bo);
>> +   drm_intel_bo_unreference(brw_obj->prim_count_bo);
>>  
>>     free(brw_obj);
>>  }
>> diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
>> index 27421da..7cac8fe 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
>> @@ -249,14 +249,179 @@ const struct brw_tracked_state gen7_sol_state = {
>>     .emit = upload_sol_state,
>>  };
>>  
>> +/**
>> + * Tally the number of primitives generated so far.
>> + *
>> + * The buffer contains a series of pairs:
>> + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
>> + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
>> + *
>> + * For each stream, we subtract the pair of values (end - start) to get the
>> + * number of primitives generated during one section.  We accumulate these
>> + * values, adding them up to get the total number of primitives generated.
>> + */
>> +static void
>> +gen7_tally_prims_generated(struct brw_context *brw,
>> +                           struct brw_transform_feedback_object *obj)
>> +{
>> +   /* If the current batch is still contributing to the number of primitives
>> +    * generated, flush it now so the results will be present when mapped.
>> +    */
>> +   if (drm_intel_bo_references(brw->batch.bo, obj->prim_count_bo))
>> +      intel_batchbuffer_flush(brw);
>> +
>> +   if (unlikely(brw->perf_debug && drm_intel_bo_busy(obj->prim_count_bo)))
>> +      perf_debug("Stalling for # of transform feedback primitives written.\n");
>> +
>> +   drm_intel_bo_map(obj->prim_count_bo, false);
>> +   uint64_t *prim_counts = obj->prim_count_bo->virtual;
>> +
>> +   assert(obj->prim_count_buffer_index % (2 * BRW_MAX_XFB_STREAMS) == 0);
>> +   int pairs = obj->prim_count_buffer_index / (2 * BRW_MAX_XFB_STREAMS);
>> +
>> +   for (int i = 0; i < pairs; i++) {
>> +      for (int s = 0; s < BRW_MAX_XFB_STREAMS; s++) {
>> +         obj->prims_generated[s] +=
>> +            prim_counts[BRW_MAX_XFB_STREAMS + s] - prim_counts[s];
>> +      }
>> +      prim_counts += 2 * BRW_MAX_XFB_STREAMS; /* move to the next pair */
>> +   }
>> +
>> +   drm_intel_bo_unmap(obj->prim_count_bo);
>> +
>> +   /* Release the BO; we've already tallied all the data it contained. */
>> +   drm_intel_bo_unreference(obj->prim_count_bo);
>> +   obj->prim_count_bo = NULL;
>> +}
>> +
>> +/**
>> + * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
>> + * to prim_count_bo.
>> + *
>> + * If prim_count_bo is out of space, gather up the results so far into
>> + * prims_generated[] and allocate a new buffer with enough space.
>> + *
>> + * The number of primitives written is used to compute the number of vertices
>> + * written to a transform feedback stream, which is required to implement
>> + * DrawTransformFeedback().
>> + */
>> +static void
>> +gen7_save_primitives_written_counters(struct brw_context *brw,
>> +                                struct brw_transform_feedback_object *obj)
>> +{
>> +   const int streams = BRW_MAX_XFB_STREAMS;
>> +
>> +   /* Check if there's enough space for a new pair of four values. */
>> +   if (obj->prim_count_bo != NULL &&
>> +       obj->prim_count_buffer_index + 2 * streams >= 4096 / sizeof(uint64_t)) {
>> +      /* Gather up the results so far and release the BO. */
>> +      gen7_tally_prims_generated(brw, obj);
>> +   }
>> +
>> +   /* Allocate a new buffer if needed.  A page should be plenty. */
>> +   if (obj->prim_count_bo == NULL) {
>> +      obj->prim_count_buffer_index = 0;
>> +      obj->prim_count_bo =
>> +         drm_intel_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 4096);
> 
> I was wondering why 'gen7_tally_prims_generated()' needs to dispose of the
> buffer object, only for this logic here to allocate another one. Couldn't we
> re-use the old bo and simply let 'gen7_tally_prims_generated()' reset
> 'prim_count_buffer_index' to zero?
> 
> Below, 'brw_compute_xfb_vertices_written()' also wants to tally, but it guards
> itself against adding the counters multiple times by using the flag
> 'vertices_written_valid'. In fact, if it didn't, 'gen7_tally_prims_generated()'
> would call 'drm_intel_bo_map()' on a NULL pointer, right? Or did I
> misunderstand the logic?

Topi,

You're correct.  There's no real point in releasing the buffer just to
allocate a new one.

I'll change it to allocate once at brw_new_transform_feedback_object
time, and just reuse it.

Thanks for the review!

--Ken


More information about the mesa-dev mailing list