[Mesa-dev] [PATCH v2 7/8] i965: Implement glDrawTransformFeedback().
Kenneth Graunke
kenneth at whitecape.org
Mon Oct 28 23:05:50 CET 2013
On 10/28/2013 04:14 AM, Pohjolainen, Topi wrote:
> On Fri, Oct 25, 2013 at 10:35:48PM -0700, Kenneth Graunke wrote:
>> Implementing the GetTransformFeedbackVertexCount() driver hook allows
>> the VBO module to call us with the right number of vertices.
>>
>> The hardware doesn't directly count the number of vertices written by
>> SOL, so we instead use the SO_NUM_PRIMS_WRITTEN(n) counters and multiply
>> by the number of vertices per primitive.
>>
>> Unfortunately, counting the number of primitives generated is tricky:
>> a program might pause a transform feedback operation, start a second one
>> with a different object, then switch back and resume. Both transform
>> feedback operations share the SO_NUM_PRIMS_WRITTEN counters.
>>
>> To work around this, we save the counter values at Begin, Pause, Resume,
>> and End. This "bookends" each section where transform feedback is
>> active for the current object. Adding up differences of pairs gives
>> us the number of primitives generated. (This is similar to what we
>> do for occlusion queries on platforms without hardware contexts.)
>>
>> v2: Fix missing parenthesis in assertion (caught by Eric Anholt).
>>
>> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
>> Reviewed-by: Eric Anholt <eric at anholt.net>
>> ---
>> src/mesa/drivers/dri/i965/brw_context.c | 2 +
>> src/mesa/drivers/dri/i965/brw_context.h | 26 ++++
>> src/mesa/drivers/dri/i965/gen6_sol.c | 1 +
>> src/mesa/drivers/dri/i965/gen7_sol_state.c | 190 ++++++++++++++++++++++++++++-
>> 4 files changed, 218 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
>> index 623273c..f4e04b6 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.c
>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
>> @@ -252,6 +252,8 @@ brw_init_driver_functions(struct brw_context *brw,
>>
>> functions->NewTransformFeedback = brw_new_transform_feedback;
>> functions->DeleteTransformFeedback = brw_delete_transform_feedback;
>> + functions->GetTransformFeedbackVertexCount =
>> + brw_get_transform_feedback_vertex_count;
>> if (brw->gen >= 7) {
>> functions->BeginTransformFeedback = gen7_begin_transform_feedback;
>> functions->EndTransformFeedback = gen7_end_transform_feedback;
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
>> index 48aa4c1..c72bad1 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -880,11 +880,33 @@ struct intel_batchbuffer {
>> } saved;
>> };
>>
>> +#define BRW_MAX_XFB_STREAMS 4
>> +
>> struct brw_transform_feedback_object {
>> struct gl_transform_feedback_object base;
>>
>> /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */
>> drm_intel_bo *offset_bo;
>> +
>> + /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */
>> + GLenum primitive_mode;
>> +
>> + /**
>> + * Count of primitives generated during this transform feedback operation.
>> + * @{
>> + */
>> + uint64_t prims_generated[BRW_MAX_XFB_STREAMS];
>> + drm_intel_bo *prim_count_bo;
>> + unsigned prim_count_buffer_index; /**< in number of uint64_t units */
>> + /** @} */
>> +
>> + /**
>> + * Number of vertices written between last Begin/EndTransformFeedback().
>> + *
>> + * Used to implement DrawTransformFeedback().
>> + */
>> + uint64_t vertices_written[BRW_MAX_XFB_STREAMS];
>> + bool vertices_written_valid;
>> };
>>
>> /**
>> @@ -1574,6 +1596,10 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
>> void
>> brw_end_transform_feedback(struct gl_context *ctx,
>> struct gl_transform_feedback_object *obj);
>> +GLsizei
>> +brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
>> + struct gl_transform_feedback_object *obj,
>> + GLuint stream);
>>
>> /* gen7_sol_state.c */
>> void
>> diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
>> index 2e6c86a..af5bed9 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_sol.c
>> +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
>> @@ -162,6 +162,7 @@ brw_delete_transform_feedback(struct gl_context *ctx,
>> }
>>
>> drm_intel_bo_unreference(brw_obj->offset_bo);
>> + drm_intel_bo_unreference(brw_obj->prim_count_bo);
>>
>> free(brw_obj);
>> }
>> diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
>> index 27421da..7cac8fe 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
>> @@ -249,14 +249,179 @@ const struct brw_tracked_state gen7_sol_state = {
>> .emit = upload_sol_state,
>> };
>>
>> +/**
>> + * Tally the number of primitives generated so far.
>> + *
>> + * The buffer contains a series of pairs:
>> + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
>> + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
>> + *
>> + * For each stream, we subtract the pair of values (end - start) to get the
>> + * number of primitives generated during one section. We accumulate these
>> + * values, adding them up to get the total number of primitives generated.
>> + */
>> +static void
>> +gen7_tally_prims_generated(struct brw_context *brw,
>> + struct brw_transform_feedback_object *obj)
>> +{
>> + /* If the current batch is still contributing to the number of primitives
>> + * generated, flush it now so the results will be present when mapped.
>> + */
>> + if (drm_intel_bo_references(brw->batch.bo, obj->prim_count_bo))
>> + intel_batchbuffer_flush(brw);
>> +
>> + if (unlikely(brw->perf_debug && drm_intel_bo_busy(obj->prim_count_bo)))
>> + perf_debug("Stalling for # of transform feedback primitives written.\n");
>> +
>> + drm_intel_bo_map(obj->prim_count_bo, false);
>> + uint64_t *prim_counts = obj->prim_count_bo->virtual;
>> +
>> + assert(obj->prim_count_buffer_index % (2 * BRW_MAX_XFB_STREAMS) == 0);
>> + int pairs = obj->prim_count_buffer_index / (2 * BRW_MAX_XFB_STREAMS);
>> +
>> + for (int i = 0; i < pairs; i++) {
>> + for (int s = 0; s < BRW_MAX_XFB_STREAMS; s++) {
>> + obj->prims_generated[s] +=
>> + prim_counts[BRW_MAX_XFB_STREAMS + s] - prim_counts[s];
>> + }
>> + prim_counts += 2 * BRW_MAX_XFB_STREAMS; /* move to the next pair */
>> + }
>> +
>> + drm_intel_bo_unmap(obj->prim_count_bo);
>> +
>> + /* Release the BO; we've already tallied all the data it contained. */
>> + drm_intel_bo_unreference(obj->prim_count_bo);
>> + obj->prim_count_bo = NULL;
>> +}
>> +
>> +/**
>> + * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
>> + * to prim_count_bo.
>> + *
>> + * If prim_count_bo is out of space, gather up the results so far into
>> + * prims_generated[] and allocate a new buffer with enough space.
>> + *
>> + * The number of primitives written is used to compute the number of vertices
>> + * written to a transform feedback stream, which is required to implement
>> + * DrawTransformFeedback().
>> + */
>> +static void
>> +gen7_save_primitives_written_counters(struct brw_context *brw,
>> + struct brw_transform_feedback_object *obj)
>> +{
>> + const int streams = BRW_MAX_XFB_STREAMS;
>> +
>> + /* Check if there's enough space for a new pair of four values. */
>> + if (obj->prim_count_bo != NULL &&
>> + obj->prim_count_buffer_index + 2 * streams >= 4096 / sizeof(uint64_t)) {
>> + /* Gather up the results so far and release the BO. */
>> + gen7_tally_prims_generated(brw, obj);
>> + }
>> +
>> + /* Allocate a new buffer if needed. A page should be plenty. */
>> + if (obj->prim_count_bo == NULL) {
>> + obj->prim_count_buffer_index = 0;
>> + obj->prim_count_bo =
>> + drm_intel_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 4096);
>
> I was wondering why 'gen7_tally_prims_generated()' needs to dispose of the
> buffer object, and why this logic here then needs to allocate another one.
> Couldn't we re-use the old bo and simply let 'gen7_tally_prims_generated()'
> reset 'prim_count_buffer_index' to zero?
>
> Below, 'brw_compute_xfb_vertices_written()' also wants to tally, but it guards
> itself against adding the counters multiple times using the flag
> 'vertices_written_valid'. In fact, if it didn't, 'gen7_tally_prims_generated()'
> would call 'drm_intel_bo_map()' on a NULL pointer, right? Or did I
> misunderstand the logic?
Topi,
You're correct. There's no real point in releasing the buffer just to
allocate a new one.
I'll change it to allocate the buffer once, in brw_new_transform_feedback(),
and just reuse it.
Thanks for the review!
--Ken
More information about the mesa-dev
mailing list