[Mesa-dev] [PATCH v2 7/8] i965: Implement glDrawTransformFeedback().
Pohjolainen, Topi
topi.pohjolainen at intel.com
Mon Oct 28 12:14:55 CET 2013
On Fri, Oct 25, 2013 at 10:35:48PM -0700, Kenneth Graunke wrote:
> Implementing the GetTransformFeedbackVertexCount() driver hook allows
> the VBO module to call us with the right number of vertices.
>
> The hardware doesn't directly count the number of vertices written by
> SOL, so we instead use the SO_NUM_PRIMS_WRITTEN(n) counters and multiply
> by the number of vertices per primitive.
>
> Unfortunately, counting the number of primitives generated is tricky:
> a program might pause a transform feedback operation, start a second one
> with a different object, then switch back and resume. Both transform
> feedback operations share the SO_NUM_PRIMS_WRITTEN counters.
>
> To work around this, we save the counter values at Begin, Pause, Resume,
> and End. This "bookends" each section where transform feedback is
> active for the current object. Adding up differences of pairs gives
> us the number of primitives generated. (This is similar to what we
> do for occlusion queries on platforms without hardware contexts.)
>
> v2: Fix missing parenthesis in assertion (caught by Eric Anholt).
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
> Reviewed-by: Eric Anholt <eric at anholt.net>
> ---
> src/mesa/drivers/dri/i965/brw_context.c | 2 +
> src/mesa/drivers/dri/i965/brw_context.h | 26 ++++
> src/mesa/drivers/dri/i965/gen6_sol.c | 1 +
> src/mesa/drivers/dri/i965/gen7_sol_state.c | 190 ++++++++++++++++++++++++++++-
> 4 files changed, 218 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 623273c..f4e04b6 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -252,6 +252,8 @@ brw_init_driver_functions(struct brw_context *brw,
>
> functions->NewTransformFeedback = brw_new_transform_feedback;
> functions->DeleteTransformFeedback = brw_delete_transform_feedback;
> + functions->GetTransformFeedbackVertexCount =
> + brw_get_transform_feedback_vertex_count;
> if (brw->gen >= 7) {
> functions->BeginTransformFeedback = gen7_begin_transform_feedback;
> functions->EndTransformFeedback = gen7_end_transform_feedback;
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 48aa4c1..c72bad1 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -880,11 +880,33 @@ struct intel_batchbuffer {
> } saved;
> };
>
> +#define BRW_MAX_XFB_STREAMS 4
> +
> struct brw_transform_feedback_object {
> struct gl_transform_feedback_object base;
>
> /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */
> drm_intel_bo *offset_bo;
> +
> + /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */
> + GLenum primitive_mode;
> +
> + /**
> + * Count of primitives generated during this transform feedback operation.
> + * @{
> + */
> + uint64_t prims_generated[BRW_MAX_XFB_STREAMS];
> + drm_intel_bo *prim_count_bo;
> + unsigned prim_count_buffer_index; /**< in number of uint64_t units */
> + /** @} */
> +
> + /**
> + * Number of vertices written between last Begin/EndTransformFeedback().
> + *
> + * Used to implement DrawTransformFeedback().
> + */
> + uint64_t vertices_written[BRW_MAX_XFB_STREAMS];
> + bool vertices_written_valid;
> };
>
> /**
> @@ -1574,6 +1596,10 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
> void
> brw_end_transform_feedback(struct gl_context *ctx,
> struct gl_transform_feedback_object *obj);
> +GLsizei
> +brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
> + struct gl_transform_feedback_object *obj,
> + GLuint stream);
>
> /* gen7_sol_state.c */
> void
> diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
> index 2e6c86a..af5bed9 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sol.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
> @@ -162,6 +162,7 @@ brw_delete_transform_feedback(struct gl_context *ctx,
> }
>
> drm_intel_bo_unreference(brw_obj->offset_bo);
> + drm_intel_bo_unreference(brw_obj->prim_count_bo);
>
> free(brw_obj);
> }
> diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> index 27421da..7cac8fe 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> @@ -249,14 +249,179 @@ const struct brw_tracked_state gen7_sol_state = {
> .emit = upload_sol_state,
> };
>
> +/**
> + * Tally the number of primitives generated so far.
> + *
> + * The buffer contains a series of pairs:
> + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
> + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
> + *
> + * For each stream, we subtract the pair of values (end - start) to get the
> + * number of primitives generated during one section. We accumulate these
> + * values, adding them up to get the total number of primitives generated.
> + */
> +static void
> +gen7_tally_prims_generated(struct brw_context *brw,
> + struct brw_transform_feedback_object *obj)
> +{
> + /* If the current batch is still contributing to the number of primitives
> + * generated, flush it now so the results will be present when mapped.
> + */
> + if (drm_intel_bo_references(brw->batch.bo, obj->prim_count_bo))
> + intel_batchbuffer_flush(brw);
> +
> + if (unlikely(brw->perf_debug && drm_intel_bo_busy(obj->prim_count_bo)))
> + perf_debug("Stalling for # of transform feedback primitives written.\n");
> +
> + drm_intel_bo_map(obj->prim_count_bo, false);
> + uint64_t *prim_counts = obj->prim_count_bo->virtual;
> +
> + assert(obj->prim_count_buffer_index % (2 * BRW_MAX_XFB_STREAMS) == 0);
> + int pairs = obj->prim_count_buffer_index / (2 * BRW_MAX_XFB_STREAMS);
> +
> + for (int i = 0; i < pairs; i++) {
> + for (int s = 0; s < BRW_MAX_XFB_STREAMS; s++) {
> + obj->prims_generated[s] +=
> + prim_counts[BRW_MAX_XFB_STREAMS + s] - prim_counts[s];
> + }
> + prim_counts += 2 * BRW_MAX_XFB_STREAMS; /* move to the next pair */
> + }
> +
> + drm_intel_bo_unmap(obj->prim_count_bo);
> +
> + /* Release the BO; we've already tallied all the data it contained. */
> + drm_intel_bo_unreference(obj->prim_count_bo);
> + obj->prim_count_bo = NULL;
> +}
> +
> +/**
> + * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
> + * to prim_count_bo.
> + *
> + * If prim_count_bo is out of space, gather up the results so far into
> + * prims_generated[] and allocate a new buffer with enough space.
> + *
> + * The number of primitives written is used to compute the number of vertices
> + * written to a transform feedback stream, which is required to implement
> + * DrawTransformFeedback().
> + */
> +static void
> +gen7_save_primitives_written_counters(struct brw_context *brw,
> + struct brw_transform_feedback_object *obj)
> +{
> + const int streams = BRW_MAX_XFB_STREAMS;
> +
> + /* Check if there's enough space for a new pair of four values. */
> + if (obj->prim_count_bo != NULL &&
> + obj->prim_count_buffer_index + 2 * streams >= 4096 / sizeof(uint64_t)) {
> + /* Gather up the results so far and release the BO. */
> + gen7_tally_prims_generated(brw, obj);
> + }
> +
> + /* Allocate a new buffer if needed. A page should be plenty. */
> + if (obj->prim_count_bo == NULL) {
> + obj->prim_count_buffer_index = 0;
> + obj->prim_count_bo =
> + drm_intel_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 4096);
I was wondering why 'gen7_tally_prims_generated()' needs to dispose of the
buffer object, and why the logic here then has to allocate another one.
Couldn't we re-use the old bo and simply let 'gen7_tally_prims_generated()'
reset 'prim_count_buffer_index' to zero?
Below, 'brw_compute_xfb_vertices_written()' also wants to tally, but it guards
itself against adding the counters multiple times by using the flag
'vertices_written_valid'. In fact, if it didn't, 'gen7_tally_prims_generated()'
would call 'drm_intel_bo_map()' on a NULL pointer, right? Or did I
misunderstand the logic?
> + }
> +
> + /* Flush any drawing so that the counters have the right values. */
> + intel_batchbuffer_emit_mi_flush(brw);
> +
> + /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
> + for (int i = 0; i < streams; i++) {
> + brw_store_register_mem64(brw, obj->prim_count_bo,
> + GEN7_SO_NUM_PRIMS_WRITTEN(i),
> + obj->prim_count_buffer_index + i);
> + }
> +
> + /* Update where to write data to. */
> + obj->prim_count_buffer_index += streams;
> +}
> +
> +/**
> + * Compute the number of vertices written by this transform feedback operation.
> + */
> +static void
> +brw_compute_xfb_vertices_written(struct brw_context *brw,
> + struct brw_transform_feedback_object *obj)
> +{
> + if (obj->vertices_written_valid || !obj->base.EndedAnytime)
> + return;
> +
> + unsigned vertices_per_prim = 0;
> +
> + switch (obj->primitive_mode) {
> + case GL_POINTS:
> + vertices_per_prim = 1;
> + break;
> + case GL_LINES:
> + vertices_per_prim = 2;
> + break;
> + case GL_TRIANGLES:
> + vertices_per_prim = 3;
> + break;
> + default:
> + assert(!"Invalid transform feedback primitive mode.");
> + }
> +
> + /* Get the number of primitives generated. */
> + gen7_tally_prims_generated(brw, obj);
> +
> + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
> + obj->vertices_written[i] = vertices_per_prim * obj->prims_generated[i];
> + }
> + obj->vertices_written_valid = true;
> +}
> +
> +/**
> + * GetTransformFeedbackVertexCount() driver hook.
> + *
> + * Returns the number of vertices written to a particular stream by the last
> + * Begin/EndTransformFeedback block. Used to implement DrawTransformFeedback().
> + */
> +GLsizei
> +brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
> + struct gl_transform_feedback_object *obj,
> + GLuint stream)
> +{
> + struct brw_context *brw = brw_context(ctx);
> + struct brw_transform_feedback_object *brw_obj =
> + (struct brw_transform_feedback_object *) obj;
> +
> + assert(obj->EndedAnytime);
> + assert(stream < BRW_MAX_XFB_STREAMS);
> +
> + brw_compute_xfb_vertices_written(brw, brw_obj);
> + return brw_obj->vertices_written[stream];
> +}
> +
> void
> gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
> struct gl_transform_feedback_object *obj)
> {
> struct brw_context *brw = brw_context(ctx);
> + struct brw_transform_feedback_object *brw_obj =
> + (struct brw_transform_feedback_object *) obj;
>
> intel_batchbuffer_flush(brw);
> brw->batch.needs_sol_reset = true;
> +
> + /* We're about to lose the information needed to compute the number of
> + * vertices written during the last Begin/EndTransformFeedback section,
> + * so we can't delay it any further.
> + */
> + brw_compute_xfb_vertices_written(brw, brw_obj);
> +
> + /* No primitives have been generated yet. */
> + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
> + brw_obj->prims_generated[i] = 0;
> + }
> +
> + /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
> + gen7_save_primitives_written_counters(brw, brw_obj);
> +
> + brw_obj->primitive_mode = mode;
> }
>
> void
> @@ -270,8 +435,18 @@ gen7_end_transform_feedback(struct gl_context *ctx,
> * simplicity, just do a full flush.
> */
> struct brw_context *brw = brw_context(ctx);
> + struct brw_transform_feedback_object *brw_obj =
> + (struct brw_transform_feedback_object *) obj;
>
> - intel_batchbuffer_emit_mi_flush(brw);
> + /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */
> + gen7_save_primitives_written_counters(brw, brw_obj);
> +
> + /* EndTransformFeedback() means that we need to update the number of
> + * vertices written. Since it's only necessary if DrawTransformFeedback()
> + * is called and it means mapping a buffer object, we delay computing it
> + * until it's absolutely necessary to try and avoid stalls.
> + */
> + brw_obj->vertices_written_valid = false;
> }
>
> void
> @@ -282,6 +457,9 @@ gen7_pause_transform_feedback(struct gl_context *ctx,
> struct brw_transform_feedback_object *brw_obj =
> (struct brw_transform_feedback_object *) obj;
>
> + /* Flush any drawing so that the counters have the right values. */
> + intel_batchbuffer_emit_mi_flush(brw);
> +
> /* Save the SOL buffer offset register values. */
> for (int i = 0; i < 4; i++) {
> BEGIN_BATCH(3);
> @@ -292,6 +470,13 @@ gen7_pause_transform_feedback(struct gl_context *ctx,
> i * sizeof(uint32_t));
> ADVANCE_BATCH();
> }
> +
> + /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
> + * While this operation is paused, other transform feedback actions may
> + * occur, which will contribute to the counters. We need to exclude that
> + * from our counts.
> + */
> + gen7_save_primitives_written_counters(brw, brw_obj);
> }
>
> void
> @@ -312,4 +497,7 @@ gen7_resume_transform_feedback(struct gl_context *ctx,
> i * sizeof(uint32_t));
> ADVANCE_BATCH();
> }
> +
> + /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
> + gen7_save_primitives_written_counters(brw, brw_obj);
> }
> --
> 1.8.3.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list