[Mesa-dev] [PATCH v2 7/8] i965: Implement glDrawTransformFeedback().

Pohjolainen, Topi topi.pohjolainen at intel.com
Mon Oct 28 12:52:03 CET 2013


On Mon, Oct 28, 2013 at 01:14:55PM +0200, Pohjolainen, Topi wrote:
> On Fri, Oct 25, 2013 at 10:35:48PM -0700, Kenneth Graunke wrote:
> > Implementing the GetTransformFeedbackVertexCount() driver hook allows
> > the VBO module to call us with the right number of vertices.
> > 
> > The hardware doesn't directly count the number of vertices written by
> > SOL, so we instead use the SO_NUM_PRIMS_WRITTEN(n) counters and multiply
> > by the number of vertices per primitive.
> > 
> > Unfortunately, counting the number of primitives generated is tricky:
> > a program might pause a transform feedback operation, start a second one
> > with a different object, then switch back and resume.  Both transform
> > feedback operations share the SO_NUM_PRIMS_WRITTEN counters.
> > 
> > To work around this, we save the counter values at Begin, Pause, Resume,
> > and End.  This "bookends" each section where transform feedback is
> > active for the current object.  Adding up differences of pairs gives
> > us the number of primitives generated.  (This is similar to what we
> > do for occlusion queries on platforms without hardware contexts.)
> > 
> > v2: Fix missing parenthesis in assertion (caught by Eric Anholt).
> > 
> > Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> > Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
> > Reviewed-by: Eric Anholt <eric at anholt.net>
> > ---
> >  src/mesa/drivers/dri/i965/brw_context.c    |   2 +
> >  src/mesa/drivers/dri/i965/brw_context.h    |  26 ++++
> >  src/mesa/drivers/dri/i965/gen6_sol.c       |   1 +
> >  src/mesa/drivers/dri/i965/gen7_sol_state.c | 190 ++++++++++++++++++++++++++++-
> >  4 files changed, 218 insertions(+), 1 deletion(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> > index 623273c..f4e04b6 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.c
> > +++ b/src/mesa/drivers/dri/i965/brw_context.c
> > @@ -252,6 +252,8 @@ brw_init_driver_functions(struct brw_context *brw,
> >  
> >     functions->NewTransformFeedback = brw_new_transform_feedback;
> >     functions->DeleteTransformFeedback = brw_delete_transform_feedback;
> > +   functions->GetTransformFeedbackVertexCount =
> > +      brw_get_transform_feedback_vertex_count;
> >     if (brw->gen >= 7) {
> >        functions->BeginTransformFeedback = gen7_begin_transform_feedback;
> >        functions->EndTransformFeedback = gen7_end_transform_feedback;
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> > index 48aa4c1..c72bad1 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.h
> > +++ b/src/mesa/drivers/dri/i965/brw_context.h
> > @@ -880,11 +880,33 @@ struct intel_batchbuffer {
> >     } saved;
> >  };
> >  
> > +#define BRW_MAX_XFB_STREAMS 4
> > +
> >  struct brw_transform_feedback_object {
> >     struct gl_transform_feedback_object base;
> >  
> >     /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */
> >     drm_intel_bo *offset_bo;
> > +
> > +   /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */
> > +   GLenum primitive_mode;
> > +
> > +   /**
> > +    * Count of primitives generated during this transform feedback operation.
> > +    *  @{
> > +    */
> > +   uint64_t prims_generated[BRW_MAX_XFB_STREAMS];
> > +   drm_intel_bo *prim_count_bo;
> > +   unsigned prim_count_buffer_index; /**< in number of uint64_t units */
> > +   /** @} */
> > +
> > +   /**
> > +    * Number of vertices written between last Begin/EndTransformFeedback().
> > +    *
> > +    * Used to implement DrawTransformFeedback().
> > +    */
> > +   uint64_t vertices_written[BRW_MAX_XFB_STREAMS];
> > +   bool vertices_written_valid;
> >  };
> >  
> >  /**
> > @@ -1574,6 +1596,10 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
> >  void
> >  brw_end_transform_feedback(struct gl_context *ctx,
> >                             struct gl_transform_feedback_object *obj);
> > +GLsizei
> > +brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
> > +                                        struct gl_transform_feedback_object *obj,
> > +                                        GLuint stream);
> >  
> >  /* gen7_sol_state.c */
> >  void
> > diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
> > index 2e6c86a..af5bed9 100644
> > --- a/src/mesa/drivers/dri/i965/gen6_sol.c
> > +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
> > @@ -162,6 +162,7 @@ brw_delete_transform_feedback(struct gl_context *ctx,
> >     }
> >  
> >     drm_intel_bo_unreference(brw_obj->offset_bo);
> > +   drm_intel_bo_unreference(brw_obj->prim_count_bo);
> >  
> >     free(brw_obj);
> >  }
> > diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> > index 27421da..7cac8fe 100644
> > --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> > @@ -249,14 +249,179 @@ const struct brw_tracked_state gen7_sol_state = {
> >     .emit = upload_sol_state,
> >  };
> >  
> > +/**
> > + * Tally the number of primitives generated so far.
> > + *
> > + * The buffer contains a series of pairs:
> > + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
> > + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
> > + *
> > + * For each stream, we subtract the pair of values (end - start) to get the
> > + * number of primitives generated during one section.  We accumulate these
> > + * values, adding them up to get the total number of primitives generated.
> > + */
> > +static void
> > +gen7_tally_prims_generated(struct brw_context *brw,
> > +                           struct brw_transform_feedback_object *obj)
> > +{
> > +   /* If the current batch is still contributing to the number of primitives
> > +    * generated, flush it now so the results will be present when mapped.
> > +    */
> > +   if (drm_intel_bo_references(brw->batch.bo, obj->prim_count_bo))
> > +      intel_batchbuffer_flush(brw);
> > +
> > +   if (unlikely(brw->perf_debug && drm_intel_bo_busy(obj->prim_count_bo)))
> > +      perf_debug("Stalling for # of transform feedback primitives written.\n");
> > +
> > +   drm_intel_bo_map(obj->prim_count_bo, false);
> > +   uint64_t *prim_counts = obj->prim_count_bo->virtual;
> > +
> > +   assert(obj->prim_count_buffer_index % (2 * BRW_MAX_XFB_STREAMS) == 0);
> > +   int pairs = obj->prim_count_buffer_index / (2 * BRW_MAX_XFB_STREAMS);
> > +
> > +   for (int i = 0; i < pairs; i++) {
> > +      for (int s = 0; s < BRW_MAX_XFB_STREAMS; s++) {
> > +         obj->prims_generated[s] +=
> > +            prim_counts[BRW_MAX_XFB_STREAMS + s] - prim_counts[s];
> > +      }
> > +      prim_counts += 2 * BRW_MAX_XFB_STREAMS; /* move to the next pair */
> > +   }
> > +
> > +   drm_intel_bo_unmap(obj->prim_count_bo);
> > +
> > +   /* Release the BO; we've already tallied all the data it contained. */
> > +   drm_intel_bo_unreference(obj->prim_count_bo);
> > +   obj->prim_count_bo = NULL;
> > +}
> > +
> > +/**
> > + * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
> > + * to prim_count_bo.
> > + *
> > + * If prim_count_bo is out of space, gather up the results so far into
> > + * prims_generated[] and allocate a new buffer with enough space.
> > + *
> > + * The number of primitives written is used to compute the number of vertices
> > + * written to a transform feedback stream, which is required to implement
> > + * DrawTransformFeedback().
> > + */
> > +static void
> > +gen7_save_primitives_written_counters(struct brw_context *brw,
> > +                                struct brw_transform_feedback_object *obj)
> > +{
> > +   const int streams = BRW_MAX_XFB_STREAMS;
> > +
> > +   /* Check if there's enough space for a new pair of four values. */
> > +   if (obj->prim_count_bo != NULL &&
> > +       obj->prim_count_buffer_index + 2 * streams >= 4096 / sizeof(uint64_t)) {
> > +      /* Gather up the results so far and release the BO. */
> > +      gen7_tally_prims_generated(brw, obj);
> > +   }
> > +
> > +   /* Allocate a new buffer if needed.  A page should be plenty. */
> > +   if (obj->prim_count_bo == NULL) {
> > +      obj->prim_count_buffer_index = 0;
> > +      obj->prim_count_bo =
> > +         drm_intel_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 4096);
> 
> I was wondering why 'gen7_tally_prims_generated()' needs to dispose of the
> buffer object, and why the logic here then has to allocate another one.
> Couldn't we re-use the old bo and simply let 'gen7_tally_prims_generated()'
> reset 'prim_count_buffer_index' to zero?
> 
> Below, 'brw_compute_xfb_vertices_written()' also wants to tally, but it guards
> itself against adding the counters multiple times by using the flag
> 'vertices_written_valid'. In fact, if it didn't, 'gen7_tally_prims_generated()'
> would call 'drm_intel_bo_map()' on a NULL pointer, right? Or did I
> understand the logic all wrong?

To continue the "20 questions" (please bear with me): I may easily be missing
something, but would this patch work without the register storing and loading
introduced in patch four, if the pause() and resume() hooks saved the counters
the same way as begin() and end() do? In other words, one would track the deltas
between begin()/pause(), pause()/resume() and resume()/end(), in addition to
begin()/end() (if there were no pause()/resume() sequence in between)?

> 
> > +   }
> > +
> > +   /* Flush any drawing so that the counters have the right values. */
> > +   intel_batchbuffer_emit_mi_flush(brw);
> > +
> > +   /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
> > +   for (int i = 0; i < streams; i++) {
> > +      brw_store_register_mem64(brw, obj->prim_count_bo,
> > +                               GEN7_SO_NUM_PRIMS_WRITTEN(i),
> > +                               obj->prim_count_buffer_index + i);
> > +   }
> > +
> > +   /* Update where to write data to. */
> > +   obj->prim_count_buffer_index += streams;
> > +}
> > +
> > +/**
> > + * Compute the number of vertices written by this transform feedback operation.
> > + */
> > +static void
> > +brw_compute_xfb_vertices_written(struct brw_context *brw,
> > +                                 struct brw_transform_feedback_object *obj)
> > +{
> > +   if (obj->vertices_written_valid || !obj->base.EndedAnytime)
> > +      return;
> > +
> > +   unsigned vertices_per_prim = 0;
> > +
> > +   switch (obj->primitive_mode) {
> > +   case GL_POINTS:
> > +      vertices_per_prim = 1;
> > +      break;
> > +   case GL_LINES:
> > +      vertices_per_prim = 2;
> > +      break;
> > +   case GL_TRIANGLES:
> > +      vertices_per_prim = 3;
> > +      break;
> > +   default:
> > +      assert(!"Invalid transform feedback primitive mode.");
> > +   }
> > +
> > +   /* Get the number of primitives generated. */
> > +   gen7_tally_prims_generated(brw, obj);
> > +
> > +   for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
> > +      obj->vertices_written[i] = vertices_per_prim * obj->prims_generated[i];
> > +   }
> > +   obj->vertices_written_valid = true;
> > +}
> > +
> > +/**
> > + * GetTransformFeedbackVertexCount() driver hook.
> > + *
> > + * Returns the number of vertices written to a particular stream by the last
> > + * Begin/EndTransformFeedback block.  Used to implement DrawTransformFeedback().
> > + */
> > +GLsizei
> > +brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
> > +                                        struct gl_transform_feedback_object *obj,
> > +                                        GLuint stream)
> > +{
> > +   struct brw_context *brw = brw_context(ctx);
> > +   struct brw_transform_feedback_object *brw_obj =
> > +      (struct brw_transform_feedback_object *) obj;
> > +
> > +   assert(obj->EndedAnytime);
> > +   assert(stream < BRW_MAX_XFB_STREAMS);
> > +
> > +   brw_compute_xfb_vertices_written(brw, brw_obj);
> > +   return brw_obj->vertices_written[stream];
> > +}
> > +
> >  void
> >  gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
> >                                struct gl_transform_feedback_object *obj)
> >  {
> >     struct brw_context *brw = brw_context(ctx);
> > +   struct brw_transform_feedback_object *brw_obj =
> > +      (struct brw_transform_feedback_object *) obj;
> >  
> >     intel_batchbuffer_flush(brw);
> >     brw->batch.needs_sol_reset = true;
> > +
> > +   /* We're about to lose the information needed to compute the number of
> > +    * vertices written during the last Begin/EndTransformFeedback section,
> > +    * so we can't delay it any further.
> > +    */
> > +   brw_compute_xfb_vertices_written(brw, brw_obj);
> > +
> > +   /* No primitives have been generated yet. */
> > +   for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
> > +      brw_obj->prims_generated[i] = 0;
> > +   }
> > +
> > +   /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
> > +   gen7_save_primitives_written_counters(brw, brw_obj);
> > +
> > +   brw_obj->primitive_mode = mode;
> >  }
> >  
> >  void
> > @@ -270,8 +435,18 @@ gen7_end_transform_feedback(struct gl_context *ctx,
> >      * simplicity, just do a full flush.
> >      */
> >     struct brw_context *brw = brw_context(ctx);
> > +   struct brw_transform_feedback_object *brw_obj =
> > +      (struct brw_transform_feedback_object *) obj;
> >  
> > -   intel_batchbuffer_emit_mi_flush(brw);
> > +   /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */
> > +   gen7_save_primitives_written_counters(brw, brw_obj);
> > +
> > +   /* EndTransformFeedback() means that we need to update the number of
> > +    * vertices written.  Since it's only necessary if DrawTransformFeedback()
> > +    * is called and it means mapping a buffer object, we delay computing it
> > +    * until it's absolutely necessary to try and avoid stalls.
> > +    */
> > +   brw_obj->vertices_written_valid = false;
> >  }
> >  
> >  void
> > @@ -282,6 +457,9 @@ gen7_pause_transform_feedback(struct gl_context *ctx,
> >     struct brw_transform_feedback_object *brw_obj =
> >        (struct brw_transform_feedback_object *) obj;
> >  
> > +   /* Flush any drawing so that the counters have the right values. */
> > +   intel_batchbuffer_emit_mi_flush(brw);
> > +
> >     /* Save the SOL buffer offset register values. */
> >     for (int i = 0; i < 4; i++) {
> >        BEGIN_BATCH(3);
> > @@ -292,6 +470,13 @@ gen7_pause_transform_feedback(struct gl_context *ctx,
> >                  i * sizeof(uint32_t));
> >        ADVANCE_BATCH();
> >     }
> > +
> > +   /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
> > +    * While this operation is paused, other transform feedback actions may
> > +    * occur, which will contribute to the counters.  We need to exclude that
> > +    * from our counts.
> > +    */
> > +   gen7_save_primitives_written_counters(brw, brw_obj);
> >  }
> >  
> >  void
> > @@ -312,4 +497,7 @@ gen7_resume_transform_feedback(struct gl_context *ctx,
> >                  i * sizeof(uint32_t));
> >        ADVANCE_BATCH();
> >     }
> > +
> > +   /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
> > +   gen7_save_primitives_written_counters(brw, brw_obj);
> >  }
> > -- 
> > 1.8.3.2
> > 
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list