[Mesa-dev] [PATCH] swr: Remove stall waiting for core query counters.

Kyriazis, George george.kyriazis at intel.com
Wed May 4 15:47:07 UTC 2016


Reviewed-By: George Kyriazis <george.kyriazis at intel.com>

> -----Original Message-----
> From: mesa-dev [mailto:mesa-dev-bounces at lists.freedesktop.org] On
> Behalf Of BruceCherniak
> Sent: Thursday, April 28, 2016 12:13 PM
> To: mesa-dev at lists.freedesktop.org
> Subject: [Mesa-dev] [PATCH] swr: Remove stall waiting for core query
> counters.
> 
> When gathering query results, swr_gather_stats was
> unnecessarily stalling the entire pipeline.  Results are now
> collected asynchronously, with a fence marking completion.
> ---
>  src/gallium/drivers/swr/swr_fence.cpp |    6 -
>  src/gallium/drivers/swr/swr_fence.h   |    8 ++
>  src/gallium/drivers/swr/swr_query.cpp |  180 ++++++++++++------------------
> ---
>  src/gallium/drivers/swr/swr_query.h   |   11 ++-
>  4 files changed, 81 insertions(+), 124 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/swr_fence.cpp
> b/src/gallium/drivers/swr/swr_fence.cpp
> index 2e95b39..8a8e864 100644
> --- a/src/gallium/drivers/swr/swr_fence.cpp
> +++ b/src/gallium/drivers/swr/swr_fence.cpp
> @@ -105,12 +105,6 @@ swr_fence_reference(struct pipe_screen *screen,
>        swr_fence_destroy(old);
>  }
> 
> -static INLINE boolean
> -swr_is_fence_done(struct pipe_fence_handle *fence_handle)
> -{
> -   struct swr_fence *fence = swr_fence(fence_handle);
> -   return (fence->read == fence->write);
> -}
> 
>  /*
>   * Wait for the fence to finish.
> diff --git a/src/gallium/drivers/swr/swr_fence.h
> b/src/gallium/drivers/swr/swr_fence.h
> index df3776e..47f4d2e 100644
> --- a/src/gallium/drivers/swr/swr_fence.h
> +++ b/src/gallium/drivers/swr/swr_fence.h
> @@ -45,6 +45,14 @@ swr_fence(struct pipe_fence_handle *fence)
>     return (struct swr_fence *)fence;
>  }
> 
> +
> +static INLINE boolean
> +swr_is_fence_done(struct pipe_fence_handle *fence_handle)
> +{
> +   struct swr_fence *fence = swr_fence(fence_handle);
> +   return (fence->read == fence->write);
> +}
> +
>  static INLINE boolean
>  swr_is_fence_pending(struct pipe_fence_handle *fence_handle)
>  {
> diff --git a/src/gallium/drivers/swr/swr_query.cpp
> b/src/gallium/drivers/swr/swr_query.cpp
> index f038a6e..5c59965 100644
> --- a/src/gallium/drivers/swr/swr_query.cpp
> +++ b/src/gallium/drivers/swr/swr_query.cpp
> @@ -62,10 +62,8 @@ swr_destroy_query(struct pipe_context *pipe, struct
> pipe_query *q)
>     struct swr_query *pq = swr_query(q);
> 
>     if (pq->fence) {
> -      if (!swr_is_fence_pending(pq->fence)) {
> -         swr_fence_submit(swr_context(pipe), pq->fence);
> +      if (swr_is_fence_pending(pq->fence))
>           swr_fence_finish(pipe->screen, pq->fence, 0);
> -      }
>        swr_fence_reference(pipe->screen, &pq->fence, NULL);
>     }
> 
> @@ -73,100 +71,45 @@ swr_destroy_query(struct pipe_context *pipe,
> struct pipe_query *q)
>  }
> 
> 
> -// XXX Create a fence callback, rather than stalling SwrWaitForIdle
>  static void
>  swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
>  {
>     struct swr_context *ctx = swr_context(pipe);
> 
>     assert(pq->result);
> -   union pipe_query_result *result = pq->result;
> +   struct swr_query_result *result = pq->result;
>     boolean enable_stats = pq->enable_stats;
> -   SWR_STATS swr_stats = {0};
> -
> -   if (pq->fence) {
> -      if (!swr_is_fence_pending(pq->fence)) {
> -         swr_fence_submit(ctx, pq->fence);
> -         swr_fence_finish(pipe->screen, pq->fence, 0);
> -      }
> -      swr_fence_reference(pipe->screen, &pq->fence, NULL);
> -   }
> 
> -   /*
> -    * These queries don't need SWR Stats enabled in the core
> -    * Set and return.
> -    */
> +   /* A few results don't require the core, so don't involve it */
>     switch (pq->type) {
>     case PIPE_QUERY_TIMESTAMP:
>     case PIPE_QUERY_TIME_ELAPSED:
> -      result->u64 = swr_get_timestamp(pipe->screen);
> -      return;
> +      result->timestamp = swr_get_timestamp(pipe->screen);
>        break;
>     case PIPE_QUERY_TIMESTAMP_DISJOINT:
> -      /* nothing to do here */
> -      return;
> -      break;
>     case PIPE_QUERY_GPU_FINISHED:
> -      result->b = TRUE; /* XXX TODO Add an api func to SWR to compare
> drawId
> -                           vs LastRetiredId? */
> -      return;
> +      /* nothing to do here */
>        break;
>     default:
> -      /* Any query that needs SwrCore stats */
> -      break;
> -   }
> -
> -   /*
> -    * All other results are collected from SwrCore counters
> -    */
> +      /*
> +       * All other results are collected from SwrCore counters via
> +       * SwrGetStats. This returns immediately, but results are later filled
> +       * in by the backend.  Fence status is the only indication of
> +       * completion.  */
> +      SwrGetStats(ctx->swrContext, &result->core);
> +
> +      if (!pq->fence) {
> +         struct swr_screen *screen = swr_screen(pipe->screen);
> +         swr_fence_reference(pipe->screen, &pq->fence, screen-
> >flush_fence);
> +      }
> +      swr_fence_submit(ctx, pq->fence);
> 
> -   /* XXX, Should turn this into a fence callback and skip the stall */
> -   SwrGetStats(ctx->swrContext, &swr_stats);
> -   /* SwrGetStats returns immediately, wait for collection */
> -   SwrWaitForIdle(ctx->swrContext);
> +      /* Only change stat collection if there are no active queries */
> +      if (ctx->active_queries == 0)
> +         SwrEnableStats(ctx->swrContext, enable_stats);
> 
> -   switch (pq->type) {
> -   case PIPE_QUERY_OCCLUSION_PREDICATE:
> -   case PIPE_QUERY_OCCLUSION_COUNTER:
> -      result->u64 = swr_stats.DepthPassCount;
> -      break;
> -   case PIPE_QUERY_PRIMITIVES_GENERATED:
> -      result->u64 = swr_stats.IaPrimitives;
> -      break;
> -   case PIPE_QUERY_PRIMITIVES_EMITTED:
> -      result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
> -      break;
> -   case PIPE_QUERY_SO_STATISTICS:
> -   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
> -      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
> -      so_stats->num_primitives_written =
> -         swr_stats.SoNumPrimsWritten[pq->index];
> -      so_stats->primitives_storage_needed =
> -         swr_stats.SoPrimStorageNeeded[pq->index];
> -   } break;
> -   case PIPE_QUERY_PIPELINE_STATISTICS: {
> -      struct pipe_query_data_pipeline_statistics *p_stats =
> -         &result->pipeline_statistics;
> -      p_stats->ia_vertices = swr_stats.IaVertices;
> -      p_stats->ia_primitives = swr_stats.IaPrimitives;
> -      p_stats->vs_invocations = swr_stats.VsInvocations;
> -      p_stats->gs_invocations = swr_stats.GsInvocations;
> -      p_stats->gs_primitives = swr_stats.GsPrimitives;
> -      p_stats->c_invocations = swr_stats.CPrimitives;
> -      p_stats->c_primitives = swr_stats.CPrimitives;
> -      p_stats->ps_invocations = swr_stats.PsInvocations;
> -      p_stats->hs_invocations = swr_stats.HsInvocations;
> -      p_stats->ds_invocations = swr_stats.DsInvocations;
> -      p_stats->cs_invocations = swr_stats.CsInvocations;
> -   } break;
> -   default:
> -      assert(0 && "Unsupported query");
>        break;
>     }
> -
> -   /* Only change stat collection if there are no active queries */
> -   if (ctx->active_queries == 0)
> -      SwrEnableStats(ctx->swrContext, enable_stats);
>  }
> 
> 
> @@ -176,16 +119,16 @@ swr_get_query_result(struct pipe_context *pipe,
>                       boolean wait,
>                       union pipe_query_result *result)
>  {
> -   struct swr_context *ctx = swr_context(pipe);
>     struct swr_query *pq = swr_query(q);
> +   struct swr_query_result *start = &pq->start;
> +   struct swr_query_result *end = &pq->end;
> +   unsigned index = pq->index;
> 
>     if (pq->fence) {
> -      if (!swr_is_fence_pending(pq->fence)) {
> -         swr_fence_submit(ctx, pq->fence);
> -         if (!wait)
> -            return FALSE;
> -         swr_fence_finish(pipe->screen, pq->fence, 0);
> -      }
> +      if (!wait && !swr_is_fence_done(pq->fence))
> +         return FALSE;
> +
> +      swr_fence_finish(pipe->screen, pq->fence, 0);
>        swr_fence_reference(pipe->screen, &pq->fence, NULL);
>     }
> 
> @@ -194,62 +137,67 @@ swr_get_query_result(struct pipe_context *pipe,
>     switch (pq->type) {
>     /* Booleans */
>     case PIPE_QUERY_OCCLUSION_PREDICATE:
> -      result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
> +      result->b = end->core.DepthPassCount != start->core.DepthPassCount;
>        break;
>     case PIPE_QUERY_GPU_FINISHED:
> -      result->b = pq->end.b;
> +      result->b = TRUE;
>        break;
>     /* Counters */
>     case PIPE_QUERY_OCCLUSION_COUNTER:
> +      result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;
> +      break;
>     case PIPE_QUERY_TIMESTAMP:
>     case PIPE_QUERY_TIME_ELAPSED:
> +      result->u64 = end->timestamp - start->timestamp;
> +      break;
>     case PIPE_QUERY_PRIMITIVES_GENERATED:
> +      result->u64 = end->core.IaPrimitives - start->core.IaPrimitives;
>     case PIPE_QUERY_PRIMITIVES_EMITTED:
> -      result->u64 = pq->end.u64 - pq->start.u64;
> +      result->u64 = end->core.SoNumPrimsWritten[index]
> +         - start->core.SoNumPrimsWritten[index];
>        break;
>     /* Structures */
>     case PIPE_QUERY_SO_STATISTICS: {
>        struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
> -      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
> -      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
> +      struct SWR_STATS *start = &pq->start.core;
> +      struct SWR_STATS *end = &pq->end.core;
>        so_stats->num_primitives_written =
> -         end->num_primitives_written - start->num_primitives_written;
> +         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
>        so_stats->primitives_storage_needed =
> -         end->primitives_storage_needed - start->primitives_storage_needed;
> +         end->SoPrimStorageNeeded[index] - start-
> >SoPrimStorageNeeded[index];
>     } break;
> -   case PIPE_QUERY_TIMESTAMP_DISJOINT: {
> +   case PIPE_QUERY_TIMESTAMP_DISJOINT:
>        /* os_get_time_nano returns nanoseconds */
>        result->timestamp_disjoint.frequency = UINT64_C(1000000000);
>        result->timestamp_disjoint.disjoint = FALSE;
> -   } break;
> +      break;
>     case PIPE_QUERY_PIPELINE_STATISTICS: {
>        struct pipe_query_data_pipeline_statistics *p_stats =
>           &result->pipeline_statistics;
> -      struct pipe_query_data_pipeline_statistics *start =
> -         &pq->start.pipeline_statistics;
> -      struct pipe_query_data_pipeline_statistics *end =
> -         &pq->end.pipeline_statistics;
> -      p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
> -      p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
> -      p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
> -      p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
> -      p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
> -      p_stats->c_invocations = end->c_invocations - start->c_invocations;
> -      p_stats->c_primitives = end->c_primitives - start->c_primitives;
> -      p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
> -      p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
> -      p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
> -      p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
> -   } break;
> +      struct SWR_STATS *start = &pq->start.core;
> +      struct SWR_STATS *end = &pq->end.core;
> +      p_stats->ia_vertices = end->IaVertices - start->IaVertices;
> +      p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives;
> +      p_stats->vs_invocations = end->VsInvocations - start->VsInvocations;
> +      p_stats->gs_invocations = end->GsInvocations - start->GsInvocations;
> +      p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives;
> +      p_stats->c_invocations = end->CPrimitives - start->CPrimitives;
> +      p_stats->c_primitives = end->CPrimitives - start->CPrimitives;
> +      p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
> +      p_stats->hs_invocations = end->HsInvocations - start->HsInvocations;
> +      p_stats->ds_invocations = end->DsInvocations - start->DsInvocations;
> +      p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
> +    } break;
>     case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
> -      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
> -      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
> +      struct SWR_STATS *start = &pq->start.core;
> +      struct SWR_STATS *end = &pq->end.core;
>        uint64_t num_primitives_written =
> -         end->num_primitives_written - start->num_primitives_written;
> +         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
>        uint64_t primitives_storage_needed =
> -         end->primitives_storage_needed - start->primitives_storage_needed;
> +         end->SoPrimStorageNeeded[index] - start-
> >SoPrimStorageNeeded[index];
>        result->b = num_primitives_written > primitives_storage_needed;
> -   } break;
> +   }
> +      break;
>     default:
>        assert(0 && "Unsupported query");
>        break;
> @@ -264,6 +212,8 @@ swr_begin_query(struct pipe_context *pipe, struct
> pipe_query *q)
>     struct swr_context *ctx = swr_context(pipe);
>     struct swr_query *pq = swr_query(q);
> 
> +   assert(!pq->enable_stats && "swr_begin_query: Query is already
> active!");
> +
>     /* Initialize Results */
>     memset(&pq->start, 0, sizeof(pq->start));
>     memset(&pq->end, 0, sizeof(pq->end));
> @@ -276,7 +226,7 @@ swr_begin_query(struct pipe_context *pipe, struct
> pipe_query *q)
> 
>     /* override start timestamp to 0 for TIMESTAMP query */
>     if (pq->type == PIPE_QUERY_TIMESTAMP)
> -      pq->start.u64 = 0;
> +      pq->start.timestamp = 0;
> 
>     return true;
>  }
> diff --git a/src/gallium/drivers/swr/swr_query.h
> b/src/gallium/drivers/swr/swr_query.h
> index 836d07b..0ab034d 100644
> --- a/src/gallium/drivers/swr/swr_query.h
> +++ b/src/gallium/drivers/swr/swr_query.h
> @@ -27,13 +27,18 @@
> 
>  #include <limits.h>
> 
> +struct swr_query_result {
> +   SWR_STATS core;
> +   uint64_t timestamp;
> +};
> +
>  struct swr_query {
>     unsigned type; /* PIPE_QUERY_* */
>     unsigned index;
> 
> -   union pipe_query_result *result;
> -   union pipe_query_result start;
> -   union pipe_query_result end;
> +   struct swr_query_result *result;
> +   struct swr_query_result start;
> +   struct swr_query_result end;
> 
>     struct pipe_fence_handle *fence;
> 
> --
> 1.7.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list