[Mesa-dev] [PATCH] swr: Remove stall waiting for core query counters.

Bruce Cherniak bruce.cherniak at intel.com
Thu Apr 28 17:13:15 UTC 2016


When gathering query results, swr_gather_stats was
unnecessarily stalling the entire pipeline.  Results are now
collected asynchronously, with a fence marking completion.
---
 src/gallium/drivers/swr/swr_fence.cpp |    6 -
 src/gallium/drivers/swr/swr_fence.h   |    8 ++
 src/gallium/drivers/swr/swr_query.cpp |  180 ++++++++++++---------------------
 src/gallium/drivers/swr/swr_query.h   |   11 ++-
 4 files changed, 81 insertions(+), 124 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_fence.cpp b/src/gallium/drivers/swr/swr_fence.cpp
index 2e95b39..8a8e864 100644
--- a/src/gallium/drivers/swr/swr_fence.cpp
+++ b/src/gallium/drivers/swr/swr_fence.cpp
@@ -105,12 +105,6 @@ swr_fence_reference(struct pipe_screen *screen,
       swr_fence_destroy(old);
 }
 
-static INLINE boolean
-swr_is_fence_done(struct pipe_fence_handle *fence_handle)
-{
-   struct swr_fence *fence = swr_fence(fence_handle);
-   return (fence->read == fence->write);
-}
 
 /*
  * Wait for the fence to finish.
diff --git a/src/gallium/drivers/swr/swr_fence.h b/src/gallium/drivers/swr/swr_fence.h
index df3776e..47f4d2e 100644
--- a/src/gallium/drivers/swr/swr_fence.h
+++ b/src/gallium/drivers/swr/swr_fence.h
@@ -45,6 +45,14 @@ swr_fence(struct pipe_fence_handle *fence)
    return (struct swr_fence *)fence;
 }
 
+
+static INLINE boolean
+swr_is_fence_done(struct pipe_fence_handle *fence_handle)
+{
+   struct swr_fence *fence = swr_fence(fence_handle);
+   return (fence->read == fence->write);
+}
+
 static INLINE boolean
 swr_is_fence_pending(struct pipe_fence_handle *fence_handle)
 {
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
index f038a6e..5c59965 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -62,10 +62,8 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
    struct swr_query *pq = swr_query(q);
 
    if (pq->fence) {
-      if (!swr_is_fence_pending(pq->fence)) {
-         swr_fence_submit(swr_context(pipe), pq->fence);
+      if (swr_is_fence_pending(pq->fence))
          swr_fence_finish(pipe->screen, pq->fence, 0);
-      }
       swr_fence_reference(pipe->screen, &pq->fence, NULL);
    }
 
@@ -73,100 +71,45 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
 }
 
 
-// XXX Create a fence callback, rather than stalling SwrWaitForIdle
 static void
 swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
 {
    struct swr_context *ctx = swr_context(pipe);
 
    assert(pq->result);
-   union pipe_query_result *result = pq->result;
+   struct swr_query_result *result = pq->result;
    boolean enable_stats = pq->enable_stats;
-   SWR_STATS swr_stats = {0};
-
-   if (pq->fence) {
-      if (!swr_is_fence_pending(pq->fence)) {
-         swr_fence_submit(ctx, pq->fence);
-         swr_fence_finish(pipe->screen, pq->fence, 0);
-      }
-      swr_fence_reference(pipe->screen, &pq->fence, NULL);
-   }
 
-   /*
-    * These queries don't need SWR Stats enabled in the core
-    * Set and return.
-    */
+   /* A few results don't require the core, so don't involve it */
    switch (pq->type) {
    case PIPE_QUERY_TIMESTAMP:
    case PIPE_QUERY_TIME_ELAPSED:
-      result->u64 = swr_get_timestamp(pipe->screen);
-      return;
+      result->timestamp = swr_get_timestamp(pipe->screen);
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
-      /* nothing to do here */
-      return;
-      break;
    case PIPE_QUERY_GPU_FINISHED:
-      result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId
-                           vs LastRetiredId? */
-      return;
+      /* nothing to do here */
       break;
    default:
-      /* Any query that needs SwrCore stats */
-      break;
-   }
-
-   /*
-    * All other results are collected from SwrCore counters
-    */
+      /*
+       * All other results are collected from SwrCore counters via
+       * SwrGetStats. This returns immediately, but results are later filled
+       * in by the backend.  Fence status is the only indication of
+       * completion.  */
+      SwrGetStats(ctx->swrContext, &result->core);
+
+      if (!pq->fence) {
+         struct swr_screen *screen = swr_screen(pipe->screen);
+         swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
+      }
+      swr_fence_submit(ctx, pq->fence);
 
-   /* XXX, Should turn this into a fence callback and skip the stall */
-   SwrGetStats(ctx->swrContext, &swr_stats);
-   /* SwrGetStats returns immediately, wait for collection */
-   SwrWaitForIdle(ctx->swrContext);
+      /* Only change stat collection if there are no active queries */
+      if (ctx->active_queries == 0)
+         SwrEnableStats(ctx->swrContext, enable_stats);
 
-   switch (pq->type) {
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-      result->u64 = swr_stats.DepthPassCount;
-      break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED:
-      result->u64 = swr_stats.IaPrimitives;
-      break;
-   case PIPE_QUERY_PRIMITIVES_EMITTED:
-      result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
-      break;
-   case PIPE_QUERY_SO_STATISTICS:
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
-      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
-      so_stats->num_primitives_written =
-         swr_stats.SoNumPrimsWritten[pq->index];
-      so_stats->primitives_storage_needed =
-         swr_stats.SoPrimStorageNeeded[pq->index];
-   } break;
-   case PIPE_QUERY_PIPELINE_STATISTICS: {
-      struct pipe_query_data_pipeline_statistics *p_stats =
-         &result->pipeline_statistics;
-      p_stats->ia_vertices = swr_stats.IaVertices;
-      p_stats->ia_primitives = swr_stats.IaPrimitives;
-      p_stats->vs_invocations = swr_stats.VsInvocations;
-      p_stats->gs_invocations = swr_stats.GsInvocations;
-      p_stats->gs_primitives = swr_stats.GsPrimitives;
-      p_stats->c_invocations = swr_stats.CPrimitives;
-      p_stats->c_primitives = swr_stats.CPrimitives;
-      p_stats->ps_invocations = swr_stats.PsInvocations;
-      p_stats->hs_invocations = swr_stats.HsInvocations;
-      p_stats->ds_invocations = swr_stats.DsInvocations;
-      p_stats->cs_invocations = swr_stats.CsInvocations;
-   } break;
-   default:
-      assert(0 && "Unsupported query");
       break;
    }
-
-   /* Only change stat collection if there are no active queries */
-   if (ctx->active_queries == 0)
-      SwrEnableStats(ctx->swrContext, enable_stats);
 }
 
 
@@ -176,16 +119,16 @@ swr_get_query_result(struct pipe_context *pipe,
                      boolean wait,
                      union pipe_query_result *result)
 {
-   struct swr_context *ctx = swr_context(pipe);
    struct swr_query *pq = swr_query(q);
+   struct swr_query_result *start = &pq->start;
+   struct swr_query_result *end = &pq->end;
+   unsigned index = pq->index;
 
    if (pq->fence) {
-      if (!swr_is_fence_pending(pq->fence)) {
-         swr_fence_submit(ctx, pq->fence);
-         if (!wait)
-            return FALSE;
-         swr_fence_finish(pipe->screen, pq->fence, 0);
-      }
+      if (!wait && !swr_is_fence_done(pq->fence))
+         return FALSE;
+
+      swr_fence_finish(pipe->screen, pq->fence, 0);
       swr_fence_reference(pipe->screen, &pq->fence, NULL);
    }
 
@@ -194,62 +137,67 @@ swr_get_query_result(struct pipe_context *pipe,
    switch (pq->type) {
    /* Booleans */
    case PIPE_QUERY_OCCLUSION_PREDICATE:
-      result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
+      result->b = end->core.DepthPassCount != start->core.DepthPassCount;
       break;
    case PIPE_QUERY_GPU_FINISHED:
-      result->b = pq->end.b;
+      result->b = TRUE;
       break;
    /* Counters */
    case PIPE_QUERY_OCCLUSION_COUNTER:
+      result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;
+      break;
    case PIPE_QUERY_TIMESTAMP:
    case PIPE_QUERY_TIME_ELAPSED:
+      result->u64 = end->timestamp - start->timestamp;
+      break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
+      result->u64 = end->core.IaPrimitives - start->core.IaPrimitives;
    case PIPE_QUERY_PRIMITIVES_EMITTED:
-      result->u64 = pq->end.u64 - pq->start.u64;
+      result->u64 = end->core.SoNumPrimsWritten[index]
+         - start->core.SoNumPrimsWritten[index];
       break;
    /* Structures */
    case PIPE_QUERY_SO_STATISTICS: {
       struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
-      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
-      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      struct SWR_STATS *start = &pq->start.core;
+      struct SWR_STATS *end = &pq->end.core;
       so_stats->num_primitives_written =
-         end->num_primitives_written - start->num_primitives_written;
+         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
       so_stats->primitives_storage_needed =
-         end->primitives_storage_needed - start->primitives_storage_needed;
+         end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
    } break;
-   case PIPE_QUERY_TIMESTAMP_DISJOINT: {
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
       /* os_get_time_nano returns nanoseconds */
       result->timestamp_disjoint.frequency = UINT64_C(1000000000);
       result->timestamp_disjoint.disjoint = FALSE;
-   } break;
+      break;
    case PIPE_QUERY_PIPELINE_STATISTICS: {
       struct pipe_query_data_pipeline_statistics *p_stats =
          &result->pipeline_statistics;
-      struct pipe_query_data_pipeline_statistics *start =
-         &pq->start.pipeline_statistics;
-      struct pipe_query_data_pipeline_statistics *end =
-         &pq->end.pipeline_statistics;
-      p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
-      p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
-      p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
-      p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
-      p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
-      p_stats->c_invocations = end->c_invocations - start->c_invocations;
-      p_stats->c_primitives = end->c_primitives - start->c_primitives;
-      p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
-      p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
-      p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
-      p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
-   } break;
+      struct SWR_STATS *start = &pq->start.core;
+      struct SWR_STATS *end = &pq->end.core;
+      p_stats->ia_vertices = end->IaVertices - start->IaVertices;
+      p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives;
+      p_stats->vs_invocations = end->VsInvocations - start->VsInvocations;
+      p_stats->gs_invocations = end->GsInvocations - start->GsInvocations;
+      p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives;
+      p_stats->c_invocations = end->CPrimitives - start->CPrimitives;
+      p_stats->c_primitives = end->CPrimitives - start->CPrimitives;
+      p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
+      p_stats->hs_invocations = end->HsInvocations - start->HsInvocations;
+      p_stats->ds_invocations = end->DsInvocations - start->DsInvocations;
+      p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
+    } break;
    case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
-      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
-      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      struct SWR_STATS *start = &pq->start.core;
+      struct SWR_STATS *end = &pq->end.core;
       uint64_t num_primitives_written =
-         end->num_primitives_written - start->num_primitives_written;
+         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
       uint64_t primitives_storage_needed =
-         end->primitives_storage_needed - start->primitives_storage_needed;
+         end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
       result->b = num_primitives_written > primitives_storage_needed;
-   } break;
+   }
+      break;
    default:
       assert(0 && "Unsupported query");
       break;
@@ -264,6 +212,8 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
    struct swr_context *ctx = swr_context(pipe);
    struct swr_query *pq = swr_query(q);
 
+   assert(!pq->enable_stats && "swr_begin_query: Query is already active!");
+
    /* Initialize Results */
    memset(&pq->start, 0, sizeof(pq->start));
    memset(&pq->end, 0, sizeof(pq->end));
@@ -276,7 +226,7 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 
    /* override start timestamp to 0 for TIMESTAMP query */
    if (pq->type == PIPE_QUERY_TIMESTAMP)
-      pq->start.u64 = 0;
+      pq->start.timestamp = 0;
 
    return true;
 }
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
index 836d07b..0ab034d 100644
--- a/src/gallium/drivers/swr/swr_query.h
+++ b/src/gallium/drivers/swr/swr_query.h
@@ -27,13 +27,18 @@
 
 #include <limits.h>
 
+struct swr_query_result {
+   SWR_STATS core;
+   uint64_t timestamp;
+};
+
 struct swr_query {
    unsigned type; /* PIPE_QUERY_* */
    unsigned index;
 
-   union pipe_query_result *result;
-   union pipe_query_result start;
-   union pipe_query_result end;
+   struct swr_query_result *result;
+   struct swr_query_result start;
+   struct swr_query_result end;
 
    struct pipe_fence_handle *fence;
 
-- 
1.7.1



More information about the mesa-dev mailing list