[Mesa-dev] [PATCH] swr: Removed stalling SwrWaitForIdle from queries.

Kyriazis, George george.kyriazis at intel.com
Fri Sep 30 16:51:26 UTC 2016


Reviewed-by: George Kyriazis <george.kyriazis at intel.com>



From: Bruce Cherniak <bruce.cherniak at intel.com>
Subject: [PATCH] swr: Removed stalling SwrWaitForIdle from queries.
Date: September 27, 2016 at 1:27:08 PM CDT
To: <mesa-dev at lists.freedesktop.org>
Cc: Bruce Cherniak <bruce.cherniak at intel.com>

A previous fundamental change in stats gathering added a temporary
SwrWaitForIdle to begin_query and end_query.  The code has been reworked
to remove the stall.
---
src/gallium/drivers/swr/swr_context.cpp |  33 +++----
src/gallium/drivers/swr/swr_context.h   |  11 ++-
src/gallium/drivers/swr/swr_query.cpp   | 152 +++++++++++++-------------------
src/gallium/drivers/swr/swr_query.h     |  10 +--
4 files changed, 87 insertions(+), 119 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
index 15e60cd..cbc60e0 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -24,6 +24,7 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_atomic.h"

extern "C" {
#include "util/u_transfer.h"
@@ -352,9 +353,9 @@ swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)
   if (!pDC)
      return;

-   struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
+   struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;

-   SWR_STATS *pSwrStats = &ctx->stats;
+   SWR_STATS *pSwrStats = &pqr->core;

   pSwrStats->DepthPassCount += pStats->DepthPassCount;
   pSwrStats->PsInvocations += pStats->PsInvocations;
@@ -369,22 +370,24 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)
   if (!pDC)
      return;

-   struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
+   struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;

-   SWR_STATS_FE *pSwrStats = &ctx->statsFE;
-   pSwrStats->IaVertices += pStats->IaVertices;
-   pSwrStats->IaPrimitives += pStats->IaPrimitives;
-   pSwrStats->VsInvocations += pStats->VsInvocations;
-   pSwrStats->HsInvocations += pStats->HsInvocations;
-   pSwrStats->DsInvocations += pStats->DsInvocations;
-   pSwrStats->GsInvocations += pStats->GsInvocations;
-   pSwrStats->CInvocations += pStats->CInvocations;
-   pSwrStats->CPrimitives += pStats->CPrimitives;
-   pSwrStats->GsPrimitives += pStats->GsPrimitives;
+   SWR_STATS_FE *pSwrStats = &pqr->coreFE;
+   p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices);
+   p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives);
+   p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations);
+   p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations);
+   p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations);
+   p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations);
+   p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations);
+   p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives);
+   p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives);

   for (unsigned i = 0; i < 4; i++) {
-      pSwrStats->SoPrimStorageNeeded[i] += pStats->SoPrimStorageNeeded[i];
-      pSwrStats->SoNumPrimsWritten[i] += pStats->SoNumPrimsWritten[i];
+      p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i],
+            pStats->SoPrimStorageNeeded[i]);
+      p_atomic_add(&pSwrStats->SoNumPrimsWritten[i],
+            pStats->SoNumPrimsWritten[i]);
   }
}

diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
index 6854d69..eecfe0d 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -92,7 +92,7 @@ struct swr_draw_context {
   float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];

   SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
-   void *swr_ctx;
+   void *pStats;
};

/* gen_llvm_types FINI */
@@ -159,9 +159,6 @@ struct swr_context {
   /* SWR private state - draw context */
   struct swr_draw_context swrDC;

-   SWR_STATS stats;
-   SWR_STATS_FE statsFE;
-
   unsigned dirty; /**< Mask of SWR_NEW_x flags */
};

@@ -172,11 +169,13 @@ swr_context(struct pipe_context *pipe)
}

static INLINE void
-swr_update_draw_context(struct swr_context *ctx)
+swr_update_draw_context(struct swr_context *ctx,
+      struct swr_query_result *pqr = nullptr)
{
   swr_draw_context *pDC =
      (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);
-   ctx->swrDC.swr_ctx = ctx;
+   if (pqr)
+      ctx->swrDC.pStats = pqr;
   memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context));
}

diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
index c51c529..8bb0b16 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -71,48 +71,6 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
}


-static void
-swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
-{
-   struct swr_context *ctx = swr_context(pipe);
-
-   assert(pq->result);
-   struct swr_query_result *result = pq->result;
-   boolean enable_stats = pq->enable_stats;
-
-   /* A few results don't require the core, so don't involve it */
-   switch (pq->type) {
-   case PIPE_QUERY_TIMESTAMP:
-   case PIPE_QUERY_TIME_ELAPSED:
-      result->timestamp = swr_get_timestamp(pipe->screen);
-      break;
-   case PIPE_QUERY_TIMESTAMP_DISJOINT:
-   case PIPE_QUERY_GPU_FINISHED:
-      /* nothing to do here */
-      break;
-   default:
-      /* TODO: should fence instead of stalling pipeline */
-      SwrWaitForIdle(ctx->swrContext);
-      memcpy(&result->core, &ctx->stats, sizeof(result->core));
-      memcpy(&result->coreFE, &ctx->statsFE, sizeof(result->coreFE));
-
-#if 0
-      if (!pq->fence) {
-         struct swr_screen *screen = swr_screen(pipe->screen);
-         swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
-      }
-      swr_fence_submit(ctx, pq->fence);
-#endif
-
-      /* Only change stat collection if there are no active queries */
-      if (ctx->active_queries == 0)
-         SwrEnableStats(ctx->swrContext, enable_stats);
-
-      break;
-   }
-}
-
-
static boolean
swr_get_query_result(struct pipe_context *pipe,
                     struct pipe_query *q,
@@ -120,8 +78,6 @@ swr_get_query_result(struct pipe_context *pipe,
                     union pipe_query_result *result)
{
   struct swr_query *pq = swr_query(q);
-   struct swr_query_result *start = &pq->start;
-   struct swr_query_result *end = &pq->end;
   unsigned index = pq->index;

   if (pq->fence) {
@@ -132,40 +88,37 @@ swr_get_query_result(struct pipe_context *pipe,
      swr_fence_reference(pipe->screen, &pq->fence, NULL);
   }

-   /* XXX: Need to handle counter rollover */
-
+   /* All values are reset to 0 at swr_begin_query, except starting timestamp.
+    * Counters become simply end values.  */
   switch (pq->type) {
   /* Booleans */
   case PIPE_QUERY_OCCLUSION_PREDICATE:
-      result->b = end->core.DepthPassCount != start->core.DepthPassCount;
+      result->b = pq->result.core.DepthPassCount != 0;
      break;
   case PIPE_QUERY_GPU_FINISHED:
      result->b = TRUE;
      break;
   /* Counters */
   case PIPE_QUERY_OCCLUSION_COUNTER:
-      result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;
+      result->u64 = pq->result.core.DepthPassCount;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
-      result->u64 = end->timestamp - start->timestamp;
+      result->u64 = pq->result.timestamp_end - pq->result.timestamp_start;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
-      result->u64 = end->coreFE.IaPrimitives - start->coreFE.IaPrimitives;
+      result->u64 = pq->result.coreFE.IaPrimitives;
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
-      result->u64 = end->coreFE.SoNumPrimsWritten[index]
-         - start->coreFE.SoNumPrimsWritten[index];
+      result->u64 = pq->result.coreFE.SoNumPrimsWritten[index];
      break;
   /* Structures */
   case PIPE_QUERY_SO_STATISTICS: {
      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
-      struct SWR_STATS_FE *start = &pq->start.coreFE;
-      struct SWR_STATS_FE *end = &pq->end.coreFE;
      so_stats->num_primitives_written =
-         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
+         pq->result.coreFE.SoNumPrimsWritten[index];
      so_stats->primitives_storage_needed =
-         end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
+         pq->result.coreFE.SoPrimStorageNeeded[index];
   } break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* os_get_time_nano returns nanoseconds */
@@ -175,29 +128,23 @@ swr_get_query_result(struct pipe_context *pipe,
   case PIPE_QUERY_PIPELINE_STATISTICS: {
      struct pipe_query_data_pipeline_statistics *p_stats =
         &result->pipeline_statistics;
-      struct SWR_STATS *start = &pq->start.core;
-      struct SWR_STATS *end = &pq->end.core;
-      struct SWR_STATS_FE *startFE = &pq->start.coreFE;
-      struct SWR_STATS_FE *endFE = &pq->end.coreFE;
-      p_stats->ia_vertices = endFE->IaVertices - startFE->IaVertices;
-      p_stats->ia_primitives = endFE->IaPrimitives - startFE->IaPrimitives;
-      p_stats->vs_invocations = endFE->VsInvocations - startFE->VsInvocations;
-      p_stats->gs_invocations = endFE->GsInvocations - startFE->GsInvocations;
-      p_stats->gs_primitives = endFE->GsPrimitives - startFE->GsPrimitives;
-      p_stats->c_invocations = endFE->CPrimitives - startFE->CPrimitives;
-      p_stats->c_primitives = endFE->CPrimitives - startFE->CPrimitives;
-      p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
-      p_stats->hs_invocations = endFE->HsInvocations - startFE->HsInvocations;
-      p_stats->ds_invocations = endFE->DsInvocations - startFE->DsInvocations;
-      p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
+      p_stats->ia_vertices = pq->result.coreFE.IaVertices;
+      p_stats->ia_primitives = pq->result.coreFE.IaPrimitives;
+      p_stats->vs_invocations = pq->result.coreFE.VsInvocations;
+      p_stats->gs_invocations = pq->result.coreFE.GsInvocations;
+      p_stats->gs_primitives = pq->result.coreFE.GsPrimitives;
+      p_stats->c_invocations = pq->result.coreFE.CPrimitives;
+      p_stats->c_primitives = pq->result.coreFE.CPrimitives;
+      p_stats->ps_invocations = pq->result.core.PsInvocations;
+      p_stats->hs_invocations = pq->result.coreFE.HsInvocations;
+      p_stats->ds_invocations = pq->result.coreFE.DsInvocations;
+      p_stats->cs_invocations = pq->result.core.CsInvocations;
    } break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
-      struct SWR_STATS_FE *start = &pq->start.coreFE;
-      struct SWR_STATS_FE *end = &pq->end.coreFE;
      uint64_t num_primitives_written =
-         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
+         pq->result.coreFE.SoNumPrimsWritten[index];
      uint64_t primitives_storage_needed =
-         end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
+         pq->result.coreFE.SoPrimStorageNeeded[index];
      result->b = num_primitives_written > primitives_storage_needed;
   }
      break;
@@ -215,21 +162,27 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
   struct swr_context *ctx = swr_context(pipe);
   struct swr_query *pq = swr_query(q);

-   assert(!pq->enable_stats && "swr_begin_query: Query is already active!");
-
   /* Initialize Results */
-   memset(&pq->start, 0, sizeof(pq->start));
-   memset(&pq->end, 0, sizeof(pq->end));
+   memset(&pq->result, 0, sizeof(pq->result));
+   switch (pq->type) {
+   case PIPE_QUERY_TIMESTAMP:
+      /* nothing to do */
+      break;
+   case PIPE_QUERY_TIME_ELAPSED:
+      pq->result.timestamp_start = swr_get_timestamp(pipe->screen);
+      break;
+   default:
+      /* Core counters required.  Update draw context with location to
+       * store results. */
+      swr_update_draw_context(ctx, &pq->result);

-   /* Gather start stats and enable SwrCore counters */
-   pq->result = &pq->start;
-   pq->enable_stats = TRUE;
-   swr_gather_stats(pipe, pq);
-   ctx->active_queries++;
+      /* Only change stat collection if there are no active queries */
+      if (ctx->active_queries == 0)
+         SwrEnableStats(ctx->swrContext, TRUE);
+      break;
+   }

-   /* override start timestamp to 0 for TIMESTAMP query */
-   if (pq->type == PIPE_QUERY_TIMESTAMP)
-      pq->start.timestamp = 0;
+   ctx->active_queries++;

   return true;
}
@@ -244,10 +197,27 @@ swr_end_query(struct pipe_context *pipe, struct pipe_query *q)
          && "swr_end_query, there are no active queries!");
   ctx->active_queries--;

-   /* Gather end stats and disable SwrCore counters */
-   pq->result = &pq->end;
-   pq->enable_stats = FALSE;
-   swr_gather_stats(pipe, pq);
+   switch (pq->type) {
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIME_ELAPSED:
+      pq->result.timestamp_end = swr_get_timestamp(pipe->screen);
+      break;
+   default:
+      /* Stats are updated asynchronously, a fence is used to signal
+       * completion. */
+      if (!pq->fence) {
+         struct swr_screen *screen = swr_screen(pipe->screen);
+         swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
+      }
+      swr_fence_submit(ctx, pq->fence);
+
+      /* Only change stat collection if there are no active queries */
+      if (ctx->active_queries == 0)
+         SwrEnableStats(ctx->swrContext, FALSE);
+
+      break;
+   }
+
   return true;
}

diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
index 931d687..c5160ce 100644
--- a/src/gallium/drivers/swr/swr_query.h
+++ b/src/gallium/drivers/swr/swr_query.h
@@ -30,20 +30,16 @@
struct swr_query_result {
   SWR_STATS core;
   SWR_STATS_FE coreFE;
-   uint64_t timestamp;
+   uint64_t timestamp_start;
+   uint64_t timestamp_end;
};

struct swr_query {
   unsigned type; /* PIPE_QUERY_* */
   unsigned index;

-   struct swr_query_result *result;
-   struct swr_query_result start;
-   struct swr_query_result end;
-
+   struct swr_query_result result;
   struct pipe_fence_handle *fence;
-
-   boolean enable_stats;
};

extern void swr_query_init(struct pipe_context *pipe);
--
2.7.4

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160930/ae5da354/attachment-0001.html>


More information about the mesa-dev mailing list