<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
        {font-family:Helvetica;
        panose-1:2 11 6 4 2 2 2 2 2 4;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
span.EmailStyle17
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:#1F497D;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-US" link="blue" vlink="purple">
<div class="WordSection1">
<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D">Reviewed-by: George Kyriazis &lt;<a href="mailto:george.kyriazis@intel.com">george.kyriazis@intel.com</a>&gt;<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
<div>
<div>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">From: </span>
</b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Bruce Cherniak &lt;<a href="mailto:bruce.cherniak@intel.com">bruce.cherniak@intel.com</a>&gt;</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Subject: [PATCH] swr: Removed stalling SwrWaitForIdle from queries.</span></b><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Date: </span>
</b><span style="font-family:&quot;Helvetica&quot;,sans-serif">September 27, 2016 at 1:27:08 PM CDT</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">To: </span>
</b><span style="font-family:&quot;Helvetica&quot;,sans-serif">&lt;<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a>&gt;</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Cc: </span>
</b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Bruce Cherniak &lt;<a href="mailto:bruce.cherniak@intel.com">bruce.cherniak@intel.com</a>&gt;</span><o:p></o:p></p>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt">Previous fundamental change in stats gathering added a temporary<br>
SwrWaitForIdle to begin_query and end_query.  Code has been reworked to<br>
remove stall.<br>
---<br>
src/gallium/drivers/swr/swr_context.cpp |  33 +++----<br>
src/gallium/drivers/swr/swr_context.h   |  11 ++-<br>
src/gallium/drivers/swr/swr_query.cpp   | 152 +++++++++++++-------------------<br>
src/gallium/drivers/swr/swr_query.h     |  10 +--<br>
4 files changed, 87 insertions(+), 119 deletions(-)<br>
<br>
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp<br>
index 15e60cd..cbc60e0 100644<br>
--- a/src/gallium/drivers/swr/swr_context.cpp<br>
+++ b/src/gallium/drivers/swr/swr_context.cpp<br>
@@ -24,6 +24,7 @@<br>
#include "util/u_memory.h"<br>
#include "util/u_inlines.h"<br>
#include "util/u_format.h"<br>
+#include "util/u_atomic.h"<br>
<br>
extern "C" {<br>
#include "util/u_transfer.h"<br>
@@ -352,9 +353,9 @@ swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)<br>
   if (!pDC)<br>
      return;<br>
<br>
-   struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;<br>
+   struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;<br>
<br>
-   SWR_STATS *pSwrStats = &ctx->stats;<br>
+   SWR_STATS *pSwrStats = &pqr->core;<br>
<br>
   pSwrStats->DepthPassCount += pStats->DepthPassCount;<br>
   pSwrStats->PsInvocations += pStats->PsInvocations;<br>
@@ -369,22 +370,24 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)<br>
   if (!pDC)<br>
      return;<br>
<br>
-   struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;<br>
+   struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;<br>
<br>
-   SWR_STATS_FE *pSwrStats = &ctx->statsFE;<br>
-   pSwrStats->IaVertices += pStats->IaVertices;<br>
-   pSwrStats->IaPrimitives += pStats->IaPrimitives;<br>
-   pSwrStats->VsInvocations += pStats->VsInvocations;<br>
-   pSwrStats->HsInvocations += pStats->HsInvocations;<br>
-   pSwrStats->DsInvocations += pStats->DsInvocations;<br>
-   pSwrStats->GsInvocations += pStats->GsInvocations;<br>
-   pSwrStats->CInvocations += pStats->CInvocations;<br>
-   pSwrStats->CPrimitives += pStats->CPrimitives;<br>
-   pSwrStats->GsPrimitives += pStats->GsPrimitives;<br>
+   SWR_STATS_FE *pSwrStats = &pqr->coreFE;<br>
+   p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices);<br>
+   p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives);<br>
+   p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations);<br>
+   p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations);<br>
+   p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations);<br>
+   p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations);<br>
+   p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations);<br>
+   p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives);<br>
+   p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives);<br>
<br>
   for (unsigned i = 0; i < 4; i++) {<br>
-      pSwrStats->SoPrimStorageNeeded[i] += pStats->SoPrimStorageNeeded[i];<br>
-      pSwrStats->SoNumPrimsWritten[i] += pStats->SoNumPrimsWritten[i];<br>
+      p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i],<br>
+            pStats->SoPrimStorageNeeded[i]);<br>
+      p_atomic_add(&pSwrStats->SoNumPrimsWritten[i],<br>
+            pStats->SoNumPrimsWritten[i]);<br>
   }<br>
}<br>
<br>
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h<br>
index 6854d69..eecfe0d 100644<br>
--- a/src/gallium/drivers/swr/swr_context.h<br>
+++ b/src/gallium/drivers/swr/swr_context.h<br>
@@ -92,7 +92,7 @@ struct swr_draw_context {<br>
   float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];<br>
<br>
   SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];<br>
-   void *swr_ctx;<br>
+   void *pStats;<br>
};<br>
<br>
/* gen_llvm_types FINI */<br>
@@ -159,9 +159,6 @@ struct swr_context {<br>
   /* SWR private state - draw context */<br>
   struct swr_draw_context swrDC;<br>
<br>
-   SWR_STATS stats;<br>
-   SWR_STATS_FE statsFE;<br>
-<br>
   unsigned dirty; /**< Mask of SWR_NEW_x flags */<br>
};<br>
<br>
@@ -172,11 +169,13 @@ swr_context(struct pipe_context *pipe)<br>
}<br>
<br>
static INLINE void<br>
-swr_update_draw_context(struct swr_context *ctx)<br>
+swr_update_draw_context(struct swr_context *ctx,<br>
+      struct swr_query_result *pqr = nullptr)<br>
{<br>
   swr_draw_context *pDC =<br>
      (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);<br>
-   ctx->swrDC.swr_ctx = ctx;<br>
+   if (pqr)<br>
+      ctx->swrDC.pStats = pqr;<br>
   memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context));<br>
}<br>
<br>
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp<br>
index c51c529..8bb0b16 100644<br>
--- a/src/gallium/drivers/swr/swr_query.cpp<br>
+++ b/src/gallium/drivers/swr/swr_query.cpp<br>
@@ -71,48 +71,6 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)<br>
}<br>
<br>
<br>
-static void<br>
-swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)<br>
-{<br>
-   struct swr_context *ctx = swr_context(pipe);<br>
-<br>
-   assert(pq->result);<br>
-   struct swr_query_result *result = pq->result;<br>
-   boolean enable_stats = pq->enable_stats;<br>
-<br>
-   /* A few results don't require the core, so don't involve it */<br>
-   switch (pq->type) {<br>
-   case PIPE_QUERY_TIMESTAMP:<br>
-   case PIPE_QUERY_TIME_ELAPSED:<br>
-      result->timestamp = swr_get_timestamp(pipe->screen);<br>
-      break;<br>
-   case PIPE_QUERY_TIMESTAMP_DISJOINT:<br>
-   case PIPE_QUERY_GPU_FINISHED:<br>
-      /* nothing to do here */<br>
-      break;<br>
-   default:<br>
-      /* TODO: should fence instead of stalling pipeline */<br>
-      SwrWaitForIdle(ctx->swrContext);<br>
-      memcpy(&result->core, &ctx->stats, sizeof(result->core));<br>
-      memcpy(&result->coreFE, &ctx->statsFE, sizeof(result->coreFE));<br>
-<br>
-#if 0<br>
-      if (!pq->fence) {<br>
-         struct swr_screen *screen = swr_screen(pipe->screen);<br>
-         swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);<br>
-      }<br>
-      swr_fence_submit(ctx, pq->fence);<br>
-#endif<br>
-<br>
-      /* Only change stat collection if there are no active queries */<br>
-      if (ctx->active_queries == 0)<br>
-         SwrEnableStats(ctx->swrContext, enable_stats);<br>
-<br>
-      break;<br>
-   }<br>
-}<br>
-<br>
-<br>
static boolean<br>
swr_get_query_result(struct pipe_context *pipe,<br>
                     struct pipe_query *q,<br>
@@ -120,8 +78,6 @@ swr_get_query_result(struct pipe_context *pipe,<br>
                     union pipe_query_result *result)<br>
{<br>
   struct swr_query *pq = swr_query(q);<br>
-   struct swr_query_result *start = &pq->start;<br>
-   struct swr_query_result *end = &pq->end;<br>
   unsigned index = pq->index;<br>
<br>
   if (pq->fence) {<br>
@@ -132,40 +88,37 @@ swr_get_query_result(struct pipe_context *pipe,<br>
      swr_fence_reference(pipe->screen, &pq->fence, NULL);<br>
   }<br>
<br>
-   /* XXX: Need to handle counter rollover */<br>
-<br>
+   /* All values are reset to 0 at swr_begin_query, except starting timestamp.<br>
+    * Counters become simply end values.  */<br>
   switch (pq->type) {<br>
   /* Booleans */<br>
   case PIPE_QUERY_OCCLUSION_PREDICATE:<br>
-      result->b = end->core.DepthPassCount != start->core.DepthPassCount;<br>
+      result->b = pq->result.core.DepthPassCount != 0;<br>
      break;<br>
   case PIPE_QUERY_GPU_FINISHED:<br>
      result->b = TRUE;<br>
      break;<br>
   /* Counters */<br>
   case PIPE_QUERY_OCCLUSION_COUNTER:<br>
-      result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;<br>
+      result->u64 = pq->result.core.DepthPassCount;<br>
      break;<br>
   case PIPE_QUERY_TIMESTAMP:<br>
   case PIPE_QUERY_TIME_ELAPSED:<br>
-      result->u64 = end->timestamp - start->timestamp;<br>
+      result->u64 = pq->result.timestamp_end - pq->result.timestamp_start;<br>
      break;<br>
   case PIPE_QUERY_PRIMITIVES_GENERATED:<br>
-      result->u64 = end->coreFE.IaPrimitives - start->coreFE.IaPrimitives;<br>
+      result->u64 = pq->result.coreFE.IaPrimitives;<br>
      break;<br>
   case PIPE_QUERY_PRIMITIVES_EMITTED:<br>
-      result->u64 = end->coreFE.SoNumPrimsWritten[index]<br>
-         - start->coreFE.SoNumPrimsWritten[index];<br>
+      result->u64 = pq->result.coreFE.SoNumPrimsWritten[index];<br>
      break;<br>
   /* Structures */<br>
   case PIPE_QUERY_SO_STATISTICS: {<br>
      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;<br>
-      struct SWR_STATS_FE *start = &pq->start.coreFE;<br>
-      struct SWR_STATS_FE *end = &pq->end.coreFE;<br>
      so_stats->num_primitives_written =<br>
-         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];<br>
+         pq->result.coreFE.SoNumPrimsWritten[index];<br>
      so_stats->primitives_storage_needed =<br>
-         end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];<br>
+         pq->result.coreFE.SoPrimStorageNeeded[index];<br>
   } break;<br>
   case PIPE_QUERY_TIMESTAMP_DISJOINT:<br>
      /* os_get_time_nano returns nanoseconds */<br>
@@ -175,29 +128,23 @@ swr_get_query_result(struct pipe_context *pipe,<br>
   case PIPE_QUERY_PIPELINE_STATISTICS: {<br>
      struct pipe_query_data_pipeline_statistics *p_stats =<br>
         &result->pipeline_statistics;<br>
-      struct SWR_STATS *start = &pq->start.core;<br>
-      struct SWR_STATS *end = &pq->end.core;<br>
-      struct SWR_STATS_FE *startFE = &pq->start.coreFE;<br>
-      struct SWR_STATS_FE *endFE = &pq->end.coreFE;<br>
-      p_stats->ia_vertices = endFE->IaVertices - startFE->IaVertices;<br>
-      p_stats->ia_primitives = endFE->IaPrimitives - startFE->IaPrimitives;<br>
-      p_stats->vs_invocations = endFE->VsInvocations - startFE->VsInvocations;<br>
-      p_stats->gs_invocations = endFE->GsInvocations - startFE->GsInvocations;<br>
-      p_stats->gs_primitives = endFE->GsPrimitives - startFE->GsPrimitives;<br>
-      p_stats->c_invocations = endFE->CPrimitives - startFE->CPrimitives;<br>
-      p_stats->c_primitives = endFE->CPrimitives - startFE->CPrimitives;<br>
-      p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;<br>
-      p_stats->hs_invocations = endFE->HsInvocations - startFE->HsInvocations;<br>
-      p_stats->ds_invocations = endFE->DsInvocations - startFE->DsInvocations;<br>
-      p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;<br>
+      p_stats->ia_vertices = pq->result.coreFE.IaVertices;<br>
+      p_stats->ia_primitives = pq->result.coreFE.IaPrimitives;<br>
+      p_stats->vs_invocations = pq->result.coreFE.VsInvocations;<br>
+      p_stats->gs_invocations = pq->result.coreFE.GsInvocations;<br>
+      p_stats->gs_primitives = pq->result.coreFE.GsPrimitives;<br>
+      p_stats->c_invocations = pq->result.coreFE.CPrimitives;<br>
+      p_stats->c_primitives = pq->result.coreFE.CPrimitives;<br>
+      p_stats->ps_invocations = pq->result.core.PsInvocations;<br>
+      p_stats->hs_invocations = pq->result.coreFE.HsInvocations;<br>
+      p_stats->ds_invocations = pq->result.coreFE.DsInvocations;<br>
+      p_stats->cs_invocations = pq->result.core.CsInvocations;<br>
    } break;<br>
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {<br>
-      struct SWR_STATS_FE *start = &pq->start.coreFE;<br>
-      struct SWR_STATS_FE *end = &pq->end.coreFE;<br>
      uint64_t num_primitives_written =<br>
-         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];<br>
+         pq->result.coreFE.SoNumPrimsWritten[index];<br>
      uint64_t primitives_storage_needed =<br>
-         end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];<br>
+         pq->result.coreFE.SoPrimStorageNeeded[index];<br>
      result->b = num_primitives_written > primitives_storage_needed;<br>
   }<br>
      break;<br>
@@ -215,21 +162,27 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)<br>
   struct swr_context *ctx = swr_context(pipe);<br>
   struct swr_query *pq = swr_query(q);<br>
<br>
-   assert(!pq->enable_stats && "swr_begin_query: Query is already active!");<br>
-<br>
   /* Initialize Results */<br>
-   memset(&pq->start, 0, sizeof(pq->start));<br>
-   memset(&pq->end, 0, sizeof(pq->end));<br>
+   memset(&pq->result, 0, sizeof(pq->result));<br>
+   switch (pq->type) {<br>
+   case PIPE_QUERY_TIMESTAMP:<br>
+      /* nothing to do */<br>
+      break;<br>
+   case PIPE_QUERY_TIME_ELAPSED:<br>
+      pq->result.timestamp_start = swr_get_timestamp(pipe->screen);<br>
+      break;<br>
+   default:<br>
+      /* Core counters required.  Update draw context with location to<br>
+       * store results. */<br>
+      swr_update_draw_context(ctx, &pq->result);<br>
<br>
-   /* Gather start stats and enable SwrCore counters */<br>
-   pq->result = &pq->start;<br>
-   pq->enable_stats = TRUE;<br>
-   swr_gather_stats(pipe, pq);<br>
-   ctx->active_queries++;<br>
+      /* Only change stat collection if there are no active queries */<br>
+      if (ctx->active_queries == 0)<br>
+         SwrEnableStats(ctx->swrContext, TRUE);<br>
+      break;<br>
+   }<br>
<br>
-   /* override start timestamp to 0 for TIMESTAMP query */<br>
-   if (pq->type == PIPE_QUERY_TIMESTAMP)<br>
-      pq->start.timestamp = 0;<br>
+   ctx->active_queries++;<br>
<br>
   return true;<br>
}<br>
@@ -244,10 +197,27 @@ swr_end_query(struct pipe_context *pipe, struct pipe_query *q)<br>
          && "swr_end_query, there are no active queries!");<br>
   ctx->active_queries--;<br>
<br>
-   /* Gather end stats and disable SwrCore counters */<br>
-   pq->result = &pq->end;<br>
-   pq->enable_stats = FALSE;<br>
-   swr_gather_stats(pipe, pq);<br>
+   switch (pq->type) {<br>
+   case PIPE_QUERY_TIMESTAMP:<br>
+   case PIPE_QUERY_TIME_ELAPSED:<br>
+      pq->result.timestamp_end = swr_get_timestamp(pipe->screen);<br>
+      break;<br>
+   default:<br>
+      /* Stats are updated asynchronously, a fence is used to signal<br>
+       * completion. */<br>
+      if (!pq->fence) {<br>
+         struct swr_screen *screen = swr_screen(pipe->screen);<br>
+         swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);<br>
+      }<br>
+      swr_fence_submit(ctx, pq->fence);<br>
+<br>
+      /* Only change stat collection if there are no active queries */<br>
+      if (ctx->active_queries == 0)<br>
+         SwrEnableStats(ctx->swrContext, FALSE);<br>
+<br>
+      break;<br>
+   }<br>
+<br>
   return true;<br>
}<br>
<br>
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h<br>
index 931d687..c5160ce 100644<br>
--- a/src/gallium/drivers/swr/swr_query.h<br>
+++ b/src/gallium/drivers/swr/swr_query.h<br>
@@ -30,20 +30,16 @@<br>
struct swr_query_result {<br>
   SWR_STATS core;<br>
   SWR_STATS_FE coreFE;<br>
-   uint64_t timestamp;<br>
+   uint64_t timestamp_start;<br>
+   uint64_t timestamp_end;<br>
};<br>
<br>
struct swr_query {<br>
   unsigned type; /* PIPE_QUERY_* */<br>
   unsigned index;<br>
<br>
-   struct swr_query_result *result;<br>
-   struct swr_query_result start;<br>
-   struct swr_query_result end;<br>
-<br>
+   struct swr_query_result result;<br>
   struct pipe_fence_handle *fence;<br>
-<br>
-   boolean enable_stats;<br>
};<br>
<br>
extern void swr_query_init(struct pipe_context *pipe);<br>
-- <br>
2.7.4<o:p></o:p></p>
</div>
</div>
</blockquote>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
</div>
</body>
</html>