<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
{font-family:Helvetica;
panose-1:2 11 6 4 2 2 2 2 2 4;}
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0in;
margin-bottom:.0001pt;
font-size:12.0pt;
font-family:"Times New Roman",serif;}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{mso-style-priority:99;
color:purple;
text-decoration:underline;}
span.EmailStyle17
{mso-style-type:personal-reply;
font-family:"Calibri",sans-serif;
color:#1F497D;}
.MsoChpDefault
{mso-style-type:export-only;
font-size:10.0pt;}
@page WordSection1
{size:8.5in 11.0in;
margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-US" link="blue" vlink="purple">
<div class="WordSection1">
<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D">Reviewed-by: George Kyriazis &lt;<a href="mailto:george.kyriazis@intel.com">george.kyriazis@intel.com</a>&gt;<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
<div>
<div>
<blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">From: </span>
</b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Bruce Cherniak &lt;<a href="mailto:bruce.cherniak@intel.com">bruce.cherniak@intel.com</a>&gt;</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Subject: [PATCH] swr: Removed stalling SwrWaitForIdle from queries.</span></b><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Date: </span>
</b><span style="font-family:&quot;Helvetica&quot;,sans-serif">September 27, 2016 at 1:27:08 PM CDT</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">To: </span>
</b><span style="font-family:&quot;Helvetica&quot;,sans-serif">&lt;<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a>&gt;</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Cc: </span>
</b><span style="font-family:&quot;Helvetica&quot;,sans-serif">Bruce Cherniak &lt;<a href="mailto:bruce.cherniak@intel.com">bruce.cherniak@intel.com</a>&gt;</span><o:p></o:p></p>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt">Previous fundamental change in stats gathering added a temporary<br>
SwrWaitForIdle to begin_query and end_query. Code has been reworked to<br>
remove stall.<br>
---<br>
src/gallium/drivers/swr/swr_context.cpp | 33 +++----<br>
src/gallium/drivers/swr/swr_context.h | 11 ++-<br>
src/gallium/drivers/swr/swr_query.cpp | 152 +++++++++++++-------------------<br>
src/gallium/drivers/swr/swr_query.h | 10 +--<br>
4 files changed, 87 insertions(+), 119 deletions(-)<br>
<br>
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp<br>
index 15e60cd..cbc60e0 100644<br>
--- a/src/gallium/drivers/swr/swr_context.cpp<br>
+++ b/src/gallium/drivers/swr/swr_context.cpp<br>
@@ -24,6 +24,7 @@<br>
#include "util/u_memory.h"<br>
#include "util/u_inlines.h"<br>
#include "util/u_format.h"<br>
+#include "util/u_atomic.h"<br>
<br>
extern "C" {<br>
#include "util/u_transfer.h"<br>
@@ -352,9 +353,9 @@ swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)<br>
if (!pDC)<br>
return;<br>
<br>
- struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;<br>
+ struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;<br>
<br>
- SWR_STATS *pSwrStats = &ctx->stats;<br>
+ SWR_STATS *pSwrStats = &pqr->core;<br>
<br>
pSwrStats->DepthPassCount += pStats->DepthPassCount;<br>
pSwrStats->PsInvocations += pStats->PsInvocations;<br>
@@ -369,22 +370,24 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)<br>
if (!pDC)<br>
return;<br>
<br>
- struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;<br>
+ struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;<br>
<br>
- SWR_STATS_FE *pSwrStats = &ctx->statsFE;<br>
- pSwrStats->IaVertices += pStats->IaVertices;<br>
- pSwrStats->IaPrimitives += pStats->IaPrimitives;<br>
- pSwrStats->VsInvocations += pStats->VsInvocations;<br>
- pSwrStats->HsInvocations += pStats->HsInvocations;<br>
- pSwrStats->DsInvocations += pStats->DsInvocations;<br>
- pSwrStats->GsInvocations += pStats->GsInvocations;<br>
- pSwrStats->CInvocations += pStats->CInvocations;<br>
- pSwrStats->CPrimitives += pStats->CPrimitives;<br>
- pSwrStats->GsPrimitives += pStats->GsPrimitives;<br>
+ SWR_STATS_FE *pSwrStats = &pqr->coreFE;<br>
+ p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices);<br>
+ p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives);<br>
+ p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations);<br>
+ p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations);<br>
+ p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations);<br>
+ p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations);<br>
+ p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations);<br>
+ p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives);<br>
+ p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives);<br>
<br>
for (unsigned i = 0; i < 4; i++) {<br>
- pSwrStats->SoPrimStorageNeeded[i] += pStats->SoPrimStorageNeeded[i];<br>
- pSwrStats->SoNumPrimsWritten[i] += pStats->SoNumPrimsWritten[i];<br>
+ p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i],<br>
+ pStats->SoPrimStorageNeeded[i]);<br>
+ p_atomic_add(&pSwrStats->SoNumPrimsWritten[i],<br>
+ pStats->SoNumPrimsWritten[i]);<br>
}<br>
}<br>
<br>
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h<br>
index 6854d69..eecfe0d 100644<br>
--- a/src/gallium/drivers/swr/swr_context.h<br>
+++ b/src/gallium/drivers/swr/swr_context.h<br>
@@ -92,7 +92,7 @@ struct swr_draw_context {<br>
float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];<br>
<br>
SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];<br>
- void *swr_ctx;<br>
+ void *pStats;<br>
};<br>
<br>
/* gen_llvm_types FINI */<br>
@@ -159,9 +159,6 @@ struct swr_context {<br>
/* SWR private state - draw context */<br>
struct swr_draw_context swrDC;<br>
<br>
- SWR_STATS stats;<br>
- SWR_STATS_FE statsFE;<br>
-<br>
unsigned dirty; /**< Mask of SWR_NEW_x flags */<br>
};<br>
<br>
@@ -172,11 +169,13 @@ swr_context(struct pipe_context *pipe)<br>
}<br>
<br>
static INLINE void<br>
-swr_update_draw_context(struct swr_context *ctx)<br>
+swr_update_draw_context(struct swr_context *ctx,<br>
+ struct swr_query_result *pqr = nullptr)<br>
{<br>
swr_draw_context *pDC =<br>
(swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);<br>
- ctx->swrDC.swr_ctx = ctx;<br>
+ if (pqr)<br>
+ ctx->swrDC.pStats = pqr;<br>
memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context));<br>
}<br>
<br>
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp<br>
index c51c529..8bb0b16 100644<br>
--- a/src/gallium/drivers/swr/swr_query.cpp<br>
+++ b/src/gallium/drivers/swr/swr_query.cpp<br>
@@ -71,48 +71,6 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)<br>
}<br>
<br>
<br>
-static void<br>
-swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)<br>
-{<br>
- struct swr_context *ctx = swr_context(pipe);<br>
-<br>
- assert(pq->result);<br>
- struct swr_query_result *result = pq->result;<br>
- boolean enable_stats = pq->enable_stats;<br>
-<br>
- /* A few results don't require the core, so don't involve it */<br>
- switch (pq->type) {<br>
- case PIPE_QUERY_TIMESTAMP:<br>
- case PIPE_QUERY_TIME_ELAPSED:<br>
- result->timestamp = swr_get_timestamp(pipe->screen);<br>
- break;<br>
- case PIPE_QUERY_TIMESTAMP_DISJOINT:<br>
- case PIPE_QUERY_GPU_FINISHED:<br>
- /* nothing to do here */<br>
- break;<br>
- default:<br>
- /* TODO: should fence instead of stalling pipeline */<br>
- SwrWaitForIdle(ctx->swrContext);<br>
- memcpy(&result->core, &ctx->stats, sizeof(result->core));<br>
- memcpy(&result->coreFE, &ctx->statsFE, sizeof(result->coreFE));<br>
-<br>
-#if 0<br>
- if (!pq->fence) {<br>
- struct swr_screen *screen = swr_screen(pipe->screen);<br>
- swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);<br>
- }<br>
- swr_fence_submit(ctx, pq->fence);<br>
-#endif<br>
-<br>
- /* Only change stat collection if there are no active queries */<br>
- if (ctx->active_queries == 0)<br>
- SwrEnableStats(ctx->swrContext, enable_stats);<br>
-<br>
- break;<br>
- }<br>
-}<br>
-<br>
-<br>
static boolean<br>
swr_get_query_result(struct pipe_context *pipe,<br>
struct pipe_query *q,<br>
@@ -120,8 +78,6 @@ swr_get_query_result(struct pipe_context *pipe,<br>
union pipe_query_result *result)<br>
{<br>
struct swr_query *pq = swr_query(q);<br>
- struct swr_query_result *start = &pq->start;<br>
- struct swr_query_result *end = &pq->end;<br>
unsigned index = pq->index;<br>
<br>
if (pq->fence) {<br>
@@ -132,40 +88,37 @@ swr_get_query_result(struct pipe_context *pipe,<br>
swr_fence_reference(pipe->screen, &pq->fence, NULL);<br>
}<br>
<br>
- /* XXX: Need to handle counter rollover */<br>
-<br>
+ /* All values are reset to 0 at swr_begin_query, except starting timestamp.<br>
+ * Counters become simply end values. */<br>
switch (pq->type) {<br>
/* Booleans */<br>
case PIPE_QUERY_OCCLUSION_PREDICATE:<br>
- result->b = end->core.DepthPassCount != start->core.DepthPassCount;<br>
+ result->b = pq->result.core.DepthPassCount != 0;<br>
break;<br>
case PIPE_QUERY_GPU_FINISHED:<br>
result->b = TRUE;<br>
break;<br>
/* Counters */<br>
case PIPE_QUERY_OCCLUSION_COUNTER:<br>
- result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;<br>
+ result->u64 = pq->result.core.DepthPassCount;<br>
break;<br>
case PIPE_QUERY_TIMESTAMP:<br>
case PIPE_QUERY_TIME_ELAPSED:<br>
- result->u64 = end->timestamp - start->timestamp;<br>
+ result->u64 = pq->result.timestamp_end - pq->result.timestamp_start;<br>
break;<br>
case PIPE_QUERY_PRIMITIVES_GENERATED:<br>
- result->u64 = end->coreFE.IaPrimitives - start->coreFE.IaPrimitives;<br>
+ result->u64 = pq->result.coreFE.IaPrimitives;<br>
break;<br>
case PIPE_QUERY_PRIMITIVES_EMITTED:<br>
- result->u64 = end->coreFE.SoNumPrimsWritten[index]<br>
- - start->coreFE.SoNumPrimsWritten[index];<br>
+ result->u64 = pq->result.coreFE.SoNumPrimsWritten[index];<br>
break;<br>
/* Structures */<br>
case PIPE_QUERY_SO_STATISTICS: {<br>
struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;<br>
- struct SWR_STATS_FE *start = &pq->start.coreFE;<br>
- struct SWR_STATS_FE *end = &pq->end.coreFE;<br>
so_stats->num_primitives_written =<br>
- end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];<br>
+ pq->result.coreFE.SoNumPrimsWritten[index];<br>
so_stats->primitives_storage_needed =<br>
- end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];<br>
+ pq->result.coreFE.SoPrimStorageNeeded[index];<br>
} break;<br>
case PIPE_QUERY_TIMESTAMP_DISJOINT:<br>
/* os_get_time_nano returns nanoseconds */<br>
@@ -175,29 +128,23 @@ swr_get_query_result(struct pipe_context *pipe,<br>
case PIPE_QUERY_PIPELINE_STATISTICS: {<br>
struct pipe_query_data_pipeline_statistics *p_stats =<br>
&result->pipeline_statistics;<br>
- struct SWR_STATS *start = &pq->start.core;<br>
- struct SWR_STATS *end = &pq->end.core;<br>
- struct SWR_STATS_FE *startFE = &pq->start.coreFE;<br>
- struct SWR_STATS_FE *endFE = &pq->end.coreFE;<br>
- p_stats->ia_vertices = endFE->IaVertices - startFE->IaVertices;<br>
- p_stats->ia_primitives = endFE->IaPrimitives - startFE->IaPrimitives;<br>
- p_stats->vs_invocations = endFE->VsInvocations - startFE->VsInvocations;<br>
- p_stats->gs_invocations = endFE->GsInvocations - startFE->GsInvocations;<br>
- p_stats->gs_primitives = endFE->GsPrimitives - startFE->GsPrimitives;<br>
- p_stats->c_invocations = endFE->CPrimitives - startFE->CPrimitives;<br>
- p_stats->c_primitives = endFE->CPrimitives - startFE->CPrimitives;<br>
- p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;<br>
- p_stats->hs_invocations = endFE->HsInvocations - startFE->HsInvocations;<br>
- p_stats->ds_invocations = endFE->DsInvocations - startFE->DsInvocations;<br>
- p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;<br>
+ p_stats->ia_vertices = pq->result.coreFE.IaVertices;<br>
+ p_stats->ia_primitives = pq->result.coreFE.IaPrimitives;<br>
+ p_stats->vs_invocations = pq->result.coreFE.VsInvocations;<br>
+ p_stats->gs_invocations = pq->result.coreFE.GsInvocations;<br>
+ p_stats->gs_primitives = pq->result.coreFE.GsPrimitives;<br>
+ p_stats->c_invocations = pq->result.coreFE.CPrimitives;<br>
+ p_stats->c_primitives = pq->result.coreFE.CPrimitives;<br>
+ p_stats->ps_invocations = pq->result.core.PsInvocations;<br>
+ p_stats->hs_invocations = pq->result.coreFE.HsInvocations;<br>
+ p_stats->ds_invocations = pq->result.coreFE.DsInvocations;<br>
+ p_stats->cs_invocations = pq->result.core.CsInvocations;<br>
} break;<br>
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {<br>
- struct SWR_STATS_FE *start = &pq->start.coreFE;<br>
- struct SWR_STATS_FE *end = &pq->end.coreFE;<br>
uint64_t num_primitives_written =<br>
- end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];<br>
+ pq->result.coreFE.SoNumPrimsWritten[index];<br>
uint64_t primitives_storage_needed =<br>
- end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];<br>
+ pq->result.coreFE.SoPrimStorageNeeded[index];<br>
result->b = num_primitives_written > primitives_storage_needed;<br>
}<br>
break;<br>
@@ -215,21 +162,27 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)<br>
struct swr_context *ctx = swr_context(pipe);<br>
struct swr_query *pq = swr_query(q);<br>
<br>
- assert(!pq->enable_stats && "swr_begin_query: Query is already active!");<br>
-<br>
/* Initialize Results */<br>
- memset(&pq->start, 0, sizeof(pq->start));<br>
- memset(&pq->end, 0, sizeof(pq->end));<br>
+ memset(&pq->result, 0, sizeof(pq->result));<br>
+ switch (pq->type) {<br>
+ case PIPE_QUERY_TIMESTAMP:<br>
+ /* nothing to do */<br>
+ break;<br>
+ case PIPE_QUERY_TIME_ELAPSED:<br>
+ pq->result.timestamp_start = swr_get_timestamp(pipe->screen);<br>
+ break;<br>
+ default:<br>
+ /* Core counters required. Update draw context with location to<br>
+ * store results. */<br>
+ swr_update_draw_context(ctx, &pq->result);<br>
<br>
- /* Gather start stats and enable SwrCore counters */<br>
- pq->result = &pq->start;<br>
- pq->enable_stats = TRUE;<br>
- swr_gather_stats(pipe, pq);<br>
- ctx->active_queries++;<br>
+ /* Only change stat collection if there are no active queries */<br>
+ if (ctx->active_queries == 0)<br>
+ SwrEnableStats(ctx->swrContext, TRUE);<br>
+ break;<br>
+ }<br>
<br>
- /* override start timestamp to 0 for TIMESTAMP query */<br>
- if (pq->type == PIPE_QUERY_TIMESTAMP)<br>
- pq->start.timestamp = 0;<br>
+ ctx->active_queries++;<br>
<br>
return true;<br>
}<br>
@@ -244,10 +197,27 @@ swr_end_query(struct pipe_context *pipe, struct pipe_query *q)<br>
&& "swr_end_query, there are no active queries!");<br>
ctx->active_queries--;<br>
<br>
- /* Gather end stats and disable SwrCore counters */<br>
- pq->result = &pq->end;<br>
- pq->enable_stats = FALSE;<br>
- swr_gather_stats(pipe, pq);<br>
+ switch (pq->type) {<br>
+ case PIPE_QUERY_TIMESTAMP:<br>
+ case PIPE_QUERY_TIME_ELAPSED:<br>
+ pq->result.timestamp_end = swr_get_timestamp(pipe->screen);<br>
+ break;<br>
+ default:<br>
+ /* Stats are updated asynchronously, a fence is used to signal<br>
+ * completion. */<br>
+ if (!pq->fence) {<br>
+ struct swr_screen *screen = swr_screen(pipe->screen);<br>
+ swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);<br>
+ }<br>
+ swr_fence_submit(ctx, pq->fence);<br>
+<br>
+ /* Only change stat collection if there are no active queries */<br>
+ if (ctx->active_queries == 0)<br>
+ SwrEnableStats(ctx->swrContext, FALSE);<br>
+<br>
+ break;<br>
+ }<br>
+<br>
return true;<br>
}<br>
<br>
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h<br>
index 931d687..c5160ce 100644<br>
--- a/src/gallium/drivers/swr/swr_query.h<br>
+++ b/src/gallium/drivers/swr/swr_query.h<br>
@@ -30,20 +30,16 @@<br>
struct swr_query_result {<br>
SWR_STATS core;<br>
SWR_STATS_FE coreFE;<br>
- uint64_t timestamp;<br>
+ uint64_t timestamp_start;<br>
+ uint64_t timestamp_end;<br>
};<br>
<br>
struct swr_query {<br>
unsigned type; /* PIPE_QUERY_* */<br>
unsigned index;<br>
<br>
- struct swr_query_result *result;<br>
- struct swr_query_result start;<br>
- struct swr_query_result end;<br>
-<br>
+ struct swr_query_result result;<br>
struct pipe_fence_handle *fence;<br>
-<br>
- boolean enable_stats;<br>
};<br>
<br>
extern void swr_query_init(struct pipe_context *pipe);<br>
-- <br>
2.7.4<o:p></o:p></p>
</div>
</div>
</blockquote>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
</div>
</body>
</html>