[Mesa-dev] [PATCH] swr: Remove stall waiting for core query counters.
Bruce Cherniak
bruce.cherniak at intel.com
Thu Apr 28 17:13:15 UTC 2016
When gathering query results, swr_gather_stats was
unnecessarily stalling the entire pipeline. Results are now
collected asynchronously, with a fence marking completion.
---
src/gallium/drivers/swr/swr_fence.cpp | 6 -
src/gallium/drivers/swr/swr_fence.h | 8 ++
src/gallium/drivers/swr/swr_query.cpp | 180 ++++++++++++---------------------
src/gallium/drivers/swr/swr_query.h | 11 ++-
4 files changed, 81 insertions(+), 124 deletions(-)
diff --git a/src/gallium/drivers/swr/swr_fence.cpp b/src/gallium/drivers/swr/swr_fence.cpp
index 2e95b39..8a8e864 100644
--- a/src/gallium/drivers/swr/swr_fence.cpp
+++ b/src/gallium/drivers/swr/swr_fence.cpp
@@ -105,12 +105,6 @@ swr_fence_reference(struct pipe_screen *screen,
swr_fence_destroy(old);
}
-static INLINE boolean
-swr_is_fence_done(struct pipe_fence_handle *fence_handle)
-{
- struct swr_fence *fence = swr_fence(fence_handle);
- return (fence->read == fence->write);
-}
/*
* Wait for the fence to finish.
diff --git a/src/gallium/drivers/swr/swr_fence.h b/src/gallium/drivers/swr/swr_fence.h
index df3776e..47f4d2e 100644
--- a/src/gallium/drivers/swr/swr_fence.h
+++ b/src/gallium/drivers/swr/swr_fence.h
@@ -45,6 +45,14 @@ swr_fence(struct pipe_fence_handle *fence)
return (struct swr_fence *)fence;
}
+
+static INLINE boolean
+swr_is_fence_done(struct pipe_fence_handle *fence_handle)
+{
+ struct swr_fence *fence = swr_fence(fence_handle);
+ return (fence->read == fence->write);
+}
+
static INLINE boolean
swr_is_fence_pending(struct pipe_fence_handle *fence_handle)
{
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
index f038a6e..5c59965 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -62,10 +62,8 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
struct swr_query *pq = swr_query(q);
if (pq->fence) {
- if (!swr_is_fence_pending(pq->fence)) {
- swr_fence_submit(swr_context(pipe), pq->fence);
+ if (swr_is_fence_pending(pq->fence))
swr_fence_finish(pipe->screen, pq->fence, 0);
- }
swr_fence_reference(pipe->screen, &pq->fence, NULL);
}
@@ -73,100 +71,45 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
}
-// XXX Create a fence callback, rather than stalling SwrWaitForIdle
static void
swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
{
struct swr_context *ctx = swr_context(pipe);
assert(pq->result);
- union pipe_query_result *result = pq->result;
+ struct swr_query_result *result = pq->result;
boolean enable_stats = pq->enable_stats;
- SWR_STATS swr_stats = {0};
-
- if (pq->fence) {
- if (!swr_is_fence_pending(pq->fence)) {
- swr_fence_submit(ctx, pq->fence);
- swr_fence_finish(pipe->screen, pq->fence, 0);
- }
- swr_fence_reference(pipe->screen, &pq->fence, NULL);
- }
- /*
- * These queries don't need SWR Stats enabled in the core
- * Set and return.
- */
+ /* A few results don't require the core, so don't involve it */
switch (pq->type) {
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
- result->u64 = swr_get_timestamp(pipe->screen);
- return;
+ result->timestamp = swr_get_timestamp(pipe->screen);
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* nothing to do here */
- return;
- break;
case PIPE_QUERY_GPU_FINISHED:
- result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId
- vs LastRetiredId? */
- return;
+ /* nothing to do here */
break;
default:
- /* Any query that needs SwrCore stats */
- break;
- }
-
- /*
- * All other results are collected from SwrCore counters
- */
+ /*
+ * All other results are collected from SwrCore counters via
+ * SwrGetStats. This returns immediately, but results are later filled
+ * in by the backend. Fence status is the only indication of
+ * completion. */
+ SwrGetStats(ctx->swrContext, &result->core);
+
+ if (!pq->fence) {
+ struct swr_screen *screen = swr_screen(pipe->screen);
+ swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
+ }
+ swr_fence_submit(ctx, pq->fence);
- /* XXX, Should turn this into a fence callback and skip the stall */
- SwrGetStats(ctx->swrContext, &swr_stats);
- /* SwrGetStats returns immediately, wait for collection */
- SwrWaitForIdle(ctx->swrContext);
+ /* Only change stat collection if there are no active queries */
+ if (ctx->active_queries == 0)
+ SwrEnableStats(ctx->swrContext, enable_stats);
- switch (pq->type) {
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- case PIPE_QUERY_OCCLUSION_COUNTER:
- result->u64 = swr_stats.DepthPassCount;
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- result->u64 = swr_stats.IaPrimitives;
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
- break;
- case PIPE_QUERY_SO_STATISTICS:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
- struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
- so_stats->num_primitives_written =
- swr_stats.SoNumPrimsWritten[pq->index];
- so_stats->primitives_storage_needed =
- swr_stats.SoPrimStorageNeeded[pq->index];
- } break;
- case PIPE_QUERY_PIPELINE_STATISTICS: {
- struct pipe_query_data_pipeline_statistics *p_stats =
- &result->pipeline_statistics;
- p_stats->ia_vertices = swr_stats.IaVertices;
- p_stats->ia_primitives = swr_stats.IaPrimitives;
- p_stats->vs_invocations = swr_stats.VsInvocations;
- p_stats->gs_invocations = swr_stats.GsInvocations;
- p_stats->gs_primitives = swr_stats.GsPrimitives;
- p_stats->c_invocations = swr_stats.CPrimitives;
- p_stats->c_primitives = swr_stats.CPrimitives;
- p_stats->ps_invocations = swr_stats.PsInvocations;
- p_stats->hs_invocations = swr_stats.HsInvocations;
- p_stats->ds_invocations = swr_stats.DsInvocations;
- p_stats->cs_invocations = swr_stats.CsInvocations;
- } break;
- default:
- assert(0 && "Unsupported query");
break;
}
-
- /* Only change stat collection if there are no active queries */
- if (ctx->active_queries == 0)
- SwrEnableStats(ctx->swrContext, enable_stats);
}
@@ -176,16 +119,16 @@ swr_get_query_result(struct pipe_context *pipe,
boolean wait,
union pipe_query_result *result)
{
- struct swr_context *ctx = swr_context(pipe);
struct swr_query *pq = swr_query(q);
+ struct swr_query_result *start = &pq->start;
+ struct swr_query_result *end = &pq->end;
+ unsigned index = pq->index;
if (pq->fence) {
- if (!swr_is_fence_pending(pq->fence)) {
- swr_fence_submit(ctx, pq->fence);
- if (!wait)
- return FALSE;
- swr_fence_finish(pipe->screen, pq->fence, 0);
- }
+ if (!wait && !swr_is_fence_done(pq->fence))
+ return FALSE;
+
+ swr_fence_finish(pipe->screen, pq->fence, 0);
swr_fence_reference(pipe->screen, &pq->fence, NULL);
}
@@ -194,62 +137,67 @@ swr_get_query_result(struct pipe_context *pipe,
switch (pq->type) {
/* Booleans */
case PIPE_QUERY_OCCLUSION_PREDICATE:
- result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
+ result->b = end->core.DepthPassCount != start->core.DepthPassCount;
break;
case PIPE_QUERY_GPU_FINISHED:
- result->b = pq->end.b;
+ result->b = TRUE;
break;
/* Counters */
case PIPE_QUERY_OCCLUSION_COUNTER:
+ result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;
+ break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
+ result->u64 = end->timestamp - start->timestamp;
+ break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
+ result->u64 = end->core.IaPrimitives - start->core.IaPrimitives;
case PIPE_QUERY_PRIMITIVES_EMITTED:
- result->u64 = pq->end.u64 - pq->start.u64;
+ result->u64 = end->core.SoNumPrimsWritten[index]
+ - start->core.SoNumPrimsWritten[index];
break;
/* Structures */
case PIPE_QUERY_SO_STATISTICS: {
struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
- struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
- struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+ struct SWR_STATS *start = &pq->start.core;
+ struct SWR_STATS *end = &pq->end.core;
so_stats->num_primitives_written =
- end->num_primitives_written - start->num_primitives_written;
+ end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
so_stats->primitives_storage_needed =
- end->primitives_storage_needed - start->primitives_storage_needed;
+ end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
} break;
- case PIPE_QUERY_TIMESTAMP_DISJOINT: {
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* os_get_time_nano returns nanoseconds */
result->timestamp_disjoint.frequency = UINT64_C(1000000000);
result->timestamp_disjoint.disjoint = FALSE;
- } break;
+ break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
struct pipe_query_data_pipeline_statistics *p_stats =
&result->pipeline_statistics;
- struct pipe_query_data_pipeline_statistics *start =
- &pq->start.pipeline_statistics;
- struct pipe_query_data_pipeline_statistics *end =
- &pq->end.pipeline_statistics;
- p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
- p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
- p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
- p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
- p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
- p_stats->c_invocations = end->c_invocations - start->c_invocations;
- p_stats->c_primitives = end->c_primitives - start->c_primitives;
- p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
- p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
- p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
- p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
- } break;
+ struct SWR_STATS *start = &pq->start.core;
+ struct SWR_STATS *end = &pq->end.core;
+ p_stats->ia_vertices = end->IaVertices - start->IaVertices;
+ p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives;
+ p_stats->vs_invocations = end->VsInvocations - start->VsInvocations;
+ p_stats->gs_invocations = end->GsInvocations - start->GsInvocations;
+ p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives;
+ p_stats->c_invocations = end->CPrimitives - start->CPrimitives;
+ p_stats->c_primitives = end->CPrimitives - start->CPrimitives;
+ p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
+ p_stats->hs_invocations = end->HsInvocations - start->HsInvocations;
+ p_stats->ds_invocations = end->DsInvocations - start->DsInvocations;
+ p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
+ } break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
- struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
- struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+ struct SWR_STATS *start = &pq->start.core;
+ struct SWR_STATS *end = &pq->end.core;
uint64_t num_primitives_written =
- end->num_primitives_written - start->num_primitives_written;
+ end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
uint64_t primitives_storage_needed =
- end->primitives_storage_needed - start->primitives_storage_needed;
+ end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
result->b = num_primitives_written > primitives_storage_needed;
- } break;
+ }
+ break;
default:
assert(0 && "Unsupported query");
break;
@@ -264,6 +212,8 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
struct swr_context *ctx = swr_context(pipe);
struct swr_query *pq = swr_query(q);
+ assert(!pq->enable_stats && "swr_begin_query: Query is already active!");
+
/* Initialize Results */
memset(&pq->start, 0, sizeof(pq->start));
memset(&pq->end, 0, sizeof(pq->end));
@@ -276,7 +226,7 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
/* override start timestamp to 0 for TIMESTAMP query */
if (pq->type == PIPE_QUERY_TIMESTAMP)
- pq->start.u64 = 0;
+ pq->start.timestamp = 0;
return true;
}
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
index 836d07b..0ab034d 100644
--- a/src/gallium/drivers/swr/swr_query.h
+++ b/src/gallium/drivers/swr/swr_query.h
@@ -27,13 +27,18 @@
#include <limits.h>
+struct swr_query_result {
+ SWR_STATS core;
+ uint64_t timestamp;
+};
+
struct swr_query {
unsigned type; /* PIPE_QUERY_* */
unsigned index;
- union pipe_query_result *result;
- union pipe_query_result start;
- union pipe_query_result end;
+ struct swr_query_result *result;
+ struct swr_query_result start;
+ struct swr_query_result end;
struct pipe_fence_handle *fence;
--
1.7.1
More information about the mesa-dev mailing list