[Mesa-dev] [PATCH] swr: Remove stall waiting for core query counters.
Kyriazis, George
george.kyriazis at intel.com
Wed May 4 15:47:07 UTC 2016
Reviewed-By: George Kyriazis <george.kyriazis at intel.com>
> -----Original Message-----
> From: mesa-dev [mailto:mesa-dev-bounces at lists.freedesktop.org] On
> Behalf Of BruceCherniak
> Sent: Thursday, April 28, 2016 12:13 PM
> To: mesa-dev at lists.freedesktop.org
> Subject: [Mesa-dev] [PATCH] swr: Remove stall waiting for core query
> counters.
>
> When gathering query results, swr_gather_stats was
> unnecessarily stalling the entire pipeline. Results are now
> collected asynchronously, with a fence marking completion.
> ---
> src/gallium/drivers/swr/swr_fence.cpp | 6 -
> src/gallium/drivers/swr/swr_fence.h | 8 ++
> src/gallium/drivers/swr/swr_query.cpp | 180 ++++++++++++------------------
> ---
> src/gallium/drivers/swr/swr_query.h | 11 ++-
> 4 files changed, 81 insertions(+), 124 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/swr_fence.cpp
> b/src/gallium/drivers/swr/swr_fence.cpp
> index 2e95b39..8a8e864 100644
> --- a/src/gallium/drivers/swr/swr_fence.cpp
> +++ b/src/gallium/drivers/swr/swr_fence.cpp
> @@ -105,12 +105,6 @@ swr_fence_reference(struct pipe_screen *screen,
> swr_fence_destroy(old);
> }
>
> -static INLINE boolean
> -swr_is_fence_done(struct pipe_fence_handle *fence_handle)
> -{
> - struct swr_fence *fence = swr_fence(fence_handle);
> - return (fence->read == fence->write);
> -}
>
> /*
> * Wait for the fence to finish.
> diff --git a/src/gallium/drivers/swr/swr_fence.h
> b/src/gallium/drivers/swr/swr_fence.h
> index df3776e..47f4d2e 100644
> --- a/src/gallium/drivers/swr/swr_fence.h
> +++ b/src/gallium/drivers/swr/swr_fence.h
> @@ -45,6 +45,14 @@ swr_fence(struct pipe_fence_handle *fence)
> return (struct swr_fence *)fence;
> }
>
> +
> +static INLINE boolean
> +swr_is_fence_done(struct pipe_fence_handle *fence_handle)
> +{
> + struct swr_fence *fence = swr_fence(fence_handle);
> + return (fence->read == fence->write);
> +}
> +
> static INLINE boolean
> swr_is_fence_pending(struct pipe_fence_handle *fence_handle)
> {
> diff --git a/src/gallium/drivers/swr/swr_query.cpp
> b/src/gallium/drivers/swr/swr_query.cpp
> index f038a6e..5c59965 100644
> --- a/src/gallium/drivers/swr/swr_query.cpp
> +++ b/src/gallium/drivers/swr/swr_query.cpp
> @@ -62,10 +62,8 @@ swr_destroy_query(struct pipe_context *pipe, struct
> pipe_query *q)
> struct swr_query *pq = swr_query(q);
>
> if (pq->fence) {
> - if (!swr_is_fence_pending(pq->fence)) {
> - swr_fence_submit(swr_context(pipe), pq->fence);
> + if (swr_is_fence_pending(pq->fence))
> swr_fence_finish(pipe->screen, pq->fence, 0);
> - }
> swr_fence_reference(pipe->screen, &pq->fence, NULL);
> }
>
> @@ -73,100 +71,45 @@ swr_destroy_query(struct pipe_context *pipe,
> struct pipe_query *q)
> }
>
>
> -// XXX Create a fence callback, rather than stalling SwrWaitForIdle
> static void
> swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
> {
> struct swr_context *ctx = swr_context(pipe);
>
> assert(pq->result);
> - union pipe_query_result *result = pq->result;
> + struct swr_query_result *result = pq->result;
> boolean enable_stats = pq->enable_stats;
> - SWR_STATS swr_stats = {0};
> -
> - if (pq->fence) {
> - if (!swr_is_fence_pending(pq->fence)) {
> - swr_fence_submit(ctx, pq->fence);
> - swr_fence_finish(pipe->screen, pq->fence, 0);
> - }
> - swr_fence_reference(pipe->screen, &pq->fence, NULL);
> - }
>
> - /*
> - * These queries don't need SWR Stats enabled in the core
> - * Set and return.
> - */
> + /* A few results don't require the core, so don't involve it */
> switch (pq->type) {
> case PIPE_QUERY_TIMESTAMP:
> case PIPE_QUERY_TIME_ELAPSED:
> - result->u64 = swr_get_timestamp(pipe->screen);
> - return;
> + result->timestamp = swr_get_timestamp(pipe->screen);
> break;
> case PIPE_QUERY_TIMESTAMP_DISJOINT:
> - /* nothing to do here */
> - return;
> - break;
> case PIPE_QUERY_GPU_FINISHED:
> - result->b = TRUE; /* XXX TODO Add an api func to SWR to compare
> drawId
> - vs LastRetiredId? */
> - return;
> + /* nothing to do here */
> break;
> default:
> - /* Any query that needs SwrCore stats */
> - break;
> - }
> -
> - /*
> - * All other results are collected from SwrCore counters
> - */
> + /*
> + * All other results are collected from SwrCore counters via
> + * SwrGetStats. This returns immediately, but results are later filled
> + * in by the backend. Fence status is the only indication of
> + * completion. */
> + SwrGetStats(ctx->swrContext, &result->core);
> +
> + if (!pq->fence) {
> + struct swr_screen *screen = swr_screen(pipe->screen);
> + swr_fence_reference(pipe->screen, &pq->fence, screen-
> >flush_fence);
> + }
> + swr_fence_submit(ctx, pq->fence);
>
> - /* XXX, Should turn this into a fence callback and skip the stall */
> - SwrGetStats(ctx->swrContext, &swr_stats);
> - /* SwrGetStats returns immediately, wait for collection */
> - SwrWaitForIdle(ctx->swrContext);
> + /* Only change stat collection if there are no active queries */
> + if (ctx->active_queries == 0)
> + SwrEnableStats(ctx->swrContext, enable_stats);
>
> - switch (pq->type) {
> - case PIPE_QUERY_OCCLUSION_PREDICATE:
> - case PIPE_QUERY_OCCLUSION_COUNTER:
> - result->u64 = swr_stats.DepthPassCount;
> - break;
> - case PIPE_QUERY_PRIMITIVES_GENERATED:
> - result->u64 = swr_stats.IaPrimitives;
> - break;
> - case PIPE_QUERY_PRIMITIVES_EMITTED:
> - result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
> - break;
> - case PIPE_QUERY_SO_STATISTICS:
> - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
> - struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
> - so_stats->num_primitives_written =
> - swr_stats.SoNumPrimsWritten[pq->index];
> - so_stats->primitives_storage_needed =
> - swr_stats.SoPrimStorageNeeded[pq->index];
> - } break;
> - case PIPE_QUERY_PIPELINE_STATISTICS: {
> - struct pipe_query_data_pipeline_statistics *p_stats =
> - &result->pipeline_statistics;
> - p_stats->ia_vertices = swr_stats.IaVertices;
> - p_stats->ia_primitives = swr_stats.IaPrimitives;
> - p_stats->vs_invocations = swr_stats.VsInvocations;
> - p_stats->gs_invocations = swr_stats.GsInvocations;
> - p_stats->gs_primitives = swr_stats.GsPrimitives;
> - p_stats->c_invocations = swr_stats.CPrimitives;
> - p_stats->c_primitives = swr_stats.CPrimitives;
> - p_stats->ps_invocations = swr_stats.PsInvocations;
> - p_stats->hs_invocations = swr_stats.HsInvocations;
> - p_stats->ds_invocations = swr_stats.DsInvocations;
> - p_stats->cs_invocations = swr_stats.CsInvocations;
> - } break;
> - default:
> - assert(0 && "Unsupported query");
> break;
> }
> -
> - /* Only change stat collection if there are no active queries */
> - if (ctx->active_queries == 0)
> - SwrEnableStats(ctx->swrContext, enable_stats);
> }
>
>
> @@ -176,16 +119,16 @@ swr_get_query_result(struct pipe_context *pipe,
> boolean wait,
> union pipe_query_result *result)
> {
> - struct swr_context *ctx = swr_context(pipe);
> struct swr_query *pq = swr_query(q);
> + struct swr_query_result *start = &pq->start;
> + struct swr_query_result *end = &pq->end;
> + unsigned index = pq->index;
>
> if (pq->fence) {
> - if (!swr_is_fence_pending(pq->fence)) {
> - swr_fence_submit(ctx, pq->fence);
> - if (!wait)
> - return FALSE;
> - swr_fence_finish(pipe->screen, pq->fence, 0);
> - }
> + if (!wait && !swr_is_fence_done(pq->fence))
> + return FALSE;
> +
> + swr_fence_finish(pipe->screen, pq->fence, 0);
> swr_fence_reference(pipe->screen, &pq->fence, NULL);
> }
>
> @@ -194,62 +137,67 @@ swr_get_query_result(struct pipe_context *pipe,
> switch (pq->type) {
> /* Booleans */
> case PIPE_QUERY_OCCLUSION_PREDICATE:
> - result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
> + result->b = end->core.DepthPassCount != start->core.DepthPassCount;
> break;
> case PIPE_QUERY_GPU_FINISHED:
> - result->b = pq->end.b;
> + result->b = TRUE;
> break;
> /* Counters */
> case PIPE_QUERY_OCCLUSION_COUNTER:
> + result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;
> + break;
> case PIPE_QUERY_TIMESTAMP:
> case PIPE_QUERY_TIME_ELAPSED:
> + result->u64 = end->timestamp - start->timestamp;
> + break;
> case PIPE_QUERY_PRIMITIVES_GENERATED:
> + result->u64 = end->core.IaPrimitives - start->core.IaPrimitives;
> case PIPE_QUERY_PRIMITIVES_EMITTED:
> - result->u64 = pq->end.u64 - pq->start.u64;
> + result->u64 = end->core.SoNumPrimsWritten[index]
> + - start->core.SoNumPrimsWritten[index];
> break;
> /* Structures */
> case PIPE_QUERY_SO_STATISTICS: {
> struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
> - struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
> - struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
> + struct SWR_STATS *start = &pq->start.core;
> + struct SWR_STATS *end = &pq->end.core;
> so_stats->num_primitives_written =
> - end->num_primitives_written - start->num_primitives_written;
> + end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
> so_stats->primitives_storage_needed =
> - end->primitives_storage_needed - start->primitives_storage_needed;
> + end->SoPrimStorageNeeded[index] - start-
> >SoPrimStorageNeeded[index];
> } break;
> - case PIPE_QUERY_TIMESTAMP_DISJOINT: {
> + case PIPE_QUERY_TIMESTAMP_DISJOINT:
> /* os_get_time_nano returns nanoseconds */
> result->timestamp_disjoint.frequency = UINT64_C(1000000000);
> result->timestamp_disjoint.disjoint = FALSE;
> - } break;
> + break;
> case PIPE_QUERY_PIPELINE_STATISTICS: {
> struct pipe_query_data_pipeline_statistics *p_stats =
> &result->pipeline_statistics;
> - struct pipe_query_data_pipeline_statistics *start =
> - &pq->start.pipeline_statistics;
> - struct pipe_query_data_pipeline_statistics *end =
> - &pq->end.pipeline_statistics;
> - p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
> - p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
> - p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
> - p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
> - p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
> - p_stats->c_invocations = end->c_invocations - start->c_invocations;
> - p_stats->c_primitives = end->c_primitives - start->c_primitives;
> - p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
> - p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
> - p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
> - p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
> - } break;
> + struct SWR_STATS *start = &pq->start.core;
> + struct SWR_STATS *end = &pq->end.core;
> + p_stats->ia_vertices = end->IaVertices - start->IaVertices;
> + p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives;
> + p_stats->vs_invocations = end->VsInvocations - start->VsInvocations;
> + p_stats->gs_invocations = end->GsInvocations - start->GsInvocations;
> + p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives;
> + p_stats->c_invocations = end->CPrimitives - start->CPrimitives;
> + p_stats->c_primitives = end->CPrimitives - start->CPrimitives;
> + p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
> + p_stats->hs_invocations = end->HsInvocations - start->HsInvocations;
> + p_stats->ds_invocations = end->DsInvocations - start->DsInvocations;
> + p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
> + } break;
> case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
> - struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
> - struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
> + struct SWR_STATS *start = &pq->start.core;
> + struct SWR_STATS *end = &pq->end.core;
> uint64_t num_primitives_written =
> - end->num_primitives_written - start->num_primitives_written;
> + end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
> uint64_t primitives_storage_needed =
> - end->primitives_storage_needed - start->primitives_storage_needed;
> + end->SoPrimStorageNeeded[index] - start-
> >SoPrimStorageNeeded[index];
> result->b = num_primitives_written > primitives_storage_needed;
> - } break;
> + }
> + break;
> default:
> assert(0 && "Unsupported query");
> break;
> @@ -264,6 +212,8 @@ swr_begin_query(struct pipe_context *pipe, struct
> pipe_query *q)
> struct swr_context *ctx = swr_context(pipe);
> struct swr_query *pq = swr_query(q);
>
> + assert(!pq->enable_stats && "swr_begin_query: Query is already
> active!");
> +
> /* Initialize Results */
> memset(&pq->start, 0, sizeof(pq->start));
> memset(&pq->end, 0, sizeof(pq->end));
> @@ -276,7 +226,7 @@ swr_begin_query(struct pipe_context *pipe, struct
> pipe_query *q)
>
> /* override start timestamp to 0 for TIMESTAMP query */
> if (pq->type == PIPE_QUERY_TIMESTAMP)
> - pq->start.u64 = 0;
> + pq->start.timestamp = 0;
>
> return true;
> }
> diff --git a/src/gallium/drivers/swr/swr_query.h
> b/src/gallium/drivers/swr/swr_query.h
> index 836d07b..0ab034d 100644
> --- a/src/gallium/drivers/swr/swr_query.h
> +++ b/src/gallium/drivers/swr/swr_query.h
> @@ -27,13 +27,18 @@
>
> #include <limits.h>
>
> +struct swr_query_result {
> + SWR_STATS core;
> + uint64_t timestamp;
> +};
> +
> struct swr_query {
> unsigned type; /* PIPE_QUERY_* */
> unsigned index;
>
> - union pipe_query_result *result;
> - union pipe_query_result start;
> - union pipe_query_result end;
> + struct swr_query_result *result;
> + struct swr_query_result start;
> + struct swr_query_result end;
>
> struct pipe_fence_handle *fence;
>
> --
> 1.7.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list