[Mesa-dev] [PATCH 6/9] i965: Use 'available' fence for polling query results
Ian Romanick
idr at freedesktop.org
Sat Oct 6 02:02:07 UTC 2018
On 10/02/2018 11:06 AM, Chris Wilson wrote:
> If we always write the 'available' flag after writing the final result
> of the query, we can probe that predicate to quickly query whether the
> result is ready from userspace. The primary advantage of checking the
> predicate is that it allows for more fine-grained queries, we do not
> have to wait for the batch to finish before the query is marked as
> ready.
>
> We still do check the status of the batch after probing the query so
> that if the worst happens and the batch did hang without completing the
> query, we do not spin forever (although it is not as nice as completely
> eliminating the ioctl, the busy-ioctl is lightweight!).
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Matt Turner <mattst88 at gmail.com>
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 4 +-
> src/mesa/drivers/dri/i965/gen6_queryobj.c | 54 ++++++++++-------------
> 2 files changed, 25 insertions(+), 33 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 840332294e6..418941c9194 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -468,8 +468,8 @@ struct brw_query_object {
> bool flushed;
> };
>
> -#define GEN6_QUERY_PREDICATE (2)
> -#define GEN6_QUERY_RESULTS (0)
> +#define GEN6_QUERY_PREDICATE (0)
> +#define GEN6_QUERY_RESULTS (1)
>
> static inline unsigned
> gen6_query_predicate_offset(const struct brw_query_object *query)
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index dc70e2a568a..b6832588333 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -40,8 +40,7 @@
> #include "intel_buffer_objects.h"
>
> static inline void
> -set_query_availability(struct brw_context *brw, struct brw_query_object *query,
> - bool available)
> +set_query_available(struct brw_context *brw, struct brw_query_object *query)
> {
> /* For platforms that support ARB_query_buffer_object, we write the
> * query availability for "pipelined" queries.
> @@ -58,22 +57,12 @@ set_query_availability(struct brw_context *brw, struct brw_query_object *query,
> * PIPE_CONTROL with an immediate write will synchronize with
> * those earlier writes, so we write 1 when the value has landed.
> */
> - if (brw->ctx.Extensions.ARB_query_buffer_object &&
> - brw_is_query_pipelined(query)) {
> - unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
>
> - if (available) {
> - /* Order available *after* the query results. */
> - flags |= PIPE_CONTROL_FLUSH_ENABLE;
> - } else {
> - /* Make it unavailable *before* any pipelined reads. */
> - flags |= PIPE_CONTROL_CS_STALL;
> - }
> -
> - brw_emit_pipe_control_write(brw, flags,
> - query->bo, gen6_query_predicate_offset(query),
> - available);
> - }
> + brw_emit_pipe_control_write(brw,
> + PIPE_CONTROL_WRITE_IMMEDIATE |
> + PIPE_CONTROL_FLUSH_ENABLE,
> + query->bo, gen6_query_predicate_offset(query),
> + true);
> }
>
> static void
> @@ -144,12 +133,12 @@ write_xfb_overflow_streams(struct gl_context *ctx,
> }
>
> static bool
> -check_xfb_overflow_streams(uint64_t *results, int count)
> +check_xfb_overflow_streams(const uint64_t *results, int count)
> {
> bool overflow = false;
>
> for (int i = 0; i < count; i++) {
> - uint64_t *result_i = &results[4 * i];
> + const uint64_t *result_i = &results[4 * i];
>
> if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) {
> overflow = true;
This hunk seems unrelated, but I'm almost always in favor of more const.
> @@ -221,7 +210,8 @@ emit_pipeline_stat(struct brw_context *brw, struct brw_bo *bo,
> */
> static void
> gen6_queryobj_get_results(struct gl_context *ctx,
> - struct brw_query_object *query)
> + struct brw_query_object *query,
> + uint64_t *results)
> {
> struct brw_context *brw = brw_context(ctx);
> const struct gen_device_info *devinfo = &brw->screen->devinfo;
> @@ -229,9 +219,6 @@ gen6_queryobj_get_results(struct gl_context *ctx,
> if (query->bo == NULL)
> return;
>
> - brw_bo_wait_rendering(query->bo);
> - uint64_t *results = query->results;
> -
> switch (query->Base.Target) {
> case GL_TIME_ELAPSED:
> /* The query BO contains the starting and ending timestamps.
> @@ -329,10 +316,10 @@ gen6_alloc_query(struct brw_context *brw, struct brw_query_object *query)
>
> query->results = brw_bo_map(brw, query->bo,
> MAP_COHERENT | MAP_PERSISTENT |
> - MAP_READ | MAP_ASYNC);
> + MAP_READ | MAP_WRITE);
>
> /* For ARB_query_buffer_object: The result is not available */
> - set_query_availability(brw, query, false);
> + query->results[GEN6_QUERY_PREDICATE] = false;
>
> return 0;
> }
> @@ -487,7 +474,7 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
> query->flushed = false;
>
> /* For ARB_query_buffer_object: The result is now available */
> - set_query_availability(brw, query, true);
> + set_query_available(brw, query);
> }
>
> /**
> @@ -523,7 +510,11 @@ static void gen6_wait_query(struct gl_context *ctx, struct gl_query_object *q)
> */
> flush_batch_if_needed(brw, query);
>
> - gen6_queryobj_get_results(ctx, query);
> + uint64_t *results = query->results;
> + if (!results[GEN6_QUERY_PREDICATE]) /* not yet available, must wait */
> + brw_bo_wait_rendering(query->bo);
> +
> + gen6_queryobj_get_results(ctx, query, results + GEN6_QUERY_RESULTS);
> }
>
> /**
> @@ -552,9 +543,10 @@ static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q)
> */
> flush_batch_if_needed(brw, query);
>
> - if (!brw_bo_busy(query->bo)) {
> - gen6_queryobj_get_results(ctx, query);
> - }
> + uint64_t *results = query->results;
> + if (results[GEN6_QUERY_PREDICATE] || /* already available, can read async */
> + !brw_bo_map_busy(query->bo, MAP_READ)) /* GPU hang, results lost? */
> + gen6_queryobj_get_results(ctx, query, results + GEN6_QUERY_RESULTS);
> }
>
> static void
> @@ -565,7 +557,7 @@ gen6_query_counter(struct gl_context *ctx, struct gl_query_object *q)
> const int idx = gen6_alloc_query(brw, query) + GEN6_QUERY_RESULTS;
>
> brw_write_timestamp(brw, query->bo, idx);
> - set_query_availability(brw, query, true);
> + set_query_available(brw, query);
>
> query->flushed = false;
> }
>
More information about the mesa-dev
mailing list