[Mesa-dev] [PATCH 6/9] i965: Use 'available' fence for polling query results

Sat Oct 6 02:02:07 UTC 2018

On 10/02/2018 11:06 AM, Chris Wilson wrote:
> If we always write the 'available' flag after writing the final result
> of the query, we can probe that predicate to quickly query whether the
> result is ready from userspace. The primary advantage of checking the
> predicate is that it allows for more fine-grained queries, we do not
> have to wait for the batch to finish before the query is marked as
> ready.
> 
> We still do check the status of the batch after probing the query so
> that if the worst happens and the batch did hang without completing the
> query, we do not spin forever (although it is not as nice as completely
> eliminating the ioctl, the busy-ioctl is lightweight!).
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Matt Turner <mattst88 at gmail.com>
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  4 +-
>  src/mesa/drivers/dri/i965/gen6_queryobj.c | 54 ++++++++++-------------
>  2 files changed, 25 insertions(+), 33 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 840332294e6..418941c9194 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -468,8 +468,8 @@ struct brw_query_object {
>     bool flushed;
>  };
>  
> -#define GEN6_QUERY_PREDICATE (2)
> -#define GEN6_QUERY_RESULTS (0)
> +#define GEN6_QUERY_PREDICATE (0)
> +#define GEN6_QUERY_RESULTS (1)
>  
>  static inline unsigned
>  gen6_query_predicate_offset(const struct brw_query_object *query)
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index dc70e2a568a..b6832588333 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -40,8 +40,7 @@
>  #include "intel_buffer_objects.h"
>  
>  static inline void
> -set_query_availability(struct brw_context *brw, struct brw_query_object *query,
> -                       bool available)
> +set_query_available(struct brw_context *brw, struct brw_query_object *query)
>  {
>     /* For platforms that support ARB_query_buffer_object, we write the
>      * query availability for "pipelined" queries.
> @@ -58,22 +57,12 @@ set_query_availability(struct brw_context *brw, struct brw_query_object *query,
>      * PIPE_CONTROL with an immediate write will synchronize with
>      * those earlier writes, so we write 1 when the value has landed.
>      */
> -   if (brw->ctx.Extensions.ARB_query_buffer_object &&
> -       brw_is_query_pipelined(query)) {
> -      unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
>  
> -      if (available) {
> -         /* Order available *after* the query results. */
> -         flags |= PIPE_CONTROL_FLUSH_ENABLE;
> -      } else {
> -         /* Make it unavailable *before* any pipelined reads. */
> -         flags |= PIPE_CONTROL_CS_STALL;
> -      }
> -
> -      brw_emit_pipe_control_write(brw, flags,
> -                                  query->bo, gen6_query_predicate_offset(query),
> -                                  available);
> -   }
> +   brw_emit_pipe_control_write(brw,
> +                               PIPE_CONTROL_WRITE_IMMEDIATE |
> +                               PIPE_CONTROL_FLUSH_ENABLE,
> +                               query->bo, gen6_query_predicate_offset(query),
> +                               true);
>  }
>  
>  static void
> @@ -144,12 +133,12 @@ write_xfb_overflow_streams(struct gl_context *ctx,
>  }
>  
>  static bool
> -check_xfb_overflow_streams(uint64_t *results, int count)
> +check_xfb_overflow_streams(const uint64_t *results, int count)
>  {
>     bool overflow = false;
>  
>     for (int i = 0; i < count; i++) {
> -      uint64_t *result_i = &results[4 * i];
> +      const uint64_t *result_i = &results[4 * i];
>  
>        if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) {
>           overflow = true;

This hunk seems unrelated, but I'm almost always in favor of more const.

> @@ -221,7 +210,8 @@ emit_pipeline_stat(struct brw_context *brw, struct brw_bo *bo,
>   */
>  static void
>  gen6_queryobj_get_results(struct gl_context *ctx,
> -                          struct brw_query_object *query)
> +                          struct brw_query_object *query,
> +                          uint64_t *results)
>  {
>     struct brw_context *brw = brw_context(ctx);
>     const struct gen_device_info *devinfo = &brw->screen->devinfo;
> @@ -229,9 +219,6 @@ gen6_queryobj_get_results(struct gl_context *ctx,
>     if (query->bo == NULL)
>        return;
>  
> -   brw_bo_wait_rendering(query->bo);
> -   uint64_t *results = query->results;
> -
>     switch (query->Base.Target) {
>     case GL_TIME_ELAPSED:
>        /* The query BO contains the starting and ending timestamps.
> @@ -329,10 +316,10 @@ gen6_alloc_query(struct brw_context *brw, struct brw_query_object *query)
>  
>     query->results = brw_bo_map(brw, query->bo,
>                                 MAP_COHERENT | MAP_PERSISTENT |
> -                               MAP_READ | MAP_ASYNC);
> +                               MAP_READ | MAP_WRITE);
>  
>     /* For ARB_query_buffer_object: The result is not available */
> -   set_query_availability(brw, query, false);
> +   query->results[GEN6_QUERY_PREDICATE] = false;
>  
>     return 0;
>  }
> @@ -487,7 +474,7 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
>     query->flushed = false;
>  
>     /* For ARB_query_buffer_object: The result is now available */
> -   set_query_availability(brw, query, true);
> +   set_query_available(brw, query);
>  }
>  
>  /**
> @@ -523,7 +510,11 @@ static void gen6_wait_query(struct gl_context *ctx, struct gl_query_object *q)
>      */
>     flush_batch_if_needed(brw, query);
>  
> -   gen6_queryobj_get_results(ctx, query);
> +   uint64_t *results = query->results;
> +   if (!results[GEN6_QUERY_PREDICATE]) /* not yet available, must wait */
> +      brw_bo_wait_rendering(query->bo);
> +
> +   gen6_queryobj_get_results(ctx, query, results + GEN6_QUERY_RESULTS);
>  }
>  
>  /**
> @@ -552,9 +543,10 @@ static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q)
>      */
>     flush_batch_if_needed(brw, query);
>  
> -   if (!brw_bo_busy(query->bo)) {
> -      gen6_queryobj_get_results(ctx, query);
> -   }
> +   uint64_t *results = query->results;
> +   if (results[GEN6_QUERY_PREDICATE] || /* already available, can read async */
> +       !brw_bo_map_busy(query->bo, MAP_READ)) /* GPU hang, results lost? */
> +      gen6_queryobj_get_results(ctx, query, results + GEN6_QUERY_RESULTS);
>  }
>  
>  static void
> @@ -565,7 +557,7 @@ gen6_query_counter(struct gl_context *ctx, struct gl_query_object *q)
>     const int idx = gen6_alloc_query(brw, query) + GEN6_QUERY_RESULTS;
>  
>     brw_write_timestamp(brw, query->bo, idx);
> -   set_query_availability(brw, query, true);
> +   set_query_available(brw, query);
>  
>     query->flushed = false;
>  }
>