[Mesa-dev] [PATCH 8/9] i965: Use 'available' fence for polling query results

Fri Jun 9 13:01:39 UTC 2017

If we always write the 'available' flag after writing the final result
of the query, we can probe that predicate from userspace to quickly
check whether the result is ready. The primary advantage of checking the
predicate is that it allows for more fine-grained queries: we do not
have to wait for the batch to finish before the query is marked as
ready.

We still check the status of the batch after probing the query so
that, if the worst happens and the batch hangs without completing the
query, we do not spin forever. (This is not as nice as completely
eliminating the ioctl, but the busy-ioctl is lightweight!)

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth at whitecape.org>
Cc: Matt Turner <mattst88 at gmail.com>
---
 src/mesa/drivers/dri/i965/brw_context.h   |  4 +--
 src/mesa/drivers/dri/i965/gen6_queryobj.c | 54 +++++++++++++------------------
 2 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 117b1ecdca..44e0d31c6d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -428,8 +428,8 @@ struct brw_query_object {
    bool flushed;
 };
 
-#define GEN6_QUERY_PREDICATE (2)
-#define GEN6_QUERY_RESULTS (0)
+#define GEN6_QUERY_PREDICATE (0)
+#define GEN6_QUERY_RESULTS (1)
 
 static inline unsigned gen6_query_predicate_offset(const struct brw_query_object *query)
 {
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index 5c95a4bae9..ae7fd06c1c 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -40,8 +40,7 @@
 #include "intel_buffer_objects.h"
 
 static inline void
-set_query_availability(struct brw_context *brw, struct brw_query_object *query,
-                       bool available)
+set_query_available(struct brw_context *brw, struct brw_query_object *query)
 {
    /* For platforms that support ARB_query_buffer_object, we write the
     * query availability for "pipelined" queries.
@@ -58,22 +57,12 @@ set_query_availability(struct brw_context *brw, struct brw_query_object *query,
     * PIPE_CONTROL with an immediate write will synchronize with
     * those earlier writes, so we write 1 when the value has landed.
     */
-   if (brw->ctx.Extensions.ARB_query_buffer_object &&
-       brw_is_query_pipelined(query)) {
-      unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
 
-      if (available) {
-         /* Order available *after* the query results. */
-         flags |= PIPE_CONTROL_FLUSH_ENABLE;
-      } else {
-         /* Make it unavailable *before* any pipelined reads. */
-         flags |= PIPE_CONTROL_CS_STALL;
-      }
-
-      brw_emit_pipe_control_write(brw, flags,
-                                  query->bo, gen6_query_predicate_offset(query),
-                                  available, 0);
-   }
+   brw_emit_pipe_control_write(brw,
+                               PIPE_CONTROL_WRITE_IMMEDIATE |
+                               PIPE_CONTROL_FLUSH_ENABLE,
+                               query->bo, gen6_query_predicate_offset(query),
+                               true, 0);
 }
 
 static void
@@ -139,12 +128,12 @@ write_xfb_overflow_streams(struct gl_context *ctx,
 }
 
 static bool
-check_xfb_overflow_streams(uint64_t *results, int count)
+check_xfb_overflow_streams(const uint64_t *results, int count)
 {
    bool overflow = false;
 
    for (int i = 0; i < count; i++) {
-      uint64_t *result_i = &results[4 * i];
+      const uint64_t *result_i = &results[4 * i];
 
       if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) {
          overflow = true;
@@ -214,16 +203,14 @@ emit_pipeline_stat(struct brw_context *brw, struct brw_bo *bo,
  */
 static void
 gen6_queryobj_get_results(struct gl_context *ctx,
-                          struct brw_query_object *query)
+                          struct brw_query_object *query,
+                          const uint64_t *results)
 {
    struct brw_context *brw = brw_context(ctx);
 
    if (query->bo == NULL)
       return;
 
-   brw_bo_map_sync(brw, query->bo, MAP_READ | MAP_COHERENT);
-   uint64_t *results = query->results;
-
    switch (query->Base.Target) {
    case GL_TIME_ELAPSED:
       /* The query BO contains the starting and ending timestamps.
@@ -319,10 +306,10 @@ static int gen6_alloc_query(struct brw_context *brw,
    brw_bo_set_cache_coherent(query->bo);
 
    query->results = brw_bo_map(brw, query->bo,
-                               MAP_READ | MAP_COHERENT | MAP_ASYNC);
+                               MAP_READ | MAP_WRITE | MAP_COHERENT | MAP_ASYNC);
 
    /* For ARB_query_buffer_object: The result is not available */
-   set_query_availability(brw, query, false);
+   query->results[GEN6_QUERY_PREDICATE] = false;
 
    return 0;
 }
@@ -477,7 +464,7 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
    query->flushed = false;
 
    /* For ARB_query_buffer_object: The result is now available */
-   set_query_availability(brw, query, true);
+   set_query_available(brw, query);
 }
 
 /**
@@ -513,7 +500,11 @@ static void gen6_wait_query(struct gl_context *ctx, struct gl_query_object *q)
     */
    flush_batch_if_needed(brw, query);
 
-   gen6_queryobj_get_results(ctx, query);
+   uint64_t *results = query->results;
+   if (!results[GEN6_QUERY_PREDICATE]) /* not yet available, must wait */
+       brw_bo_map_sync(brw, query->bo, MAP_READ | MAP_COHERENT);
+
+   gen6_queryobj_get_results(ctx, query, results + GEN6_QUERY_RESULTS);
 }
 
 /**
@@ -542,9 +533,10 @@ static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q)
     */
    flush_batch_if_needed(brw, query);
 
-   if (!brw_bo_busy(query->bo)) {
-      gen6_queryobj_get_results(ctx, query);
-   }
+   uint64_t *results = query->results;
+   if (results[GEN6_QUERY_PREDICATE] || /* already available, can read async */
+       !brw_bo_write_busy(query->bo)) /* GPU hang, results incomplete? */
+      gen6_queryobj_get_results(ctx, query, results + GEN6_QUERY_RESULTS);
 }
 
 static void
@@ -555,7 +547,7 @@ gen6_query_counter(struct gl_context *ctx, struct gl_query_object *q)
    const int idx = gen6_alloc_query(brw, query) + GEN6_QUERY_RESULTS;
 
    brw_write_timestamp(brw, query->bo, idx);
-   set_query_availability(brw, query, true);
+   set_query_available(brw, query);
 
    query->flushed = false;
 }
-- 
2.11.0