[Mesa-dev] [PATCH v2 11/26] gallium/u_threaded: avoid syncs for get_query_result
Nicolai Hähnle
nhaehnle at gmail.com
Mon Nov 6 10:23:42 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
Queries should still get marked as flushed when flushes are executed
asynchronously in the driver thread.
To this end, the management of the unflushed_queries list is moved into
the driver thread.
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
---
 src/gallium/auxiliary/util/u_threaded_context.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 17 deletions(-)
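(Note, not part of the patch: the cross-thread handoff this change relies on
can be sketched in isolation as below. This is a minimal standalone
illustration using C11 atomics and a hand-rolled singly linked list in place
of Mesa's p_atomic_set and LIST_* helpers; the names struct query,
flush_queries, get_result and sync_with_driver are invented for the example
and do not appear in u_threaded_context.c.)

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    /* Simplified stand-in for threaded_query and the unflushed list. */
    struct query {
       atomic_bool flushed;
       struct query *next;
    };

    /* Driver thread, mirroring tc_flush_queries: unlink each pending
     * query, then publish the flush with a release store so the list
     * changes are visible before flushed reads as true elsewhere. */
    static void flush_queries(struct query **unflushed)
    {
       while (*unflushed) {
          struct query *q = *unflushed;
          *unflushed = q->next;
          q->next = NULL;
          atomic_store_explicit(&q->flushed, true, memory_order_release);
       }
    }

    /* Application thread, mirroring tc_get_query_result: the expensive
     * sync with the driver thread happens only when the query has not
     * been flushed yet. */
    static bool get_result(struct query *q, void (*sync_with_driver)(void))
    {
       if (!atomic_load_explicit(&q->flushed, memory_order_acquire))
          sync_with_driver(); /* slow path: drain the driver thread */

       /* ...fetch the actual result from the driver here... */
       return true;
    }

(The point of the patch is the fast path above: once a non-deferred flush
has gone through the driver thread, get_query_result no longer needs to
synchronize at all.)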
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 0bb645e8522..4908ea8a7ba 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -321,91 +321,106 @@ tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
 {
    struct threaded_context *tc = threaded_context(_pipe);
    struct pipe_context *pipe = tc->pipe;
 
    return pipe->create_batch_query(pipe, num_queries, query_types);
 }
 
 static void
 tc_call_destroy_query(struct pipe_context *pipe, union tc_payload *payload)
 {
+   struct threaded_query *tq = threaded_query(payload->query);
+
+   if (tq->head_unflushed.next)
+      LIST_DEL(&tq->head_unflushed);
+
    pipe->destroy_query(pipe, payload->query);
 }
 
 static void
 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
 {
    struct threaded_context *tc = threaded_context(_pipe);
-   struct threaded_query *tq = threaded_query(query);
-
-   if (tq->head_unflushed.next)
-      LIST_DEL(&tq->head_unflushed);
 
    tc_add_small_call(tc, TC_CALL_destroy_query)->query = query;
 }
 
 static void
 tc_call_begin_query(struct pipe_context *pipe, union tc_payload *payload)
 {
    pipe->begin_query(pipe, payload->query);
 }
 
 static boolean
 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
 {
    struct threaded_context *tc = threaded_context(_pipe);
    union tc_payload *payload = tc_add_small_call(tc, TC_CALL_begin_query);
 
    payload->query = query;
    return true; /* we don't care about the return value for this call */
 }
 
+struct tc_end_query_payload {
+   struct threaded_context *tc;
+   struct pipe_query *query;
+};
+
 static void
 tc_call_end_query(struct pipe_context *pipe, union tc_payload *payload)
 {
-   pipe->end_query(pipe, payload->query);
+   struct tc_end_query_payload *p = (struct tc_end_query_payload *)payload;
+   struct threaded_query *tq = threaded_query(p->query);
+
+   if (!tq->head_unflushed.next)
+      LIST_ADD(&tq->head_unflushed, &p->tc->unflushed_queries);
+
+   pipe->end_query(pipe, p->query);
 }
 
 static bool
 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
 {
    struct threaded_context *tc = threaded_context(_pipe);
    struct threaded_query *tq = threaded_query(query);
-   union tc_payload *payload = tc_add_small_call(tc, TC_CALL_end_query);
+   struct tc_end_query_payload *payload =
+      tc_add_struct_typed_call(tc, TC_CALL_end_query, tc_end_query_payload);
+
+   payload->tc = tc;
    payload->query = query;
 
    tq->flushed = false;
-   if (!tq->head_unflushed.next)
-      LIST_ADD(&tq->head_unflushed, &tc->unflushed_queries);
 
    return true; /* we don't care about the return value for this call */
 }
 
 static boolean
 tc_get_query_result(struct pipe_context *_pipe,
                     struct pipe_query *query, boolean wait,
                     union pipe_query_result *result)
 {
    struct threaded_context *tc = threaded_context(_pipe);
    struct threaded_query *tq = threaded_query(query);
    struct pipe_context *pipe = tc->pipe;
 
    if (!tq->flushed)
       tc_sync_msg(tc, wait ? "wait" : "nowait");
 
    bool success = pipe->get_query_result(pipe, query, wait, result);
 
    if (success) {
       tq->flushed = true;
-      if (tq->head_unflushed.next)
+      if (tq->head_unflushed.next) {
+         /* This is safe because it can only happen after we sync'd. */
          LIST_DEL(&tq->head_unflushed);
+      }
    }
    return success;
 }
 
 
 struct tc_query_result_resource {
    struct pipe_query *query;
    boolean wait;
    enum pipe_query_value_type result_type;
    int index;
    struct pipe_resource *resource;
@@ -1806,42 +1821,60 @@ tc_create_video_buffer(struct pipe_context *_pipe,
    unreachable("Threaded context should not be enabled for video APIs");
    return NULL;
 }
 
 
 /********************************************************************
  * draw, launch, clear, blit, copy, flush
  */
 
 struct tc_flush_payload {
+   struct threaded_context *tc;
    struct pipe_fence_handle *fence;
    unsigned flags;
 };
 
 static void
+tc_flush_queries(struct threaded_context *tc)
+{
+   struct threaded_query *tq, *tmp;
+   LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
+      LIST_DEL(&tq->head_unflushed);
+
+      /* Memory release semantics: due to a possible race with
+       * tc_get_query_result, we must ensure that the linked list changes
+       * are visible before setting tq->flushed.
+       */
+      p_atomic_set(&tq->flushed, true);
+   }
+}
+
+static void
 tc_call_flush(struct pipe_context *pipe, union tc_payload *payload)
 {
    struct tc_flush_payload *p = (struct tc_flush_payload *)payload;
    struct pipe_screen *screen = pipe->screen;
 
    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
    screen->fence_reference(screen, &p->fence, NULL);
+
+   if (!(p->flags & PIPE_FLUSH_DEFERRED))
+      tc_flush_queries(p->tc);
 }
 
 static void
 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
          unsigned flags)
 {
    struct threaded_context *tc = threaded_context(_pipe);
    struct pipe_context *pipe = tc->pipe;
    struct pipe_screen *screen = pipe->screen;
-   struct threaded_query *tq, *tmp;
    bool async = flags & PIPE_FLUSH_DEFERRED;
 
    if (flags & PIPE_FLUSH_ASYNC) {
       struct tc_batch *last = &tc->batch_slots[tc->last];
 
       /* Prefer to do the flush in the driver thread, but avoid the inter-thread
        * communication overhead if the driver thread is currently idle and the
        * caller is going to wait for the fence immediately anyway.
        */
       if (!(util_queue_fence_is_signalled(&last->fence) &&
@@ -1863,38 +1896,35 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
             next->token->tc = tc;
          }
 
          screen->fence_reference(screen, fence, tc->create_fence(pipe, token));
          if (!*fence)
             goto out_of_memory;
       }
 
       struct tc_flush_payload *p =
          tc_add_struct_typed_call(tc, TC_CALL_flush, tc_flush_payload);
+      p->tc = tc;
       p->fence = fence ? *fence : NULL;
       p->flags = flags | TC_FLUSH_ASYNC;
 
       if (!(flags & PIPE_FLUSH_DEFERRED))
          tc_batch_flush(tc);
       return;
    }
 
 out_of_memory:
-   if (!(flags & PIPE_FLUSH_DEFERRED)) {
-      LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
-         tq->flushed = true;
-         LIST_DEL(&tq->head_unflushed);
-      }
-   }
-
    tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
                    flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
+
+   if (!(flags & PIPE_FLUSH_DEFERRED))
+      tc_flush_queries(tc);
    pipe->flush(pipe, fence, flags);
 }
 
 
 /* This is actually variable-sized, because indirect isn't allocated if it's
  * not needed. */
 struct tc_full_draw_info {
    struct pipe_draw_info draw;
    struct pipe_draw_indirect_info indirect;
 };
--
2.11.0