[Mesa-dev] [PATCH 06/11] mesa/glthread: add glthread "perf" counters and pass them to gallium HUD

Thu Jun 22 01:03:01 UTC 2017

From: Marek Olšák <marek.olsak at amd.com>

The counters are for HUD integration in the following commits. This
valuable profiling data will allow us to see on the HUD how well
glthread is able to utilize parallelism. This is better than
benchmarking, because you can see exactly what's happening and you
don't have to be CPU-bound.

u_threaded_context has the same counters.
---
 src/gallium/auxiliary/hud/hud_context.c     |  8 ++++++++
 src/gallium/auxiliary/hud/hud_context.h     |  5 +++++
 src/gallium/auxiliary/hud/hud_private.h     |  2 ++
 src/gallium/include/state_tracker/st_api.h  |  4 +++-
 src/gallium/state_trackers/dri/dri_screen.c |  6 +++++-
 src/mesa/main/dd.h                          |  4 +++-
 src/mesa/main/glthread.c                    | 21 +++++++++++++++++++--
 src/mesa/main/glthread.h                    |  3 +++
 src/mesa/state_tracker/st_context.c         |  5 +++--
 src/util/u_queue.h                          | 14 ++++++++++++++
 10 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index f32831b..551cea9 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -1687,10 +1687,18 @@ hud_destroy(struct hud_context *hud)
    }
 
    hud_batch_query_cleanup(&hud->batch_query);
    pipe->delete_fs_state(pipe, hud->fs_color);
    pipe->delete_fs_state(pipe, hud->fs_text);
    pipe->delete_vs_state(pipe, hud->vs);
    pipe_sampler_view_reference(&hud->font_sampler_view, NULL);
    pipe_resource_reference(&hud->font.texture, NULL);
    FREE(hud);
 }
+
+void
+hud_add_queue_for_monitoring(struct hud_context *hud,
+                             struct util_queue_monitoring *queue_info)
+{
+   assert(!hud->monitored_queue);
+   hud->monitored_queue = queue_info;
+}
diff --git a/src/gallium/auxiliary/hud/hud_context.h b/src/gallium/auxiliary/hud/hud_context.h
index abf2ad5..5a7e13b 100644
--- a/src/gallium/auxiliary/hud/hud_context.h
+++ b/src/gallium/auxiliary/hud/hud_context.h
@@ -25,21 +25,26 @@
  *
  **************************************************************************/
 
 #ifndef HUD_CONTEXT_H
 #define HUD_CONTEXT_H
 
 struct hud_context;
 struct cso_context;
 struct pipe_context;
 struct pipe_resource;
+struct util_queue_monitoring;
 
 struct hud_context *
 hud_create(struct pipe_context *pipe, struct cso_context *cso);
 
 void
 hud_destroy(struct hud_context *hud);
 
 void
 hud_draw(struct hud_context *hud, struct pipe_resource *tex);
 
+void
+hud_add_queue_for_monitoring(struct hud_context *hud,
+                             struct util_queue_monitoring *queue_info);
+
 #endif
diff --git a/src/gallium/auxiliary/hud/hud_private.h b/src/gallium/auxiliary/hud/hud_private.h
index f765bd9..fba919e 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -33,20 +33,22 @@
 #include "util/list.h"
 #include "hud/font.h"
 
 struct hud_context {
    struct pipe_context *pipe;
    struct cso_context *cso;
 
    struct hud_batch_query_context *batch_query;
    struct list_head pane_list;
 
+   struct util_queue_monitoring *monitored_queue;
+
    /* states */
    struct pipe_blend_state no_blend, alpha_blend;
    struct pipe_depth_stencil_alpha_state dsa;
    void *fs_color, *fs_text;
    struct pipe_rasterizer_state rasterizer, rasterizer_aa_lines;
    void *vs;
    struct pipe_vertex_element velems[2];
 
    /* font */
    struct util_font font;
diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h
index 47d06c8..d641092 100644
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -172,20 +172,21 @@ enum st_manager_param {
     *
     * For the mesa state tracker that means that it needs to invalidate
     * the framebuffer in glViewport itself.
     */
    ST_MANAGER_BROKEN_INVALIDATE
 };
 
 struct pipe_context;
 struct pipe_resource;
 struct pipe_fence_handle;
+struct util_queue_monitoring;
 
 /**
  * Used in st_context_iface->get_resource_for_egl_image.
  */
 struct st_context_resource
 {
    /* these fields are filled in by the caller */
    enum st_context_resource_type type;
    void *resource;
 
@@ -467,21 +468,22 @@ struct st_manager
    /**
     * Query an manager param.
     */
    int (*get_param)(struct st_manager *smapi,
                     enum st_manager_param param);
 
    /**
     * Call the loader function setBackgroundContext. Called from the worker
     * thread.
     */
-   void (*set_background_context)(struct st_context_iface *stctxi);
+   void (*set_background_context)(struct st_context_iface *stctxi,
+                                  struct util_queue_monitoring *queue_info);
 };
 
 /**
  * Represent a rendering API such as OpenGL or OpenVG.
  *
  * Implemented by the state tracker and used by the state tracker manager.
  */
 struct st_api
 {
    /**
diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c
index aa215b0..6b58830 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -440,32 +440,36 @@ dri_postprocessing_init(struct dri_screen *screen)
 {
    unsigned i;
 
    for (i = 0; i < PP_FILTERS; i++) {
       screen->pp_enabled[i] = driQueryOptioni(&screen->optionCache,
                                               pp_filters[i].name);
    }
 }
 
 static void
-dri_set_background_context(struct st_context_iface *st)
+dri_set_background_context(struct st_context_iface *st,
+                           struct util_queue_monitoring *queue_info)
 {
    struct dri_context *ctx = (struct dri_context *)st->st_manager_private;
    const __DRIbackgroundCallableExtension *backgroundCallable =
       ctx->sPriv->dri2.backgroundCallable;
 
    /* Note: Mesa will only call this function if GL multithreading is enabled
     * We only do that if the loader exposed the __DRI_BACKGROUND_CALLABLE
     * extension. So we know that backgroundCallable is not NULL.
     */
    assert(backgroundCallable);
    backgroundCallable->setBackgroundContext(ctx->cPriv->loaderPrivate);
+
+   if (ctx->hud)
+      hud_add_queue_for_monitoring(ctx->hud, queue_info);
 }
 
 unsigned
 dri_init_options_get_screen_flags(struct dri_screen *screen,
                                   const char* driver_name)
 {
    unsigned flags = 0;
 
    driParseOptionInfo(&screen->optionCacheDefaults, gallium_config_options.xml);
    driParseConfigFiles(&screen->optionCache,
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 84ed57f..8e382e1 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -43,20 +43,21 @@ struct gl_framebuffer;
 struct gl_image_unit;
 struct gl_pixelstore_attrib;
 struct gl_program;
 struct gl_renderbuffer;
 struct gl_renderbuffer_attachment;
 struct gl_shader;
 struct gl_shader_program;
 struct gl_texture_image;
 struct gl_texture_object;
 struct gl_memory_info;
+struct util_queue_monitoring;
 
 /* GL_ARB_vertex_buffer_object */
 /* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return
  * NULL) if buffer is unavailable for immediate mapping.
  *
  * Does GL_MAP_INVALIDATE_RANGE_BIT do this?  It seems so, but it
  * would require more book-keeping in the driver than seems necessary
  * at this point.
  *
  * Does GL_MAP_INVALDIATE_BUFFER_BIT do this?  Not really -- we don't
@@ -1032,21 +1033,22 @@ struct dd_function_table {
     * call.  Mesa takes advantage of this to re-use a background thread to
     * perform drawing on behalf of multiple contexts.
     *
     * Mesa may sometimes call this function from a non-background thread
     * (i.e. a thread that has already been bound to a context using
     * __DriverAPIRec::MakeCurrent()); when this happens, ctx will be equal to
     * the context that is bound to this thread.
     *
     * Mesa will only call this function if GL multithreading is enabled.
     */
-   void (*SetBackgroundContext)(struct gl_context *ctx);
+   void (*SetBackgroundContext)(struct gl_context *ctx,
+                                struct util_queue_monitoring *queue_info);
 
    /**
     * \name GL_ARB_sparse_buffer interface
     */
    /*@{*/
    void (*BufferPageCommitment)(struct gl_context *ctx,
                                 struct gl_buffer_object *bufferObj,
                                 GLintptr offset, GLsizeiptr size,
                                 GLboolean commit);
    /*@}*/
diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
index d467298..c71c037 100644
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -29,20 +29,21 @@
  * their time spent inside their rendering thread and half inside Mesa.  To
  * alleviate this, we put a shim layer in Mesa at the GL dispatch level that
  * quickly logs the GL commands to a buffer to be processed by a worker
  * thread.
  */
 
 #include "main/mtypes.h"
 #include "main/glthread.h"
 #include "main/marshal.h"
 #include "main/marshal_generated.h"
+#include "util/u_atomic.h"
 #include "util/u_thread.h"
 
 
 static void
 glthread_unmarshal_batch(void *job, int thread_index)
 {
    struct glthread_batch *batch = (struct glthread_batch*)job;
    struct gl_context *ctx = batch->ctx;
    size_t pos = 0;
 
@@ -53,21 +54,21 @@ glthread_unmarshal_batch(void *job, int thread_index)
 
    assert(pos == batch->used);
    batch->used = 0;
 }
 
 static void
 glthread_thread_initialization(void *job, int thread_index)
 {
    struct gl_context *ctx = (struct gl_context*)job;
 
-   ctx->Driver.SetBackgroundContext(ctx);
+   ctx->Driver.SetBackgroundContext(ctx, &ctx->GLThread->stats);
    _glapi_set_context(ctx);
 }
 
 void
 _mesa_glthread_init(struct gl_context *ctx)
 {
    struct glthread_state *glthread = calloc(1, sizeof(*glthread));
 
    if (!glthread)
       return;
@@ -83,20 +84,21 @@ _mesa_glthread_init(struct gl_context *ctx)
       util_queue_destroy(&glthread->queue);
       free(glthread);
       return;
    }
 
    for (unsigned i = 0; i < MARSHAL_MAX_BATCHES; i++) {
       glthread->batches[i].ctx = ctx;
       util_queue_fence_init(&glthread->batches[i].fence);
    }
 
+   glthread->stats.queue = &glthread->queue;
    ctx->CurrentClientDispatch = ctx->MarshalExec;
    ctx->GLThread = glthread;
 
    /* Execute the thread initialization function in the thread. */
    struct util_queue_fence fence;
    util_queue_fence_init(&fence);
    util_queue_add_job(&glthread->queue, ctx, &fence,
                       glthread_thread_initialization, NULL);
    util_queue_fence_wait(&fence);
    util_queue_fence_destroy(&fence);
@@ -152,20 +154,22 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
     *
     * Note that glthread_unmarshal_batch() changes the dispatch table so we'll
     * need to restore it when it returns.
     */
    if (false) {
       glthread_unmarshal_batch(next, 0);
       _glapi_set_dispatch(ctx->CurrentClientDispatch);
       return;
    }
 
+   p_atomic_add(&glthread->stats.num_offloaded_items, next->used);
+
    util_queue_add_job(&glthread->queue, next, &next->fence,
                       glthread_unmarshal_batch, NULL);
    glthread->last = glthread->next;
    glthread->next = (glthread->next + 1) % MARSHAL_MAX_BATCHES;
 }
 
 /**
  * Waits for all pending batches have been unmarshaled.
  *
  * This can be used by the main thread to synchronize access to the context,
@@ -181,23 +185,36 @@ _mesa_glthread_finish(struct gl_context *ctx)
    /* If this is called from the worker thread, then we've hit a path that
     * might be called from either the main thread or the worker (such as some
     * dri interface entrypoints), in which case we don't need to actually
     * synchronize against ourself.
     */
    if (u_thread_is_self(glthread->queue.threads[0]))
       return;
 
    struct glthread_batch *last = &glthread->batches[glthread->last];
    struct glthread_batch *next = &glthread->batches[glthread->next];
+   bool synced = false;
 
-   if (!util_queue_fence_is_signalled(&last->fence))
+   if (!util_queue_fence_is_signalled(&last->fence)) {
       util_queue_fence_wait(&last->fence);
+      synced = true;
+   }
 
    if (next->used) {
+      p_atomic_add(&glthread->stats.num_direct_items, next->used);
+
       /* Since glthread_unmarshal_batch changes the dispatch to direct,
        * restore it after it's done.
        */
       struct _glapi_table *dispatch = _glapi_get_dispatch();
       glthread_unmarshal_batch(next, 0);
       _glapi_set_dispatch(dispatch);
+
+      /* It's not a sync because we don't enqueue partial batches, but
+       * it would be a sync if we did. So count it anyway.
+       */
+      synced = true;
    }
+
+   if (synced)
+      p_atomic_inc(&glthread->stats.num_syncs);
 }
diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
index 5b938fd..36692fe 100644
--- a/src/mesa/main/glthread.h
+++ b/src/mesa/main/glthread.h
@@ -58,20 +58,23 @@ struct glthread_batch
 
    /** Data contained in the command buffer. */
    uint8_t buffer[MARSHAL_MAX_CMD_SIZE];
 };
 
 struct glthread_state
 {
    /** Multithreaded queue. */
    struct util_queue queue;
 
+   /** This is sent to the driver for framebuffer overlay / HUD. */
+   struct util_queue_monitoring stats;
+
    /** The ring of batches in memory. */
    struct glthread_batch batches[MARSHAL_MAX_BATCHES];
 
    /** Index of the last submitted batch. */
    unsigned last;
 
    /** Index of the batch being filled and about to be submitted. */
    unsigned next;
 
    /**
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index f57cd6a..f535139 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -622,28 +622,29 @@ void st_destroy_context( struct st_context *st )
 }
 
 static void
 st_emit_string_marker(struct gl_context *ctx, const GLchar *string, GLsizei len)
 {
    struct st_context *st = ctx->st;
    st->pipe->emit_string_marker(st->pipe, string, len);
 }
 
 static void
-st_set_background_context(struct gl_context *ctx)
+st_set_background_context(struct gl_context *ctx,
+                          struct util_queue_monitoring *queue_info)
 {
    struct st_context *st = ctx->st;
    struct st_manager *smapi =
       (struct st_manager*)st->iface.st_context_private;
 
    assert(smapi->set_background_context);
-   smapi->set_background_context(&st->iface);
+   smapi->set_background_context(&st->iface, queue_info);
 }
 
 void st_init_driver_functions(struct pipe_screen *screen,
                               struct dd_function_table *functions)
 {
    _mesa_init_shader_object_functions(functions);
    _mesa_init_sampler_object_functions(functions);
 
    st_init_blit_functions(functions);
    st_init_bufferobject_functions(screen, functions);
diff --git a/src/util/u_queue.h b/src/util/u_queue.h
index 8ec9598..edd6bab 100644
--- a/src/util/u_queue.h
+++ b/src/util/u_queue.h
@@ -108,15 +108,29 @@ util_queue_is_initialized(struct util_queue *queue)
 {
    return queue->threads != NULL;
 }
 
 static inline bool
 util_queue_fence_is_signalled(struct util_queue_fence *fence)
 {
    return fence->signalled != 0;
 }
 
+/* Convenient structure for monitoring the queue externally and passing
+ * the structure between Mesa components. The queue doesn't use it directly.
+ */
+struct util_queue_monitoring
+{
+   /* For querying the thread busyness. */
+   struct util_queue *queue;
+
+   /* Counters updated by the user of the queue. */
+   unsigned num_offloaded_items;
+   unsigned num_direct_items;
+   unsigned num_syncs;
+};
+
 #ifdef __cplusplus
 }
 #endif
 
 #endif
-- 
2.7.4