[Mesa-dev] [PATCH] loader/dri3: Improve dri3 thread-safety

Nicolai Hähnle nhaehnle at gmail.com
Mon Nov 20 17:03:06 UTC 2017


Hi Thomas,

not actually a regression, but I am seeing hangs now with builds that 
include this patch when running the dEQP-GLES31 test. The backtrace is:

#3  0x00007f947918262a in xcb_wait_for_special_event () from 
/usr/lib/x86_64-linux-gnu/libxcb.so.1
#4  0x00007f947c80984e in dri3_wait_for_event_locked (draw=0x61400000cc78)
     at ../../../mesa-src/src/loader/loader_dri3_helper.c:486
#5  0x00007f947c80b8e8 in loader_dri3_wait_for_sbc 
(draw=draw at entry=0x61400000cc78, target_sbc=61,
     target_sbc at entry=0, ust=ust at entry=0x7ffd1510d230, 
msc=msc at entry=0x7ffd1510d270,
     sbc=sbc at entry=0x7ffd1510d2b0) at 
../../../mesa-src/src/loader/loader_dri3_helper.c:562
#6  0x00007f947c80fbe4 in loader_dri3_swapbuffer_barrier 
(draw=0x61400000cc78)
     at ../../../mesa-src/src/loader/loader_dri3_helper.c:1714
#7  0x00007f9474ced029 in dri_st_framebuffer_flush_swapbuffers 
(stctx=<optimized out>,
     stfbi=<optimized out>) at 
../../../../../mesa-src/src/gallium/state_trackers/dri/dri_drawable.c:135
#8  0x00007f94747730c0 in st_finish (st=st at entry=0x62600000c100)
     at ../../../mesa-src/src/mesa/state_tracker/st_cb_flush.c:74
#9  0x00007f947477319f in st_glFinish (ctx=<optimized out>)
     at ../../../mesa-src/src/mesa/state_tracker/st_cb_flush.c:104
#10 0x00005595fe7d5cfd in glu::CallLogWrapper::glFinish 
(this=0x6030002b9b10)
     at /home/nha/amd/tests/deqp/framework/opengl/gluCallLogWrapper.inl:1166
#11 0x00005595fe3179dc in deqp::gles31::Functional::(anonymous 
namespace)::GeometryShaderRenderTest::renderWithContext 
(this=this at entry=0x612000000dc0, ctx=..., program=..., dstSurface=...)
     at 
/home/nha/amd/tests/deqp/modules/gles31/functional/es31fGeometryShaderTests.cpp:2145
#12 0x00005595fe31d2dc in deqp::gles31::Functional::(anonymous 
namespace)::GeometryShaderRenderTest::iterate (this=0x612000000dc0)
     at 
/home/nha/amd/tests/deqp/modules/gles31/functional/es31fGeometryShaderTests.cpp:1934
#13 0x00005595fe2d6cfe in deqp::gles31::TestCaseWrapper::iterate 
(this=0x60200000e950,
     testCase=<optimized out>) at 
/home/nha/amd/tests/deqp/modules/gles31/tes31TestPackage.cpp:76
#14 0x00005595fe7417d3 in tcu::TestSessionExecutor::iterateTestCase 
(this=this at entry=0x60e00000df60,
     testCase=0x612000000dc0)

A command-line that fairly reliably causes the hang (but not always in 
the same subtest) is:

/home/nha/amd/tests/deqp/build/modules/gles31/deqp-gles31 
--deqp-caselist="{dEQP-GLES31{functional{geometry_shading{input{basic_primitive{points,lines,line_loop,line_strip,triangles,triangle_strip,triangle_fan,lines_adjacency,line_strip_adjacency,triangles_adjacency},triangle_strip_adjacency{vertex_count_0,vertex_count_1,vertex_count_2,vertex_count_3,vertex_count_4,vertex_count_5,vertex_count_6,vertex_count_7,vertex_count_8,vertex_count_9,vertex_count_10,vertex_count_11,vertex_count_12}},conversion{triangles_to_points,lines_to_points,points_to_lines,triangles_to_lines,points_to_triangles,lines_to_triangles},emit{points_emit_0_end_0,points_emit_0_end_1,points_emit_1_end_1,points_emit_0_end_2,points_emit_1_end_2,line_strip_emit_0_end_0,line_strip_emit_0_end_1,line_strip_emit_1_end_1,line_strip_emit_2_end_1,line_strip_emit_0_end_2,line_strip_emit_1_end_2,line_strip_emit_2_end_2,line_strip_emit_2_end_2_emit_2_end_0,triangle_strip_emit_0_end_0,triangle_strip_emit_0_end_1,triangle_strip_emit_1_end_1,triangle_strip_emit_2_end_1,triangle_strip_emit_3_end_1,triangle_strip_emit_0_end_2,triangle_strip_emit_1_end_2,triangle_strip_emit_2_end_2,triangle_strip_emit_3_end_2,triangle_strip_emit_3_end_2_emit_3_end_0},varying{vertex_no_op_geometry_out_1,vertex_out_0_geometry_out_1,vertex_out_0_geometry_out_2,vertex_out_1_geometry_out_0,vertex_out_1_geometry_out_2},layered{render_with_default_layer_cubemap,render_with_default_layer_3d,render_with_default_layer_2d_array,render_with_default_layer_2d_multisample_array,render_to_one_cubemap,render_to_one_3d,render_to_one_2d_array,render_to_one_2d_multisample_array,render_to_all_cubemap,render_to_all_3d,render_to_all_2d_array,render_to_all_2d_multisample_array,render_different_to_cubemap,render_different_to_3d,render_different_to_2d_array,render_different_to_2d_multisample_array,fragment_layer_cubemap,fragment_layer_3d,fragment_layer_2d_array,fragment_layer_2d_multisample_array,layer_provoking_vertex_cubemap,layer_provoking_vertex_3d,layer_provoking_vertex_2d_array,layer_provoking_vertex_2d_multisample_array},instanced{geometry_1_invocations,geometry_2_invocations,geometry_8_invocations,geometry_32_invocations,geometry_max_invocations,geometry_output_different_2_invocations,geometry_output_different_8_invocations,geometry_output_different_32_invocations,geometry_output_different_max_invocations,invocation_per_layer_cubemap,invocation_per_layer_3d,invocation_per_layer_2d_array,invocation_per_layer_2d_multisample_array,multiple_layers_per_invocation_cubemap,multiple_layers_per_invocation_3d,multiple_layers_per_invocation_2d_array,multiple_layers_per_invocation_2d_multisample_array,invocation_output_vary_by_attribute,invocation_output_vary_by_uniform}}}}} 
" --deqp-visibility hidden

This does not actually appear to be a regression in Mesa -- I started 
seeing it after updating to Ubuntu 17.10, so it may be related to 
Wayland or something else. Running the same setup on 17.04 (with Xorg) 
doesn't see the hangs. Disabling Gallium threading makes no difference. 
Any ideas what's going on?

Thanks,
Nicolai


On 03.11.2017 12:02, Thomas Hellstrom wrote:
> It turned out that with recent changes that call into dri3 from glFinish(),
> it appears like different thread end up waiting for X events simultaneously,
> causing deadlocks since they steal events from eachoter and update the dri3
> counters behind eachothers backs.
> 
> This patch intends to improve on that. It allows at most one thread at a
> time to wait on events for a single drawable. If another thread intends to
> do the same, it's put to sleep until the first thread finishes waiting, and
> then it rechecks counters and optionally retries the waiting. Threads that
> poll for X events never pulls X events off the event queue if there are
> other threads waiting for events on that drawable. Counters in the
> dri3 drawable structure are protected by a mutex. Finally, the mutex we
> introduce is never held while waiting for the X server to avoid
> unnecessary stalls.
> 
> This does not make dri3 drawables completely thread-safe but at least it's a
> first step.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102358
> Fixes: d5ba75f8881 "st/dri2 Plumb the flush_swapbuffer functionality through to dri3"
> Signed-off-by: Thomas Hellstrom <thellstrom at vmware.com>
> ---
>   src/loader/loader_dri3_helper.c | 77 +++++++++++++++++++++++++++++++----------
>   src/loader/loader_dri3_helper.h | 10 ++++++
>   2 files changed, 69 insertions(+), 18 deletions(-)
> 
> diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
> index 19ab581..7e6b8b2 100644
> --- a/src/loader/loader_dri3_helper.c
> +++ b/src/loader/loader_dri3_helper.c
> @@ -32,7 +32,6 @@
>   
>   #include <X11/Xlib-xcb.h>
>   
> -#include <c11/threads.h>
>   #include "loader_dri3_helper.h"
>   
>   /* From xmlpool/options.h, user exposed so should be stable */
> @@ -186,8 +185,11 @@ dri3_fence_await(xcb_connection_t *c, struct loader_dri3_drawable *draw,
>   {
>      xcb_flush(c);
>      xshmfence_await(buffer->shm_fence);
> -   if (draw)
> +   if (draw) {
> +      mtx_lock(&draw->mtx);
>         dri3_flush_present_events(draw);
> +      mtx_unlock(&draw->mtx);
> +   }
>   }
>   
>   static void
> @@ -245,6 +247,9 @@ loader_dri3_drawable_fini(struct loader_dri3_drawable *draw)
>         xcb_discard_reply(draw->conn, cookie.sequence);
>         xcb_unregister_for_special_event(draw->conn, draw->special_event);
>      }
> +
> +   cnd_destroy(&draw->event_cnd);
> +   mtx_destroy(&draw->mtx);
>   }
>   
>   int
> @@ -276,6 +281,8 @@ loader_dri3_drawable_init(xcb_connection_t *conn,
>   
>      draw->cur_blit_source = -1;
>      draw->back_format = __DRI_IMAGE_FORMAT_NONE;
> +   mtx_init(&draw->mtx, mtx_plain);
> +   cnd_init(&draw->event_cnd);
>   
>      if (draw->ext->config)
>         draw->ext->config->configQueryi(draw->dri_screen,
> @@ -407,13 +414,27 @@ dri3_handle_present_event(struct loader_dri3_drawable *draw,
>   }
>   
>   static bool
> -dri3_wait_for_event(struct loader_dri3_drawable *draw)
> +dri3_wait_for_event_locked(struct loader_dri3_drawable *draw)
>   {
>      xcb_generic_event_t *ev;
>      xcb_present_generic_event_t *ge;
>   
>      xcb_flush(draw->conn);
> -   ev = xcb_wait_for_special_event(draw->conn, draw->special_event);
> +
> +   /* Only have one thread waiting for events at a time */
> +   if (draw->has_event_waiter) {
> +      cnd_wait(&draw->event_cnd, &draw->mtx);
> +      /* Another thread has updated the protected info, so retest. */
> +      return true;
> +   } else {
> +      draw->has_event_waiter = true;
> +      /* Allow other threads access to the drawable while we're waiting. */
> +      mtx_unlock(&draw->mtx);
> +      ev = xcb_wait_for_special_event(draw->conn, draw->special_event);
> +      mtx_lock(&draw->mtx);
> +      draw->has_event_waiter = false;
> +      cnd_broadcast(&draw->event_cnd);
> +   }
>      if (!ev)
>         return false;
>      ge = (void *) ev;
> @@ -442,19 +463,23 @@ loader_dri3_wait_for_msc(struct loader_dri3_drawable *draw,
>                             divisor,
>                             remainder);
>   
> +   mtx_lock(&draw->mtx);
>      xcb_flush(draw->conn);
>   
>      /* Wait for the event */
>      if (draw->special_event) {
>         while ((int32_t) (msc_serial - draw->recv_msc_serial) > 0) {
> -         if (!dri3_wait_for_event(draw))
> +         if (!dri3_wait_for_event_locked(draw)) {
> +            mtx_unlock(&draw->mtx);
>               return false;
> +         }
>         }
>      }
>   
>      *ust = draw->notify_ust;
>      *msc = draw->notify_msc;
>      *sbc = draw->recv_sbc;
> +   mtx_unlock(&draw->mtx);
>   
>      return true;
>   }
> @@ -476,17 +501,21 @@ loader_dri3_wait_for_sbc(struct loader_dri3_drawable *draw,
>       *      swaps requested with glXSwapBuffersMscOML for that window have
>       *      completed."
>       */
> +   mtx_lock(&draw->mtx);
>      if (!target_sbc)
>         target_sbc = draw->send_sbc;
>   
>      while (draw->recv_sbc < target_sbc) {
> -      if (!dri3_wait_for_event(draw))
> +      if (!dri3_wait_for_event_locked(draw)) {
> +         mtx_unlock(&draw->mtx);
>            return 0;
> +      }
>      }
>   
>      *ust = draw->ust;
>      *msc = draw->msc;
>      *sbc = draw->recv_sbc;
> +   mtx_unlock(&draw->mtx);
>      return 1;
>   }
>   
> @@ -499,16 +528,16 @@ static int
>   dri3_find_back(struct loader_dri3_drawable *draw)
>   {
>      int b;
> -   xcb_generic_event_t *ev;
> -   xcb_present_generic_event_t *ge;
> -   int num_to_consider = draw->num_back;
> +   int num_to_consider;
>   
> +   mtx_lock(&draw->mtx);
>      /* Increase the likelyhood of reusing current buffer */
>      dri3_flush_present_events(draw);
>   
>      /* Check whether we need to reuse the current back buffer as new back.
>       * In that case, wait until it's not busy anymore.
>       */
> +   num_to_consider = draw->num_back;
>      if (!loader_dri3_have_image_blit(draw) && draw->cur_blit_source != -1) {
>         num_to_consider = 1;
>         draw->cur_blit_source = -1;
> @@ -521,15 +550,14 @@ dri3_find_back(struct loader_dri3_drawable *draw)
>   
>            if (!buffer || !buffer->busy) {
>               draw->cur_back = id;
> +            mtx_unlock(&draw->mtx);
>               return id;
>            }
>         }
> -      xcb_flush(draw->conn);
> -      ev = xcb_wait_for_special_event(draw->conn, draw->special_event);
> -      if (!ev)
> +      if (!dri3_wait_for_event_locked(draw)) {
> +         mtx_unlock(&draw->mtx);
>            return -1;
> -      ge = (void *) ev;
> -      dri3_handle_present_event(draw, ge);
> +      }
>      }
>   }
>   
> @@ -745,6 +773,9 @@ dri3_flush_present_events(struct loader_dri3_drawable *draw)
>      /* Check to see if any configuration changes have occurred
>       * since we were last invoked
>       */
> +   if (draw->has_event_waiter)
> +      return;
> +
>      if (draw->special_event) {
>         xcb_generic_event_t    *ev;
>   
> @@ -774,6 +805,7 @@ loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw,
>   
>      back = dri3_find_back_alloc(draw);
>   
> +   mtx_lock(&draw->mtx);
>      if (draw->is_different_gpu && back) {
>         /* Update the linear buffer before presenting the pixmap */
>         (void) loader_dri3_blit_image(draw,
> @@ -893,6 +925,7 @@ loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw,
>         if (draw->stamp)
>            ++(*draw->stamp);
>      }
> +   mtx_unlock(&draw->mtx);
>   
>      draw->ext->flush->invalidate(draw->dri_drawable);
>   
> @@ -903,11 +936,14 @@ int
>   loader_dri3_query_buffer_age(struct loader_dri3_drawable *draw)
>   {
>      struct loader_dri3_buffer *back = dri3_find_back_alloc(draw);
> +   int ret;
>   
> -   if (!back || back->last_swap == 0)
> -      return 0;
> +   mtx_lock(&draw->mtx);
> +   ret = (!back || back->last_swap == 0) ? 0 :
> +      draw->send_sbc - back->last_swap + 1;
> +   mtx_unlock(&draw->mtx);
>   
> -   return draw->send_sbc - back->last_swap + 1;
> +   return ret;
>   }
>   
>   /** loader_dri3_open
> @@ -1125,6 +1161,7 @@ static int
>   dri3_update_drawable(__DRIdrawable *driDrawable,
>                        struct loader_dri3_drawable *draw)
>   {
> +   mtx_lock(&draw->mtx);
>      if (draw->first_init) {
>         xcb_get_geometry_cookie_t                 geom_cookie;
>         xcb_get_geometry_reply_t                  *geom_reply;
> @@ -1165,8 +1202,10 @@ dri3_update_drawable(__DRIdrawable *driDrawable,
>   
>         geom_reply = xcb_get_geometry_reply(draw->conn, geom_cookie, NULL);
>   
> -      if (!geom_reply)
> +      if (!geom_reply) {
> +         mtx_unlock(&draw->mtx);
>            return false;
> +      }
>   
>         draw->width = geom_reply->width;
>         draw->height = geom_reply->height;
> @@ -1198,6 +1237,7 @@ dri3_update_drawable(__DRIdrawable *driDrawable,
>         if (error) {
>            if (error->error_code != BadWindow) {
>               free(error);
> +            mtx_unlock(&draw->mtx);
>               return false;
>            }
>            draw->is_pixmap = true;
> @@ -1206,6 +1246,7 @@ dri3_update_drawable(__DRIdrawable *driDrawable,
>         }
>      }
>      dri3_flush_present_events(draw);
> +   mtx_unlock(&draw->mtx);
>      return true;
>   }
>   
> diff --git a/src/loader/loader_dri3_helper.h b/src/loader/loader_dri3_helper.h
> index d3f4b0c..0dd37e9 100644
> --- a/src/loader/loader_dri3_helper.h
> +++ b/src/loader/loader_dri3_helper.h
> @@ -33,6 +33,7 @@
>   
>   #include <GL/gl.h>
>   #include <GL/internal/dri_interface.h>
> +#include <c11/threads.h>
>   
>   enum loader_dri3_buffer_type {
>      loader_dri3_buffer_back = 0,
> @@ -159,6 +160,15 @@ struct loader_dri3_drawable {
>   
>      unsigned int swap_method;
>      unsigned int back_format;
> +
> +   /* Currently protects the following fields:
> +    * event_cnd, has_event_waiter,
> +    * recv_sbc, ust, msc, recv_msc_serial,
> +    * notify_ust, notify_msc
> +    */
> +   mtx_t mtx;
> +   cnd_t event_cnd;
> +   bool has_event_waiter;
>   };
>   
>   void
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list