[Mesa-dev] [PATCH] loader/dri3: Improve dri3 thread-safety

Thomas Hellstrom thellstrom at vmware.com
Tue Nov 21 18:54:43 UTC 2017


Hi, Nicolai,

On 11/21/2017 11:50 AM, Nicolai Hähnle wrote:
> Hi Thomas,
>
> On 20.11.2017 18:19, Thomas Hellstrom wrote:
>> Is this a multithreaded test? Do you see the error with Ubuntu 17.10 
>> + Xorg?
>
> The dEQP-GLES31 tests are all single-threaded.
>
> I could finally test with the Ubuntu 17.10 on Xorg desktop on the same 
> system (some other tests were running before in parallel), and indeed 
> I don't get hangs there. Looks like it's due to Xwayland -- whether a 
> bug in there or "just" some surprising interaction.
>
> Cheers,
> Nicolai
>
>

Yes, it indeed sounds like the event is never being sent. Could you file 
a bug so that we can track this problem?

/Thomas

>>
>> /Thomas
>>
>>
>> On 11/20/2017 06:03 PM, Nicolai Hähnle wrote:
>>> Hi Thomas,
>>>
>>> not actually a regression, but I am seeing hangs now with builds 
>>> that include this patch when running the dEQP-GLES31 test. The 
>>> backtrace is:
>>>
>>> #3  0x00007f947918262a in xcb_wait_for_special_event () from 
>>> /usr/lib/x86_64-linux-gnu/libxcb.so.1
>>> #4  0x00007f947c80984e in dri3_wait_for_event_locked 
>>> (draw=0x61400000cc78)
>>>     at ../../../mesa-src/src/loader/loader_dri3_helper.c:486
>>> #5  0x00007f947c80b8e8 in loader_dri3_wait_for_sbc 
>>> (draw=draw at entry=0x61400000cc78, target_sbc=61,
>>>     target_sbc at entry=0, ust=ust at entry=0x7ffd1510d230, 
>>> msc=msc at entry=0x7ffd1510d270,
>>>     sbc=sbc at entry=0x7ffd1510d2b0) at 
>>> ../../../mesa-src/src/loader/loader_dri3_helper.c:562
>>> #6  0x00007f947c80fbe4 in loader_dri3_swapbuffer_barrier 
>>> (draw=0x61400000cc78)
>>>     at ../../../mesa-src/src/loader/loader_dri3_helper.c:1714
>>> #7  0x00007f9474ced029 in dri_st_framebuffer_flush_swapbuffers 
>>> (stctx=<optimized out>,
>>>     stfbi=<optimized out>) at 
>>> ../../../../../mesa-src/src/gallium/state_trackers/dri/dri_drawable.c:135
>>> #8  0x00007f94747730c0 in st_finish (st=st at entry=0x62600000c100)
>>>     at ../../../mesa-src/src/mesa/state_tracker/st_cb_flush.c:74
>>> #9  0x00007f947477319f in st_glFinish (ctx=<optimized out>)
>>>     at ../../../mesa-src/src/mesa/state_tracker/st_cb_flush.c:104
>>> #10 0x00005595fe7d5cfd in glu::CallLogWrapper::glFinish 
>>> (this=0x6030002b9b10)
>>>     at 
>>> /home/nha/amd/tests/deqp/framework/opengl/gluCallLogWrapper.inl:1166
>>> #11 0x00005595fe3179dc in deqp::gles31::Functional::(anonymous 
>>> namespace)::GeometryShaderRenderTest::renderWithContext 
>>> (this=this at entry=0x612000000dc0, ctx=..., program=..., dstSurface=...)
>>>     at 
>>> /home/nha/amd/tests/deqp/modules/gles31/functional/es31fGeometryShaderTests.cpp:2145 
>>>
>>> #12 0x00005595fe31d2dc in deqp::gles31::Functional::(anonymous 
>>> namespace)::GeometryShaderRenderTest::iterate (this=0x612000000dc0)
>>>     at 
>>> /home/nha/amd/tests/deqp/modules/gles31/functional/es31fGeometryShaderTests.cpp:1934 
>>>
>>> #13 0x00005595fe2d6cfe in deqp::gles31::TestCaseWrapper::iterate 
>>> (this=0x60200000e950,
>>>     testCase=<optimized out>) at 
>>> /home/nha/amd/tests/deqp/modules/gles31/tes31TestPackage.cpp:76
>>> #14 0x00005595fe7417d3 in tcu::TestSessionExecutor::iterateTestCase 
>>> (this=this at entry=0x60e00000df60,
>>>     testCase=0x612000000dc0)
>>>
>>> A command-line that fairly reliably causes the hang (but not always 
>>> in the same subtest) is:
>>>
>>> /home/nha/amd/tests/deqp/build/modules/gles31/deqp-gles31 
>>> --deqp-caselist="{dEQP-GLES31{functional{geometry_shading{input{basic_primitive{points,lines,line_loop,line_strip,triangles,triangle_strip,triangle_fan,lines_adjacency,line_strip_adjacency,triangles_adjacency},triangle_strip_adjacency{vertex_count_0,vertex_count_1,vertex_count_2,vertex_count_3,vertex_count_4,vertex_count_5,vertex_count_6,vertex_count_7,vertex_count_8,vertex_count_9,vertex_count_10,vertex_count_11,vertex_count_12}},conversion{triangles_to_points,lines_to_points,points_to_lines,triangles_to_lines,points_to_triangles,lines_to_triangles},emit{points_emit_0_end_0,points_emit_0_end_1,points_emit_1_end_1,points_emit_0_end_2,points_emit_1_end_2,line_strip_emit_0_end_0,line_strip_emit_0_end_1,line_strip_emit_1_end_1,line_strip_emit_2_end_1,line_strip_emit_0_end_2,line_strip_emit_1_end_2,line_strip_emit_2_end_2,line_strip_emit_2_end_2_emit_2_end_0,triangle_strip_emit_0_end_0,triangle_strip_emit_0_end_1,triangle_strip_emit_1_end_1,triangle_strip_emit_2_end_1,triangle_strip_emit_3_end_1,triangle_strip_emit_0_end_2,triangle_strip_emit_1_end_2,triangle_strip_emit_2_end_2,triangle_strip_emit_3_end_2,triangle_strip_emit_3_end_2_emit_3_end_0},varying{vertex_no_op_geometry_out_1,vertex_out_0_geometry_out_1,vertex_out_0_geometry_out_2,vertex_out_1_geometry_out_0,vertex_out_1_geometry_out_2},layered{render_with_default_layer_cubemap,render_with_default_layer_3d,render_with_default_layer_2d_array,render_with_default_layer_2d_multisample_array,render_to_one_cubemap,render_to_one_3d,render_to_one_2d_array,render_to_one_2d_multisample_array,render_to_all_cubemap,render_to_all_3d,render_to_all_2d_array,render_to_all_2d_multisample_array,render_different_to_cubemap,render_different_to_3d,render_different_to_2d_array,render_different_to_2d_multisample_array,fragment_layer_cubemap,fragment_layer_3d,fragment_layer_2d_array,fragment_layer_2d_multisample_array,layer_provoking_vertex_cubemap,layer_provoking_vertex_3d,layer_provoking_vertex_2d_array,layer_provoking_vertex_2d_multisample_array},instanced{geometry_1_invocations,geometry_2_invocations,geometry_8_invocations,geometry_32_invocations,geometry_max_invocations,geometry_output_different_2_invocations,geometry_output_different_8_invocations,geometry_output_different_32_invocations,geometry_output_different_max_invocations,invocation_per_layer_cubemap,invocation_per_layer_3d,invocation_per_layer_2d_array,invocation_per_layer_2d_multisample_array,multiple_layers_per_invocation_cubemap,multiple_layers_per_invocation_3d,multiple_layers_per_invocation_2d_array,multiple_layers_per_invocation_2d_multisample_array,invocation_output_vary_by_attribute,invocation_output_vary_by_uniform}}}}} 
>>> " --deqp-visibility hidden
>>>
>>> This does not actually appear to be a regression in Mesa -- I 
>>> started seeing it after updating to Ubuntu 17.10, so it may be 
>>> related to Wayland or something else. Running the same setup on 
>>> 17.04 (with Xorg) doesn't see the hangs. Disabling Gallium threading 
>>> makes no difference. Any ideas what's going on?
>>>
>>> Thanks,
>>> Nicolai
>>>
>>>
>>> On 03.11.2017 12:02, Thomas Hellstrom wrote:
>>>> It turned out that with recent changes that call into dri3 from 
>>>> glFinish(),
>>>> it appears like different thread end up waiting for X events 
>>>> simultaneously,
>>>> causing deadlocks since they steal events from eachoter and update 
>>>> the dri3
>>>> counters behind eachothers backs.
>>>>
>>>> This patch intends to improve on that. It allows at most one thread 
>>>> at a
>>>> time to wait on events for a single drawable. If another thread 
>>>> intends to
>>>> do the same, it's put to sleep until the first thread finishes 
>>>> waiting, and
>>>> then it rechecks counters and optionally retries the waiting. 
>>>> Threads that
>>>> poll for X events never pulls X events off the event queue if there 
>>>> are
>>>> other threads waiting for events on that drawable. Counters in the
>>>> dri3 drawable structure are protected by a mutex. Finally, the 
>>>> mutex we
>>>> introduce is never held while waiting for the X server to avoid
>>>> unnecessary stalls.
>>>>
>>>> This does not make dri3 drawables completely thread-safe but at 
>>>> least it's a
>>>> first step.
>>>>
>>>> Bugzilla: 
>>>> https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D102358&d=DwIGaQ&c=uilaK90D4TOVoH58JNXRgQ&r=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ&m=iDJcqIMFeVj6iTFCMMETKjNE_Ea04MZmEuaK4Qd9pVk&s=6HqE58_DXmtahSSCMcZlXab6GfjZ-S6fFA6ZkJKAprc&e= 
>>>>
>>>> Fixes: d5ba75f8881 "st/dri2 Plumb the flush_swapbuffer 
>>>> functionality through to dri3"
>>>> Signed-off-by: Thomas Hellstrom <thellstrom at vmware.com>
>>>> ---
>>>>   src/loader/loader_dri3_helper.c | 77 
>>>> +++++++++++++++++++++++++++++++----------
>>>>   src/loader/loader_dri3_helper.h | 10 ++++++
>>>>   2 files changed, 69 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/src/loader/loader_dri3_helper.c 
>>>> b/src/loader/loader_dri3_helper.c
>>>> index 19ab581..7e6b8b2 100644
>>>> --- a/src/loader/loader_dri3_helper.c
>>>> +++ b/src/loader/loader_dri3_helper.c
>>>> @@ -32,7 +32,6 @@
>>>>     #include <X11/Xlib-xcb.h>
>>>>   -#include <c11/threads.h>
>>>>   #include "loader_dri3_helper.h"
>>>>     /* From xmlpool/options.h, user exposed so should be stable */
>>>> @@ -186,8 +185,11 @@ dri3_fence_await(xcb_connection_t *c, struct 
>>>> loader_dri3_drawable *draw,
>>>>   {
>>>>      xcb_flush(c);
>>>>      xshmfence_await(buffer->shm_fence);
>>>> -   if (draw)
>>>> +   if (draw) {
>>>> +      mtx_lock(&draw->mtx);
>>>>         dri3_flush_present_events(draw);
>>>> +      mtx_unlock(&draw->mtx);
>>>> +   }
>>>>   }
>>>>     static void
>>>> @@ -245,6 +247,9 @@ loader_dri3_drawable_fini(struct 
>>>> loader_dri3_drawable *draw)
>>>>         xcb_discard_reply(draw->conn, cookie.sequence);
>>>>         xcb_unregister_for_special_event(draw->conn, 
>>>> draw->special_event);
>>>>      }
>>>> +
>>>> +   cnd_destroy(&draw->event_cnd);
>>>> +   mtx_destroy(&draw->mtx);
>>>>   }
>>>>     int
>>>> @@ -276,6 +281,8 @@ loader_dri3_drawable_init(xcb_connection_t *conn,
>>>>        draw->cur_blit_source = -1;
>>>>      draw->back_format = __DRI_IMAGE_FORMAT_NONE;
>>>> +   mtx_init(&draw->mtx, mtx_plain);
>>>> +   cnd_init(&draw->event_cnd);
>>>>        if (draw->ext->config)
>>>> draw->ext->config->configQueryi(draw->dri_screen,
>>>> @@ -407,13 +414,27 @@ dri3_handle_present_event(struct 
>>>> loader_dri3_drawable *draw,
>>>>   }
>>>>     static bool
>>>> -dri3_wait_for_event(struct loader_dri3_drawable *draw)
>>>> +dri3_wait_for_event_locked(struct loader_dri3_drawable *draw)
>>>>   {
>>>>      xcb_generic_event_t *ev;
>>>>      xcb_present_generic_event_t *ge;
>>>>        xcb_flush(draw->conn);
>>>> -   ev = xcb_wait_for_special_event(draw->conn, draw->special_event);
>>>> +
>>>> +   /* Only have one thread waiting for events at a time */
>>>> +   if (draw->has_event_waiter) {
>>>> +      cnd_wait(&draw->event_cnd, &draw->mtx);
>>>> +      /* Another thread has updated the protected info, so retest. */
>>>> +      return true;
>>>> +   } else {
>>>> +      draw->has_event_waiter = true;
>>>> +      /* Allow other threads access to the drawable while we're 
>>>> waiting. */
>>>> +      mtx_unlock(&draw->mtx);
>>>> +      ev = xcb_wait_for_special_event(draw->conn, 
>>>> draw->special_event);
>>>> +      mtx_lock(&draw->mtx);
>>>> +      draw->has_event_waiter = false;
>>>> +      cnd_broadcast(&draw->event_cnd);
>>>> +   }
>>>>      if (!ev)
>>>>         return false;
>>>>      ge = (void *) ev;
>>>> @@ -442,19 +463,23 @@ loader_dri3_wait_for_msc(struct 
>>>> loader_dri3_drawable *draw,
>>>>                             divisor,
>>>>                             remainder);
>>>>   +   mtx_lock(&draw->mtx);
>>>>      xcb_flush(draw->conn);
>>>>        /* Wait for the event */
>>>>      if (draw->special_event) {
>>>>         while ((int32_t) (msc_serial - draw->recv_msc_serial) > 0) {
>>>> -         if (!dri3_wait_for_event(draw))
>>>> +         if (!dri3_wait_for_event_locked(draw)) {
>>>> +            mtx_unlock(&draw->mtx);
>>>>               return false;
>>>> +         }
>>>>         }
>>>>      }
>>>>        *ust = draw->notify_ust;
>>>>      *msc = draw->notify_msc;
>>>>      *sbc = draw->recv_sbc;
>>>> +   mtx_unlock(&draw->mtx);
>>>>        return true;
>>>>   }
>>>> @@ -476,17 +501,21 @@ loader_dri3_wait_for_sbc(struct 
>>>> loader_dri3_drawable *draw,
>>>>       *      swaps requested with glXSwapBuffersMscOML for that 
>>>> window have
>>>>       *      completed."
>>>>       */
>>>> +   mtx_lock(&draw->mtx);
>>>>      if (!target_sbc)
>>>>         target_sbc = draw->send_sbc;
>>>>        while (draw->recv_sbc < target_sbc) {
>>>> -      if (!dri3_wait_for_event(draw))
>>>> +      if (!dri3_wait_for_event_locked(draw)) {
>>>> +         mtx_unlock(&draw->mtx);
>>>>            return 0;
>>>> +      }
>>>>      }
>>>>        *ust = draw->ust;
>>>>      *msc = draw->msc;
>>>>      *sbc = draw->recv_sbc;
>>>> +   mtx_unlock(&draw->mtx);
>>>>      return 1;
>>>>   }
>>>>   @@ -499,16 +528,16 @@ static int
>>>>   dri3_find_back(struct loader_dri3_drawable *draw)
>>>>   {
>>>>      int b;
>>>> -   xcb_generic_event_t *ev;
>>>> -   xcb_present_generic_event_t *ge;
>>>> -   int num_to_consider = draw->num_back;
>>>> +   int num_to_consider;
>>>>   +   mtx_lock(&draw->mtx);
>>>>      /* Increase the likelyhood of reusing current buffer */
>>>>      dri3_flush_present_events(draw);
>>>>        /* Check whether we need to reuse the current back buffer as 
>>>> new back.
>>>>       * In that case, wait until it's not busy anymore.
>>>>       */
>>>> +   num_to_consider = draw->num_back;
>>>>      if (!loader_dri3_have_image_blit(draw) && 
>>>> draw->cur_blit_source != -1) {
>>>>         num_to_consider = 1;
>>>>         draw->cur_blit_source = -1;
>>>> @@ -521,15 +550,14 @@ dri3_find_back(struct loader_dri3_drawable 
>>>> *draw)
>>>>              if (!buffer || !buffer->busy) {
>>>>               draw->cur_back = id;
>>>> +            mtx_unlock(&draw->mtx);
>>>>               return id;
>>>>            }
>>>>         }
>>>> -      xcb_flush(draw->conn);
>>>> -      ev = xcb_wait_for_special_event(draw->conn, 
>>>> draw->special_event);
>>>> -      if (!ev)
>>>> +      if (!dri3_wait_for_event_locked(draw)) {
>>>> +         mtx_unlock(&draw->mtx);
>>>>            return -1;
>>>> -      ge = (void *) ev;
>>>> -      dri3_handle_present_event(draw, ge);
>>>> +      }
>>>>      }
>>>>   }
>>>>   @@ -745,6 +773,9 @@ dri3_flush_present_events(struct 
>>>> loader_dri3_drawable *draw)
>>>>      /* Check to see if any configuration changes have occurred
>>>>       * since we were last invoked
>>>>       */
>>>> +   if (draw->has_event_waiter)
>>>> +      return;
>>>> +
>>>>      if (draw->special_event) {
>>>>         xcb_generic_event_t    *ev;
>>>>   @@ -774,6 +805,7 @@ loader_dri3_swap_buffers_msc(struct 
>>>> loader_dri3_drawable *draw,
>>>>        back = dri3_find_back_alloc(draw);
>>>>   +   mtx_lock(&draw->mtx);
>>>>      if (draw->is_different_gpu && back) {
>>>>         /* Update the linear buffer before presenting the pixmap */
>>>>         (void) loader_dri3_blit_image(draw,
>>>> @@ -893,6 +925,7 @@ loader_dri3_swap_buffers_msc(struct 
>>>> loader_dri3_drawable *draw,
>>>>         if (draw->stamp)
>>>>            ++(*draw->stamp);
>>>>      }
>>>> +   mtx_unlock(&draw->mtx);
>>>> draw->ext->flush->invalidate(draw->dri_drawable);
>>>>   @@ -903,11 +936,14 @@ int
>>>>   loader_dri3_query_buffer_age(struct loader_dri3_drawable *draw)
>>>>   {
>>>>      struct loader_dri3_buffer *back = dri3_find_back_alloc(draw);
>>>> +   int ret;
>>>>   -   if (!back || back->last_swap == 0)
>>>> -      return 0;
>>>> +   mtx_lock(&draw->mtx);
>>>> +   ret = (!back || back->last_swap == 0) ? 0 :
>>>> +      draw->send_sbc - back->last_swap + 1;
>>>> +   mtx_unlock(&draw->mtx);
>>>>   -   return draw->send_sbc - back->last_swap + 1;
>>>> +   return ret;
>>>>   }
>>>>     /** loader_dri3_open
>>>> @@ -1125,6 +1161,7 @@ static int
>>>>   dri3_update_drawable(__DRIdrawable *driDrawable,
>>>>                        struct loader_dri3_drawable *draw)
>>>>   {
>>>> +   mtx_lock(&draw->mtx);
>>>>      if (draw->first_init) {
>>>>         xcb_get_geometry_cookie_t geom_cookie;
>>>>         xcb_get_geometry_reply_t *geom_reply;
>>>> @@ -1165,8 +1202,10 @@ dri3_update_drawable(__DRIdrawable 
>>>> *driDrawable,
>>>>           geom_reply = xcb_get_geometry_reply(draw->conn, 
>>>> geom_cookie, NULL);
>>>>   -      if (!geom_reply)
>>>> +      if (!geom_reply) {
>>>> +         mtx_unlock(&draw->mtx);
>>>>            return false;
>>>> +      }
>>>>           draw->width = geom_reply->width;
>>>>         draw->height = geom_reply->height;
>>>> @@ -1198,6 +1237,7 @@ dri3_update_drawable(__DRIdrawable *driDrawable,
>>>>         if (error) {
>>>>            if (error->error_code != BadWindow) {
>>>>               free(error);
>>>> +            mtx_unlock(&draw->mtx);
>>>>               return false;
>>>>            }
>>>>            draw->is_pixmap = true;
>>>> @@ -1206,6 +1246,7 @@ dri3_update_drawable(__DRIdrawable *driDrawable,
>>>>         }
>>>>      }
>>>>      dri3_flush_present_events(draw);
>>>> +   mtx_unlock(&draw->mtx);
>>>>      return true;
>>>>   }
>>>>   diff --git a/src/loader/loader_dri3_helper.h 
>>>> b/src/loader/loader_dri3_helper.h
>>>> index d3f4b0c..0dd37e9 100644
>>>> --- a/src/loader/loader_dri3_helper.h
>>>> +++ b/src/loader/loader_dri3_helper.h
>>>> @@ -33,6 +33,7 @@
>>>>     #include <GL/gl.h>
>>>>   #include <GL/internal/dri_interface.h>
>>>> +#include <c11/threads.h>
>>>>     enum loader_dri3_buffer_type {
>>>>      loader_dri3_buffer_back = 0,
>>>> @@ -159,6 +160,15 @@ struct loader_dri3_drawable {
>>>>        unsigned int swap_method;
>>>>      unsigned int back_format;
>>>> +
>>>> +   /* Currently protects the following fields:
>>>> +    * event_cnd, has_event_waiter,
>>>> +    * recv_sbc, ust, msc, recv_msc_serial,
>>>> +    * notify_ust, notify_msc
>>>> +    */
>>>> +   mtx_t mtx;
>>>> +   cnd_t event_cnd;
>>>> +   bool has_event_waiter;
>>>>   };
>>>>     void
>>>>
>>>
>>>
>>
>
>



More information about the mesa-dev mailing list