[Mesa-dev] [PATCH 2/2] glx/dri2: implement new extension for throttling and asynchronous flushing

Marek Olšák maraeo at gmail.com
Wed Nov 14 19:43:23 PST 2012


and allow thread offloading for dri2SwapBuffers.

The extension takes care of throttling, drawable flushing and
GL context flushing (no need to call glFlush), and allows offloading
xcb_dri2_swap_buffers to another thread. The driver has to implement the
offloading; libGL only gives it the pointer to the swap function.
---
 include/GL/internal/dri_interface.h |   62 ++++++++++
 src/glx/dri2_glx.c                  |  234 +++++++++++++++++++++++++++--------
 2 files changed, 243 insertions(+), 53 deletions(-)

diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h
index 1e0f1d0..3667a1a 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -85,6 +85,7 @@ typedef struct __DRIdri2ExtensionRec		__DRIdri2Extension;
 typedef struct __DRIdri2LoaderExtensionRec	__DRIdri2LoaderExtension;
 typedef struct __DRI2flushExtensionRec	__DRI2flushExtension;
 typedef struct __DRI2throttleExtensionRec	__DRI2throttleExtension;
+typedef struct __DRI2asyncFlushExtensionRec	__DRI2asyncFlushExtension;
 
 /*@}*/
 
@@ -305,6 +306,67 @@ struct __DRI2throttleExtensionRec {
 		    enum __DRI2throttleReason reason);
 };
 
+
+/**
+ * Optional feature for drivers that implement DRI2
+ */
+#define __DRI2_ASYNC_FLUSH "DRI2_AsyncFlush"
+#define __DRI2_ASYNC_FLUSH_VERSION 1
+
+#define __DRI2_FLUSH_DRAWABLE (1 << 0) /* the drawable should be flushed. */
+#define __DRI2_FLUSH_CONTEXT  (1 << 1) /* glFlush should be called */
+#define __DRI2_FLUSH_ASYNC    (1 << 2) /* thread offloading is allowed */
+
+struct __DRI2asyncFlushExtensionRec {
+    __DRIextension base;
+
+    /**
+     * Initiate a synchronous or asynchronous drawable+context flush
+     * and throttle the rendering.
+     *
+     * This function is also recommended for synchronous flushing, because
+     * it helps reduce the number of flushes in the driver by combining
+     * several operations into one call.
+     *
+     * The flag __DRI2_FLUSH_ASYNC tells the driver it can offload the flush
+     * to some other thread and return immediately.
+     *
+     * Rules for thread offloading:
+     * - The throttling shouldn't be offloaded.
+     * - For each context, there can only be one flush in-progress at a time.
+     *   Any new flush must wait until the previous one is finished, so that
+     *   the flushes are executed in order.
+     * - If any GL rendering commands are flushed, the context must wait until
+     *   the previous flush is finished.
+     *
+     * The 'finish' parameter sets a callback function that is executed
+     * after the flush is finished (e.g. SwapBuffers).
+     *
+     * \param ctx               the context
+     * \param drawable          the drawable to flush
+     * \param flags             a combination of __DRI2_FLUSH_xxx flags
+     * \param throttle_reason   the reason for throttling, 0 = no throttling
+     * \param finish            the function to call after the offloaded work
+     *                          is done, it can be NULL if nothing needs to
+     *                          be done, used with __DRI2_FLUSH_ASYNC
+     * \param finish_ptr        the parameter to the finish function
+     */
+    void (*flush)(__DRIcontext *ctx,
+                  __DRIdrawable *drawable,
+                  unsigned flags,
+                  enum __DRI2throttleReason throttle_reason,
+                  void (*finish)(void *user_ptr),
+                  void *finish_ptr);
+
+    /**
+     * If the last flush was asynchronous, wait until the flush is finished.
+     *
+     * \param ctx  the context
+     */
+    void (*sync)(__DRIcontext *ctx);
+};
+
+
 /**
  * XML document describing the configuration options supported by the
  * driver.
diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index f469431..1f5807f 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -87,6 +87,7 @@ struct dri2_screen {
    const __DRIcoreExtension *core;
 
    const __DRI2flushExtension *f;
+   const __DRI2asyncFlushExtension *async_flush;
    const __DRI2configQueryExtension *config;
    const __DRItexBufferExtension *texBuffer;
    const __DRI2throttleExtension *throttle;
@@ -96,6 +97,10 @@ struct dri2_screen {
    int fd;
 
    Bool show_fps;
+   Bool no_async_flush;
+
+   Display *second_dpy; /* for thread offloading */
+   pthread_mutex_t second_dpy_mutex;
 };
 
 struct dri2_context
@@ -143,6 +148,11 @@ dri2_destroy_context(struct glx_context *context)
    struct dri2_context *pcp = (struct dri2_context *) context;
    struct dri2_screen *psc = (struct dri2_screen *) context->psc;
 
+   /* Synchronize the flushing before destroying the drawables. */
+   if (psc->async_flush) {
+      psc->async_flush->sync(pcp->driContext);
+   }
+
    driReleaseDrawables(&pcp->base);
 
    free((char *) context->extensions);
@@ -509,6 +519,15 @@ dri2WaitForSBC(__GLXDRIdrawable *pdraw, int64_t target_sbc, int64_t *ust,
    return 1;
 }
 
+static __DRIcontext *
+dri2GetCurrentContext()
+{
+   struct glx_context *gc = __glXGetCurrentContext();
+   struct dri2_context *dri2Ctx = (struct dri2_context *)gc;
+
+   return dri2Ctx ? dri2Ctx->driContext : NULL;
+}
+
 /**
  * dri2Throttle - Request driver throttling
  *
@@ -522,10 +541,7 @@ dri2Throttle(struct dri2_screen *psc,
 	     enum __DRI2throttleReason reason)
 {
    if (psc->throttle) {
-      struct glx_context *gc = __glXGetCurrentContext();
-      struct dri2_context *dri2Ctx = (struct dri2_context *)gc;
-      __DRIcontext *ctx =
-	 (dri2Ctx) ? dri2Ctx->driContext : NULL;
+      __DRIcontext *ctx = dri2GetCurrentContext();
 
       psc->throttle->throttle(ctx, draw->driDrawable, reason);
    }
@@ -534,7 +550,7 @@ dri2Throttle(struct dri2_screen *psc,
 static void
 __dri2CopySubBuffer(__GLXDRIdrawable *pdraw, int x, int y,
 		    int width, int height,
-		    enum __DRI2throttleReason reason)
+		    enum __DRI2throttleReason reason, Bool flush)
 {
    struct dri2_drawable *priv = (struct dri2_drawable *) pdraw;
    struct dri2_screen *psc = (struct dri2_screen *) pdraw->psc;
@@ -550,10 +566,24 @@ __dri2CopySubBuffer(__GLXDRIdrawable *pdraw, int x, int y,
    xrect.width = width;
    xrect.height = height;
 
-   if (psc->f)
-      (*psc->f->flush) (priv->driDrawable);
+   if (psc->async_flush) {
+      unsigned flags = flush ? __DRI2_FLUSH_CONTEXT : 0;
+      __DRIcontext *ctx = dri2GetCurrentContext();
+
+      (*psc->async_flush->flush)(ctx, priv->driDrawable, flags, reason,
+                                 NULL, NULL);
+   }
+   else {
+      if (flush) {
+         glFlush();
+      }
+
+      if (psc->f) {
+         (*psc->f->flush) (priv->driDrawable);
+      }
 
-   dri2Throttle(psc, priv, reason);
+      dri2Throttle(psc, priv, reason);
+   }
 
    region = XFixesCreateRegion(psc->base.dpy, &xrect, 1);
    DRI2CopyRegion(psc->base.dpy, pdraw->xDrawable, region,
@@ -574,7 +604,7 @@ dri2CopySubBuffer(__GLXDRIdrawable *pdraw, int x, int y,
 		  int width, int height)
 {
    __dri2CopySubBuffer(pdraw, x, y, width, height,
-		       __DRI2_THROTTLE_COPYSUBBUFFER);
+		       __DRI2_THROTTLE_COPYSUBBUFFER, False);
 }
 
 
@@ -653,6 +683,27 @@ dri2FlushFrontBuffer(__DRIdrawable *driDrawable, void *loaderPrivate)
    dri2_wait_gl(gc);
 }
 
+static Bool
+dri2OpenSecondDisplay(struct dri2_screen *psc)
+{
+   pthread_mutex_init(&psc->second_dpy_mutex, NULL);
+   psc->second_dpy = XOpenDisplay(DisplayString(psc->base.dpy));
+   return psc->second_dpy != NULL;
+}
+
+static void
+dri2CloseSecondDisplay(struct dri2_screen *psc)
+{
+   if (psc->second_dpy) {
+      pthread_mutex_lock(&psc->second_dpy_mutex);
+      if (psc->second_dpy) {
+         XCloseDisplay(psc->second_dpy);
+         psc->second_dpy = NULL;
+      }
+      pthread_mutex_unlock(&psc->second_dpy_mutex);
+      pthread_mutex_destroy(&psc->second_dpy_mutex);
+   }
+}
 
 static void
 dri2DestroyScreen(struct glx_screen *base)
@@ -661,6 +712,7 @@ dri2DestroyScreen(struct glx_screen *base)
 
    /* Free the direct rendering per screen data */
    (*psc->core->destroyScreen) (psc->driScreen);
+   dri2CloseSecondDisplay(psc);
    driDestroyConfigs(psc->driver_configs);
    close(psc->fd);
    free(psc);
@@ -730,6 +782,66 @@ static void show_fps(struct dri2_drawable *draw)
 }
 
 static int64_t
+dri2FinishSwapBuffers(Display *dpy,
+                      __GLXDRIdrawable *pdraw,
+                      int64_t target_msc,
+                      int64_t divisor,
+                      int64_t remainder,
+                      Bool async)
+{
+   xcb_dri2_swap_buffers_cookie_t swap_buffers_cookie;
+   xcb_dri2_swap_buffers_reply_t *swap_buffers_reply;
+   uint32_t target_msc_hi, target_msc_lo;
+   uint32_t divisor_hi, divisor_lo;
+   uint32_t remainder_hi, remainder_lo;
+   int64_t ret = 0;
+   xcb_connection_t *c = XGetXCBConnection(dpy);
+
+   split_counter(target_msc, &target_msc_hi, &target_msc_lo);
+   split_counter(divisor, &divisor_hi, &divisor_lo);
+   split_counter(remainder, &remainder_hi, &remainder_lo);
+
+   swap_buffers_cookie =
+      xcb_dri2_swap_buffers_unchecked(c, pdraw->xDrawable,
+                                      target_msc_hi, target_msc_lo,
+                                      divisor_hi, divisor_lo,
+                                      remainder_hi, remainder_lo);
+
+   if (!async) {
+      /* Immediately wait on the swapbuffers reply.  If we didn't, we'd have
+       * to do so some time before reusing a (non-pageflipped) backbuffer.
+       * Otherwise, the new rendering could get ahead of the X Server's
+       * dispatch of the swapbuffer and you'd display garbage.
+       *
+       * We use XSync() first to reap the invalidate events through the event
+       * filter, to ensure that the next drawing doesn't use an invalidated
+       * buffer.
+       */
+      XSync(dpy, False);
+   }
+
+   swap_buffers_reply =
+      xcb_dri2_swap_buffers_reply(c, swap_buffers_cookie, NULL);
+   ret = merge_counter(swap_buffers_reply->swap_hi,
+                       swap_buffers_reply->swap_lo);
+   free(swap_buffers_reply);
+   return ret;
+}
+
+static void
+dri2FinishSwapBuffersAsync(void *data)
+{
+   __GLXDRIdrawable *pdraw = (__GLXDRIdrawable*)data;
+   struct dri2_screen *psc = (struct dri2_screen*)pdraw->psc;
+
+   pthread_mutex_lock(&psc->second_dpy_mutex);
+   if (psc->second_dpy) {
+      dri2FinishSwapBuffers(psc->second_dpy, pdraw, 0, 0, 0, True);
+   }
+   pthread_mutex_unlock(&psc->second_dpy_mutex);
+}
+
+static int64_t
 dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
 		int64_t remainder, Bool flush)
 {
@@ -738,11 +850,7 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
     struct dri2_screen *psc = (struct dri2_screen *) priv->base.psc;
     struct dri2_display *pdp =
 	(struct dri2_display *)dpyPriv->dri2Display;
-    CARD64 ret = 0;
-
-    if (flush) {
-       glFlush();
-    }
+    int64_t ret = 0;
 
     /* Check we have the right attachments */
     if (!priv->have_back)
@@ -751,49 +859,50 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
     /* Old servers can't handle swapbuffers */
     if (!pdp->swapAvailable) {
        __dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height,
-			   __DRI2_THROTTLE_SWAPBUFFER);
+			   __DRI2_THROTTLE_SWAPBUFFER, flush);
     } else {
-       xcb_connection_t *c = XGetXCBConnection(pdraw->psc->dpy);
-       xcb_dri2_swap_buffers_cookie_t swap_buffers_cookie;
-       xcb_dri2_swap_buffers_reply_t *swap_buffers_reply;
-       uint32_t target_msc_hi, target_msc_lo;
-       uint32_t divisor_hi, divisor_lo;
-       uint32_t remainder_hi, remainder_lo;
-
-       if (psc->f) {
-          struct glx_context *gc = __glXGetCurrentContext();
-
-          if (gc) {
-             (*psc->f->flush)(priv->driDrawable);
+       if (psc->async_flush) {
+          unsigned flags = flush ? __DRI2_FLUSH_CONTEXT : 0;
+          __DRIcontext *ctx = dri2GetCurrentContext();
+
+          if (psc->no_async_flush ||
+              target_msc || divisor || remainder) {
+             /* We can't offload this call: async flushing is disabled, or we
+              * have to return a value to SwapBuffersMscOML.
+              */
+             (*psc->async_flush->flush)(ctx, priv->driDrawable, flags,
+                                        __DRI2_THROTTLE_SWAPBUFFER,
+                                        NULL, NULL);
+             ret = dri2FinishSwapBuffers(pdraw->psc->dpy, pdraw,
+                                         target_msc, divisor, remainder,
+                                         False);
+          }
+          else {
+             /* Tell the driver it can safely offload this call. */
+             (*psc->async_flush->flush)(ctx, priv->driDrawable,
+                                        __DRI2_FLUSH_ASYNC | flags,
+                                        __DRI2_THROTTLE_SWAPBUFFER,
+                                        dri2FinishSwapBuffersAsync,
+                                        pdraw);
           }
        }
+       else {
+          if (flush) {
+             glFlush();
+          }
+
+          if (psc->f) {
+             struct glx_context *gc = __glXGetCurrentContext();
+
+             if (gc) {
+                (*psc->f->flush)(priv->driDrawable);
+             }
+          }
 
-       dri2Throttle(psc, priv, __DRI2_THROTTLE_SWAPBUFFER);
-
-       split_counter(target_msc, &target_msc_hi, &target_msc_lo);
-       split_counter(divisor, &divisor_hi, &divisor_lo);
-       split_counter(remainder, &remainder_hi, &remainder_lo);
-
-       swap_buffers_cookie =
-          xcb_dri2_swap_buffers_unchecked(c, pdraw->xDrawable,
-                                          target_msc_hi, target_msc_lo,
-                                          divisor_hi, divisor_lo,
-                                          remainder_hi, remainder_lo);
-       /* Immediately wait on the swapbuffers reply.  If we didn't, we'd have
-        * to do so some time before reusing a (non-pageflipped) backbuffer.
-        * Otherwise, the new rendering could get ahead of the X Server's
-        * dispatch of the swapbuffer and you'd display garbage.
-        *
-        * We use XSync() first to reap the invalidate events through the event
-        * filter, to ensure that the next drawing doesn't use an invalidated
-        * buffer.
-        */
-       XSync(pdraw->psc->dpy, False);
-       swap_buffers_reply =
-          xcb_dri2_swap_buffers_reply(c, swap_buffers_cookie, NULL);
-       ret = merge_counter(swap_buffers_reply->swap_hi,
-                           swap_buffers_reply->swap_lo);
-       free(swap_buffers_reply);
+          dri2Throttle(psc, priv, __DRI2_THROTTLE_SWAPBUFFER);
+          ret = dri2FinishSwapBuffers(pdraw->psc->dpy, pdraw,
+                                      target_msc, divisor, remainder, False);
+       }
     }
 
     if (psc->show_fps) {
@@ -946,6 +1055,9 @@ dri2_bind_tex_image(Display * dpy,
    if (pdraw != NULL) {
       psc = (struct dri2_screen *) base->psc;
 
+      /* We might be in a compositor. Turn off asynchronous flushing. */
+      psc->no_async_flush = True;
+
       if (!pdp->invalidateAvailable && psc->f &&
            psc->f->base.version >= 3 && psc->f->invalidate)
 	 psc->f->invalidate(pdraw->driDrawable);
@@ -1038,6 +1150,11 @@ dri2BindExtensions(struct dri2_screen *psc, const __DRIextension **extensions)
 	 /* internal driver extension, no GL extension exposed */
       }
 
+      if ((strcmp(extensions[i]->name, __DRI2_ASYNC_FLUSH) == 0)) {
+	 psc->async_flush = (__DRI2asyncFlushExtension *) extensions[i];
+	 /* internal driver extension, no GL extension exposed */
+      }
+
       if ((strcmp(extensions[i]->name, __DRI2_CONFIG_QUERY) == 0))
 	 psc->config = (__DRI2configQueryExtension *) extensions[i];
 
@@ -1084,8 +1201,16 @@ dri2CreateScreen(int screen, struct glx_display * priv)
       return NULL;
    }
 
+   if (!dri2OpenSecondDisplay(psc)) {
+      glx_screen_cleanup(&psc->base);
+      free(psc);
+      InfoMessageF("cannot create a second X connection\n");
+      return NULL;
+   }
+
    if (!DRI2Connect(priv->dpy, RootWindow(priv->dpy, screen),
 		    &driverName, &deviceName)) {
+      dri2CloseSecondDisplay(psc);
       glx_screen_cleanup(&psc->base);
       free(psc);
       InfoMessageF("screen %d does not appear to be DRI2 capable\n", screen);
@@ -1202,6 +1327,8 @@ dri2CreateScreen(int screen, struct glx_display * priv)
 
    tmp = getenv("LIBGL_SHOW_FPS");
    psc->show_fps = tmp && strcmp(tmp, "1") == 0;
+   tmp = getenv("LIBGL_NO_ASYNC");
+   psc->no_async_flush = tmp && strcmp(tmp, "1") == 0;
 
    return &psc->base;
 
@@ -1222,6 +1349,7 @@ handle_error:
 
    free(driverName);
    free(deviceName);
+   dri2CloseSecondDisplay(psc);
    glx_screen_cleanup(&psc->base);
    free(psc);
 
-- 
1.7.9.5



More information about the mesa-dev mailing list