Mesa (master): i965/sync: Implement DRI2_Fence extension

Thu May 7 15:13:12 UTC 2015

Module: Mesa
Branch: master
Commit: c636284ee8ee95bb3f3ad31aaf26a9512ec5006c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c636284ee8ee95bb3f3ad31aaf26a9512ec5006c

Author: Chad Versace <chad.versace at intel.com>
Date:   Tue May  5 19:05:32 2015 -0700

i965/sync: Implement DRI2_Fence extension

This enables EGL_KHR_fence_sync and EGL_KHR_wait_sync.

Below is the difference in piglit results before and after this patch.
There are no regressions, and several tests improve from 'skip' to 'pass'.
Of the EGL_KHR_fence_sync tests, two of the multithreaded tests skip; all
other tests pass.

  cmdline: piglit run -p gbm -t sync tests/quick.py
  mesa: master at 1ac7db0
  piglit: 4069bec
  hw: Ivybridge

        | before after
  ------+-------------
   pass |     32    46
   fail |      0     0
  crash |      0     0
   skip |     35    21
  total |     67    67

v2:
  - Set fence->signalled = true in brw_fence_has_completed() too.

Reviewed-by: Daniel Stone <daniels at collabora.com>
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 docs/relnotes/10.6.0.html                 |    4 +-
 src/mesa/drivers/dri/i965/intel_screen.c  |    2 +
 src/mesa/drivers/dri/i965/intel_screen.h  |    4 +
 src/mesa/drivers/dri/i965/intel_syncobj.c |  191 +++++++++++++++++++++++------
 4 files changed, 160 insertions(+), 41 deletions(-)

diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html
index fb1a928..b004203 100644
--- a/docs/relnotes/10.6.0.html
+++ b/docs/relnotes/10.6.0.html
@@ -60,8 +60,8 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_viewport_array, GL_AMD_vertex_shader_viewport_index on i965/gen6</li>
 <li>GL_EXT_draw_buffers2 on freedreno</li>
 <li>GL_OES_EGL_sync on all drivers</li>
-<li>EGL_KHR_fence_sync on freedreno, nv50, nvc0, r600, radeonsi</li>
-<li>EGL_KHR_wait_sync on freedreno, nv50, nvc0, r600, radeonsi</li>
+<li>EGL_KHR_fence_sync on i965, freedreno, nv50, nvc0, r600, radeonsi</li>
+<li>EGL_KHR_wait_sync on i965, freedreno, nv50, nvc0, r600, radeonsi</li>
 <li>EGL_KHR_cl_event2 on freedreno, nv50, nvc0, r600, radeonsi</li>
 <li>GL_AMD_performance_monitor on nvc0</li>
 </ul>
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 015eaf1..dda1638 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -909,6 +909,7 @@ static const __DRIrobustnessExtension dri2Robustness = {
 
 static const __DRIextension *intelScreenExtensions[] = {
     &intelTexBufferExtension.base,
+    &intelFenceExtension.base,
     &intelFlushExtension.base,
     &intelImageExtension.base,
     &intelRendererQueryExtension.base,
@@ -918,6 +919,7 @@ static const __DRIextension *intelScreenExtensions[] = {
 
 static const __DRIextension *intelRobustScreenExtensions[] = {
     &intelTexBufferExtension.base,
+    &intelFenceExtension.base,
     &intelFlushExtension.base,
     &intelImageExtension.base,
     &intelRendererQueryExtension.base,
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index f814ed0..e7a1490 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -30,6 +30,9 @@
 
 #include <stdbool.h>
 #include <sys/time.h>
+
+#include <GL/internal/dri_interface.h>
+
 #include "dri_util.h"
 #include "intel_bufmgr.h"
 #include "brw_device_info.h"
@@ -76,6 +79,7 @@ extern void intelDestroyContext(__DRIcontext * driContextPriv);
 extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv);
 
 PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void);
+extern const __DRI2fenceExtension intelFenceExtension;
 
 extern GLboolean
 intelMakeCurrent(__DRIcontext * driContextPriv,
diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c
index dea6dba..3cfa7e5 100644
--- a/src/mesa/drivers/dri/i965/intel_syncobj.c
+++ b/src/mesa/drivers/dri/i965/intel_syncobj.c
@@ -25,11 +25,11 @@
  *
  */
 
-/** @file intel_syncobj.c
+/**
+ * \file
+ * \brief Support for GL_ARB_sync and EGL_KHR_fence_sync.
  *
- * Support for ARB_sync
- *
- * ARB_sync is implemented by flushing the current batchbuffer and keeping a
+ * GL_ARB_sync is implemented by flushing the current batchbuffer and keeping a
  * reference on it.  We can then check for completion or wait for completion
  * using the normal buffer object mechanisms.  This does mean that if an
  * application is using many sync objects, it will emit small batchbuffers
@@ -44,13 +44,94 @@
 #include "intel_batchbuffer.h"
 #include "intel_reg.h"
 
+struct brw_fence {
+   /** The fence waits for completion of this batch. */
+   drm_intel_bo *batch_bo;
+
+   bool signalled;
+};
+
 struct intel_gl_sync_object {
    struct gl_sync_object Base;
-
-   /** Batch associated with this sync object */
-   drm_intel_bo *bo;
+   struct brw_fence fence;
 };
 
+static void
+brw_fence_finish(struct brw_fence *fence)
+{
+   if (fence->batch_bo)
+      drm_intel_bo_unreference(fence->batch_bo);
+}
+
+static void
+brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
+{
+   assert(!fence->batch_bo);
+   assert(!fence->signalled);
+
+   intel_batchbuffer_emit_mi_flush(brw);
+   fence->batch_bo = brw->batch.bo;
+   drm_intel_bo_reference(fence->batch_bo);
+   intel_batchbuffer_flush(brw);
+}
+
+static bool
+brw_fence_has_completed(struct brw_fence *fence)
+{
+   if (fence->signalled)
+      return true;
+
+   if (fence->batch_bo && !drm_intel_bo_busy(fence->batch_bo)) {
+      drm_intel_bo_unreference(fence->batch_bo);
+      fence->batch_bo = NULL;
+      fence->signalled = true;
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Return true if the fence has signalled or signals within the timeout.
+ * (This matches the behavior expected from __DRI2fence::client_wait_sync).
+ */
+static bool
+brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
+                      uint64_t timeout)
+{
+   if (fence->signalled)
+      return true;
+
+   assert(fence->batch_bo);
+
+   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
+    * immediately for timeouts <= 0.  The best we can do is to clamp the
+    * timeout to INT64_MAX.  This limits the maximum timeout from 584 years to
+    * 292 years - likely not a big deal.
+    */
+   if (timeout > INT64_MAX)
+      timeout = INT64_MAX;
+
+   if (drm_intel_gem_bo_wait(fence->batch_bo, timeout) != 0)
+      return false;
+
+   fence->signalled = true;
+   drm_intel_bo_unreference(fence->batch_bo);
+   fence->batch_bo = NULL;
+
+   return true;
+}
+
+static void
+brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
+{
+   /* We have nothing to do for WaitSync.  Our GL command stream is sequential,
+    * so given that the sync object has already flushed the batchbuffer, any
+    * batchbuffers coming after this waitsync will naturally not occur until
+    * the previous one is done.
+    */
+}
+
 static struct gl_sync_object *
 intel_gl_new_sync_object(struct gl_context *ctx, GLuint id)
 {
@@ -68,9 +149,7 @@ intel_gl_delete_sync_object(struct gl_context *ctx, struct gl_sync_object *s)
 {
    struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
 
-   if (sync->bo)
-      drm_intel_bo_unreference(sync->bo);
-
+   brw_fence_finish(&sync->fence);
    free(sync);
 }
 
@@ -81,56 +160,37 @@ intel_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *s,
    struct brw_context *brw = brw_context(ctx);
    struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
 
-   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
-   intel_batchbuffer_emit_mi_flush(brw);
-
-   sync->bo = brw->batch.bo;
-   drm_intel_bo_reference(sync->bo);
-
-   intel_batchbuffer_flush(brw);
+   brw_fence_insert(brw, &sync->fence);
 }
 
 static void
 intel_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
                           GLbitfield flags, GLuint64 timeout)
 {
+   struct brw_context *brw = brw_context(ctx);
    struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
 
-   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
-    * immediately for timeouts <= 0.  The best we can do is to clamp the
-    * timeout to INT64_MAX.  This limits the maximum timeout from 584 years to
-    * 292 years - likely not a big deal.
-    */
-   if (timeout > INT64_MAX)
-      timeout = INT64_MAX;
-
-   if (sync->bo && drm_intel_gem_bo_wait(sync->bo, timeout) == 0) {
+   if (brw_fence_client_wait(brw, &sync->fence, timeout))
       s->StatusFlag = 1;
-      drm_intel_bo_unreference(sync->bo);
-      sync->bo = NULL;
-   }
 }
 
-/* We have nothing to do for WaitSync.  Our GL command stream is sequential,
- * so given that the sync object has already flushed the batchbuffer,
- * any batchbuffers coming after this waitsync will naturally not occur until
- * the previous one is done.
- */
 static void
 intel_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
                           GLbitfield flags, GLuint64 timeout)
 {
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
+
+   brw_fence_server_wait(brw, &sync->fence);
 }
 
-static void intel_check_sync(struct gl_context *ctx, struct gl_sync_object *s)
+static void
+intel_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *s)
 {
    struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
 
-   if (sync->bo && !drm_intel_bo_busy(sync->bo)) {
-      drm_intel_bo_unreference(sync->bo);
-      sync->bo = NULL;
+   if (brw_fence_has_completed(&sync->fence))
       s->StatusFlag = 1;
-   }
 }
 
 void
@@ -143,3 +203,56 @@ intel_init_syncobj_functions(struct dd_function_table *functions)
    functions->ClientWaitSync = intel_gl_client_wait_sync;
    functions->ServerWaitSync = intel_gl_server_wait_sync;
 }
+
+static void *
+intel_dri_create_fence(__DRIcontext *ctx)
+{
+   struct brw_context *brw = ctx->driverPrivate;
+   struct brw_fence *fence;
+
+   fence = calloc(1, sizeof(*fence));
+   if (!fence)
+      return NULL;
+
+   brw_fence_insert(brw, fence);
+
+   return fence;
+}
+
+static void
+intel_dri_destroy_fence(__DRIscreen *screen, void *driver_fence)
+{
+   struct brw_fence *fence = driver_fence;
+
+   brw_fence_finish(fence);
+   free(fence);
+}
+
+static GLboolean
+intel_dri_client_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags,
+                           uint64_t timeout)
+{
+   struct brw_context *brw = ctx->driverPrivate;
+   struct brw_fence *fence = driver_fence;
+
+   return brw_fence_client_wait(brw, fence, timeout);
+}
+
+static void
+intel_dri_server_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags)
+{
+   struct brw_context *brw = ctx->driverPrivate;
+   struct brw_fence *fence = driver_fence;
+
+   brw_fence_server_wait(brw, fence);
+}
+
+const __DRI2fenceExtension intelFenceExtension = {
+   .base = { __DRI2_FENCE, 1 },
+
+   .create_fence = intel_dri_create_fence,
+   .destroy_fence = intel_dri_destroy_fence,
+   .client_wait_sync = intel_dri_client_wait_sync,
+   .server_wait_sync = intel_dri_server_wait_sync,
+   .get_fence_from_cl_event = NULL,
+};