[Mesa-dev] [PATCH 47/51] i965: Allow syncobjects to hook into the internal fence tracking

Chris Wilson chris at chris-wilson.co.uk
Tue Jan 10 21:24:10 UTC 2017


Since we already use fences internally to track buffer busyness within
brw_batch.c, we can expose those fences directly to back GL/DRI2 sync
objects.
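
As a sketch of the intended usage (all names below are introduced by
this patch; error handling and the PERF_DEBUG markers are elided), a
sync object simply embeds a struct brw_fence and drives it through the
new entry points:

   struct brw_gl_sync {
      struct gl_sync_object gl;
      struct brw_fence fence; /* hooks into brw_batch's request list */
   };

   /* glFenceSync: attach the fence to the request being constructed */
   sync->gl.StatusFlag =
      !brw_batch_insert_fence(&brw->batch, &sync->fence,
                              PIPE_CONTROL_RENDER_TARGET_FLUSH |
                              PIPE_CONTROL_DEPTH_CACHE_FLUSH);

   /* glClientWaitSync: block until the request's seqno is passed */
   if (brw_fence_wait(&sync->fence, timeout, NULL) == 0)
      sync->gl.StatusFlag = 1;

   /* glDeleteSync: decouple the fence from its request */
   brw_fence_finish(&sync->fence);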

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_batch.c |  87 ++++++++++++++++--
 src/mesa/drivers/dri/i965/brw_batch.h |  22 ++++-
 src/mesa/drivers/dri/i965/brw_sync.c  | 167 +++++++++-------------------------
 3 files changed, 140 insertions(+), 136 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index defa329e53..b257d000f8 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -204,7 +204,7 @@ static void __brw_request_retire(struct brw_request * const rq)
       assert(RQ_BO(tmp)->exec == NULL);
       assert(RQ_RING(tmp) == ring);
 
-      list_for_each_entry_safe(struct __brw_fence, fence, &tmp->fences, link) {
+      list_for_each_entry_safe(struct brw_fence, fence, &tmp->fences, link) {
          int signal = brw_fence_get_signal(fence);
 
          assert(brw_fence_get_request(fence) == tmp);
@@ -755,7 +755,7 @@ static void __brw_batch_grow_exec(struct brw_batch *batch)
    if (new_exec != batch->exec) {
       struct list_head * const list = &batch->next_request->fences;
 
-      list_for_each_entry_rev(struct __brw_fence, fence, list, link) {
+      list_for_each_entry_rev(struct brw_fence, fence, list, link) {
          int signal = brw_fence_get_signal(fence);
          struct brw_bo *bo = NULL;
 
@@ -1321,7 +1321,7 @@ int brw_batch_flush(struct brw_batch *batch, struct perf_debug *perf)
           * At any rate, we have to decouple our fences so that we don't die
           * later on when trying to use them.
           */
-         list_for_each_entry_safe(struct __brw_fence, fence, &rq->fences, link) {
+         list_for_each_entry_safe(struct brw_fence, fence, &rq->fences, link) {
             int signal = brw_fence_get_signal(fence);
             list_inithead(&fence->link);
             fence->rq = NULL;
@@ -1360,7 +1360,7 @@ int brw_batch_flush(struct brw_batch *batch, struct perf_debug *perf)
    }
 
 skip:
-   list_for_each_entry_rev(struct __brw_fence, fence, &rq->fences, link) {
+   list_for_each_entry_rev(struct brw_fence, fence, &rq->fences, link) {
       int signal = brw_fence_get_signal(fence);
       struct brw_bo *bo = NULL;
 
@@ -1697,6 +1697,81 @@ struct brw_bo *brw_bo_create_from_name(struct brw_batch *batch,
    return bo;
 }
 
+bool brw_batch_insert_fence(struct brw_batch *batch,
+                            struct brw_fence *fence,
+                            unsigned flags)
+{
+   struct brw_request *rq;
+
+   if (!batch->bo->dirty) {
+      rq = batch->requests[batch->ring].mru;
+      if (rq == NULL)
+         return false;
+
+      fence->seqno = rq->seqno;
+   } else {
+      batch->inside_begin_count++;
+      fence->seqno = __brw_batch_emit_seqno(batch, flags);
+      rq = batch->next_request;
+      batch->emit.nbatch = batch->tail - batch->map;
+      batch->inside_begin_count--;
+   }
+
+   fence->rq = FENCE_MARK_SIGNAL(rq, NO_SIGNAL);
+   list_addtail(&fence->link, &rq->fences);
+   return true;
+}
+
+bool
+brw_fence_busy(struct brw_fence *fence, struct perf_debug *perf)
+{
+   struct brw_request *rq = brw_fence_get_request(fence);
+   struct brw_batch *batch;
+
+   if (rq == NULL)
+      return false;
+
+   batch = RQ_BO(rq)->batch;
+
+   if (rq->seqno == 0)
+      return brw_batch_flush(batch, perf) == 0;
+
+   if (seqno_busy(fence->seqno, batch->seqno_map[CACHELINE_DWORDS*RQ_RING(rq)]))
+      return true;
+
+   list_del(&fence->link);
+   fence->rq = NULL;
+   return false;
+}
+
+int brw_fence_wait(struct brw_fence *fence,
+                   int64_t timeout,
+                   struct perf_debug *perf)
+{
+   struct brw_request *rq = brw_fence_get_request(fence);
+   int err;
+
+   /* Already signalled and decoupled from its request? Nothing to wait on. */
+   if (rq == NULL)
+      return 0;
+
+   err = 0;
+   if (seqno_busy(fence->seqno,
+                  RQ_BO(rq)->batch->seqno_map[CACHELINE_DWORDS*RQ_RING(rq)]))
+      err = __brw_request_wait(rq, timeout, perf);
+   if (err == 0) {
+      list_del(&fence->link);
+      fence->rq = NULL;
+   }
+
+   return err;
+}
+
+void brw_fence_finish(struct brw_fence *fence)
+{
+   if (fence->rq == NULL)
+      return;
+
+   list_del(&fence->link);
+   fence->rq = NULL;
+}
+
 /*
  * Provide a WC mmapping of the buffer. Coherent everywhere, but
  * reads are very slow (as they are uncached) unless streamed using movntdqa.
@@ -1822,7 +1897,7 @@ void *brw_bo_map(struct brw_bo *bo, unsigned flags, struct perf_debug *perf)
           bo->handle, flags));
 
    if ((flags & MAP_ASYNC) == 0) {
-      struct __brw_fence *fences;
+      struct brw_fence *fences;
       int nfence;
 
       if (flags & MAP_WRITE) {
@@ -2273,7 +2348,7 @@ __brw_batch_fini__requests(struct brw_batch *batch)
    }
 
    /* Incomplete batch, decouple buffers from the request */
-   list_for_each_entry_rev(struct __brw_fence, fence, &rq->fences, link) {
+   list_for_each_entry_rev(struct brw_fence, fence, &rq->fences, link) {
       int signal = brw_fence_get_signal(fence);
       struct brw_bo *bo = NULL;
 
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index e6e2f801ad..264868f253 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -65,7 +65,7 @@ enum brw_bo_domain { DOMAIN_NONE, DOMAIN_CPU, DOMAIN_GTT };
  * the GPU passes that point, the fence will be signalled. Or you can wait
  * for a fence to complete.
  */
-struct __brw_fence {
+struct brw_fence {
    struct brw_request *rq;
    struct list_head link;
    uint32_t seqno;
@@ -74,7 +74,7 @@ struct __brw_fence {
 typedef struct brw_bo {
    struct brw_batch *batch;
    struct drm_i915_gem_exec_object2 *exec;
-   struct __brw_fence read[__BRW_NUM_RINGS], write;
+   struct brw_fence read[__BRW_NUM_RINGS], write;
 
    unsigned active : __BRW_NUM_RINGS;
    unsigned dirty : 1;
@@ -317,12 +317,12 @@ void brw_bo_read(struct brw_bo *bo, uint64_t offset,
                  unsigned flags,
                  struct perf_debug *perf);
 
-static inline struct brw_request *brw_fence_get_request(struct __brw_fence *f)
+static inline struct brw_request *brw_fence_get_request(struct brw_fence *f)
 {
 	return (struct brw_request *)((uintptr_t)f->rq & ~3);
 }
 
-static inline int brw_fence_get_signal(struct __brw_fence *f)
+static inline int brw_fence_get_signal(struct brw_fence *f)
 {
 	return (uintptr_t)f->rq & 3;
 }
@@ -338,7 +338,7 @@ static inline bool brw_bo_busy(struct brw_bo *bo,
 #define BUSY_WRITE 1
 #define BUSY_FLUSH 2
 {
-   struct __brw_fence *fences;
+   struct brw_fence *fences;
    int nfence;
 
    if (!bo)
@@ -389,6 +389,18 @@ static inline void brw_bo_put(struct brw_bo *bo)
       __brw_bo_free(bo);
 }
 
+bool
+brw_batch_insert_fence(struct brw_batch *batch,
+                       struct brw_fence *fence,
+                       unsigned flags);
+
+bool brw_fence_busy(struct brw_fence *fence, struct perf_debug *perf);
+
+int brw_fence_wait(struct brw_fence *fence,
+                   int64_t timeout,
+                   struct perf_debug *perf);
+void brw_fence_finish(struct brw_fence *fence);
+
 /* Control batch command insertion and submission to hw */
 MUST_CHECK int __brw_batch_begin(struct brw_batch *batch,
                                  uint32_t estimated_bytes,
diff --git a/src/mesa/drivers/dri/i965/brw_sync.c b/src/mesa/drivers/dri/i965/brw_sync.c
index 988b1bc38b..03a32bec69 100644
--- a/src/mesa/drivers/dri/i965/brw_sync.c
+++ b/src/mesa/drivers/dri/i965/brw_sync.c
@@ -41,122 +41,35 @@
 #include "main/imports.h"
 
 #include "brw_context.h"
-
-struct brw_fence {
-   struct brw_context *brw;
-   /** The fence waits for completion of this batch. */
-   brw_bo *batch_bo;
-
-   mtx_t mutex;
-   bool signalled;
-};
+#include "brw_defines.h"
 
 struct brw_gl_sync {
    struct gl_sync_object gl;
    struct brw_fence fence;
 };
 
-static void
-brw_fence_init(struct brw_context *brw, struct brw_fence *fence)
-{
-   fence->brw = brw;
-   fence->batch_bo = NULL;
-   mtx_init(&fence->mutex, mtx_plain);
-}
-
-static void
-brw_fence_finish(struct brw_fence *fence)
-{
-   brw_bo_put(fence->batch_bo);
-
-   mtx_destroy(&fence->mutex);
-}
-
-static void
-brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
-{
-   assert(!fence->batch_bo);
-   assert(!fence->signalled);
-
-   brw_mi_flush(brw, brw->batch.ring);
-   fence->batch_bo = brw_bo_get(brw->batch.bo);
-   brw_batch_flush(&brw->batch, PERF_DEBUG(brw, "SyncFence"));
-}
-
-static bool
-brw_fence_has_completed_locked(struct brw_fence *fence)
-{
-   if (fence->signalled)
-      return true;
-
-   if (brw_bo_busy(fence->batch_bo, BUSY_WRITE | BUSY_FLUSH, NULL)) {
-      brw_bo_put(fence->batch_bo);
-      fence->batch_bo = NULL;
-      fence->signalled = true;
-      return true;
-   }
-
-   return false;
-}
-
-static bool
-brw_fence_has_completed(struct brw_fence *fence)
-{
-   bool ret;
-
-   mtx_lock(&fence->mutex);
-   ret = brw_fence_has_completed_locked(fence);
-   mtx_unlock(&fence->mutex);
-
-   return ret;
-}
-
-static bool
-brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
-                             uint64_t timeout)
-{
-   if (fence->signalled)
-      return true;
-
-   assert(fence->batch_bo);
-
-   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
-    * immediately for timeouts <= 0.  The best we can do is to clamp the
-    * timeout to INT64_MAX.  This limits the maximum timeout from 584 years to
-    * 292 years - likely not a big deal.
-    */
-   if (timeout > INT64_MAX)
-      timeout = INT64_MAX;
-
-   if (drm_intel_gem_bo_wait(fence->batch_bo->base, timeout) != 0)
-      return false;
-
-   fence->signalled = true;
-   brw_bo_put(fence->batch_bo);
-   fence->batch_bo = NULL;
-
-   return true;
-}
-
 /**
  * Return true if the function successfully signals or has already signalled.
  * (This matches the behavior expected from __DRI2fence::client_wait_sync).
  */
 static bool
-brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
-                      uint64_t timeout)
+brw_fence_client_wait(struct brw_fence *fence,
+                      uint64_t timeout,
+                      struct perf_debug *perf)
 {
-   bool ret;
-
-   mtx_lock(&fence->mutex);
-   ret = brw_fence_client_wait_locked(brw, fence, timeout);
-   mtx_unlock(&fence->mutex);
+   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
+    * immediately for timeout == 0, and indefinitely if timeout is negative.
+    * The best we can do is to clamp the timeout to INT64_MAX.  This limits
+    * the maximum timeout from 584 years to 292 years - likely not a big deal.
+    */
+   if (timeout > INT64_MAX)
+      timeout = INT64_MAX;
 
-   return ret;
+   return brw_fence_wait(fence, timeout, perf) == 0;
 }
 
 static void
-brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
+brw_fence_server_wait(struct brw_fence *fence)
 {
    /* We have nothing to do for WaitSync.  Our GL command stream is sequential,
     * so given that the sync object has already flushed the batchbuffer, any
@@ -178,53 +91,55 @@ brw_gl_new_sync(struct gl_context *ctx, GLuint id)
 }
 
 static void
-brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
+brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *s)
 {
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
    brw_fence_finish(&sync->fence);
    free(sync);
 }
 
 static void
-brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
+brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *s,
                   GLenum condition, GLbitfield flags)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
-   brw_fence_init(brw, &sync->fence);
-   brw_fence_insert(brw, &sync->fence);
+   s->StatusFlag =
+      !brw_batch_insert_fence(&brw->batch,
+                              &sync->fence,
+                              PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                              PIPE_CONTROL_DEPTH_CACHE_FLUSH);
 }
 
 static void
-brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
+brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
                         GLbitfield flags, GLuint64 timeout)
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
-   if (brw_fence_client_wait(brw, &sync->fence, timeout))
-      sync->gl.StatusFlag = 1;
+   s->StatusFlag =
+      brw_fence_client_wait(&sync->fence, timeout,
+                            PERF_DEBUG(brw_context(ctx), "ClientWaitSync"));
 }
 
 static void
-brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
+brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
                           GLbitfield flags, GLuint64 timeout)
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
-   brw_fence_server_wait(brw, &sync->fence);
+   brw_fence_server_wait(&sync->fence);
 }
 
 static void
-brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
+brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *s)
 {
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
-   if (brw_fence_has_completed(&sync->fence))
-      sync->gl.StatusFlag = 1;
+   s->StatusFlag =
+      !brw_fence_busy(&sync->fence, PERF_DEBUG(brw_context(ctx), "CheckSync"));
 }
 
 void
@@ -248,8 +163,9 @@ brw_dri_create_fence(__DRIcontext *ctx)
    if (!fence)
       return NULL;
 
-   brw_fence_init(brw, fence);
-   brw_fence_insert(brw, fence);
+   brw_batch_insert_fence(&brw->batch, fence,
+                          PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                          PIPE_CONTROL_DEPTH_CACHE_FLUSH);
 
    return fence;
 }
@@ -264,12 +180,13 @@ brw_dri_destroy_fence(__DRIscreen *dri_screen, void *_fence)
 }
 
 static GLboolean
-brw_dri_client_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags,
+brw_dri_client_wait_sync(__DRIcontext *ctx, void *fence, unsigned flags,
                          uint64_t timeout)
 {
-   struct brw_fence *fence = _fence;
+   struct brw_context *brw = ctx->driverPrivate;
 
-   return brw_fence_client_wait(fence->brw, fence, timeout);
+   return brw_fence_client_wait(fence, timeout,
+                                PERF_DEBUG(brw, "DRI2ClientFenceWait"));
 }
 
 static void
@@ -283,7 +200,7 @@ brw_dri_server_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags)
    if (!fence)
       return;
 
-   brw_fence_server_wait(fence->brw, fence);
+   brw_fence_server_wait(fence);
 }
 
 const __DRI2fenceExtension intelFenceExtension = {
-- 
2.11.0


