Mesa (master): anv: Rework fences

Thu Nov 10 02:18:14 UTC 2016

Module: Mesa
Branch: master
Commit: 843775bab78a6b4d5cb4f02bd95d9d0e95c1c5e3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=843775bab78a6b4d5cb4f02bd95d9d0e95c1c5e3

Author: Jason Ekstrand <jason.ekstrand at intel.com>
Date:   Wed Nov  2 09:11:11 2016 -0700

anv: Rework fences

Our previous fence implementation was very simple.  Fences had two states:
signaled and unsignaled.  However, this didn't properly handle all of the
edge-cases that we need to handle.  In order to handle the case where the
client calls vkGetFenceStatus on a fence that has not yet been submitted
via vkQueueSubmit, we need a three-status system.  In order to handle the
case where the client calls vkWaitForFences on fences which have not yet
been submitted, we need more complex logic and a condition variable.  It's
rather annoying but, so long as the client doesn't do that, we should still
hit the fast path and use i915_gem_wait to do all our waiting.

Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
Cc: "13.0" <mesa-stable at lists.freedesktop.org>

---

 src/intel/vulkan/anv_device.c  | 159 ++++++++++++++++++++++++++++++++++-------
 src/intel/vulkan/anv_private.h |  15 +++-
 src/intel/vulkan/anv_wsi.c     |   2 +-
 3 files changed, 150 insertions(+), 26 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index e83887c..a9aa646 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -896,6 +896,12 @@ VkResult anv_CreateDevice(
 
    pthread_mutex_init(&device->mutex, NULL);
 
+   pthread_condattr_t condattr;
+   pthread_condattr_init(&condattr);
+   pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
+   pthread_cond_init(&device->queue_submit, NULL);
+   pthread_condattr_destroy(&condattr);
+
    anv_bo_pool_init(&device->batch_bo_pool, device);
 
    anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
@@ -1141,6 +1147,11 @@ VkResult anv_QueueSubmit(
       result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
       if (result != VK_SUCCESS)
          goto out;
+
+      /* Update the fence and wake up any waiters */
+      assert(fence->state == ANV_FENCE_STATE_RESET);
+      fence->state = ANV_FENCE_STATE_SUBMITTED;
+      pthread_cond_broadcast(&device->queue_submit);
    }
 
 out:
@@ -1518,7 +1529,7 @@ VkResult anv_CreateFence(
    fence->execbuf.rsvd1 = device->context_id;
    fence->execbuf.rsvd2 = 0;
 
-   fence->ready = false;
+   fence->state = ANV_FENCE_STATE_RESET;
 
    *pFence = anv_fence_to_handle(fence);
 
@@ -1544,7 +1555,7 @@ VkResult anv_ResetFences(
 {
    for (uint32_t i = 0; i < fenceCount; i++) {
       ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-      fence->ready = false;
+      fence->state = ANV_FENCE_STATE_RESET;
    }
 
    return VK_SUCCESS;
@@ -1559,26 +1570,41 @@ VkResult anv_GetFenceStatus(
    int64_t t = 0;
    int ret;
 
-   if (fence->ready)
-      return VK_SUCCESS;
+   switch (fence->state) {
+   case ANV_FENCE_STATE_RESET:
+      /* If it hasn't even been sent off to the GPU yet, it's not ready */
+      return VK_NOT_READY;
 
-   ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
-   if (ret == 0) {
-      fence->ready = true;
+   case ANV_FENCE_STATE_SIGNALED:
+      /* It's been signaled, return success */
       return VK_SUCCESS;
-   }
 
-   return VK_NOT_READY;
+   case ANV_FENCE_STATE_SUBMITTED:
+      /* It's been submitted to the GPU but we don't know if it's done yet. */
+      ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+      if (ret == 0) {
+         fence->state = ANV_FENCE_STATE_SIGNALED;
+         return VK_SUCCESS;
+      } else {
+         return VK_NOT_READY;
+      }
+   default:
+      unreachable("Invalid fence status");
+   }
 }
 
+#define NSEC_PER_SEC 1000000000
+#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
+
 VkResult anv_WaitForFences(
     VkDevice                                    _device,
     uint32_t                                    fenceCount,
     const VkFence*                              pFences,
     VkBool32                                    waitAll,
-    uint64_t                                    timeout)
+    uint64_t                                    _timeout)
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
+   int ret;
 
    /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
     * to block indefinitely timeouts <= 0.  Unfortunately, this was broken
@@ -1587,22 +1613,107 @@ VkResult anv_WaitForFences(
     * best we can do is to clamp the timeout to INT64_MAX.  This limits the
     * maximum timeout from 584 years to 292 years - likely not a big deal.
     */
-   if (timeout > INT64_MAX)
-      timeout = INT64_MAX;
-
-   int64_t t = timeout;
+   int64_t timeout = MIN2(_timeout, INT64_MAX);
+
+   uint32_t pending_fences = fenceCount;
+   while (pending_fences) {
+      pending_fences = 0;
+      bool signaled_fences = false;
+      for (uint32_t i = 0; i < fenceCount; i++) {
+         ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+         switch (fence->state) {
+         case ANV_FENCE_STATE_RESET:
+            /* This fence hasn't been submitted yet, we'll catch it the next
+             * time around.  Yes, this may mean we dead-loop but, short of
+             * lots of locking and a condition variable, there's not much that
+             * we can do about that.
+             */
+            pending_fences++;
+            continue;
+
+         case ANV_FENCE_STATE_SIGNALED:
+            /* This fence is not pending.  If waitAll isn't set, we can return
+             * early.  Otherwise, we have to keep going.
+             */
+            if (!waitAll)
+               return VK_SUCCESS;
+            continue;
+
+         case ANV_FENCE_STATE_SUBMITTED:
+            /* These are the fences we really care about.  Go ahead and wait
+             * on it until we hit a timeout.
+             */
+            ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout);
+            if (ret == -1 && errno == ETIME) {
+               return VK_TIMEOUT;
+            } else if (ret == -1) {
+               /* We don't know the real error. */
+               return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
+            } else {
+               fence->state = ANV_FENCE_STATE_SIGNALED;
+               signaled_fences = true;
+               if (!waitAll)
+                  return VK_SUCCESS;
+               continue;
+            }
+         }
+      }
 
-   /* FIXME: handle !waitAll */
+      if (pending_fences && !signaled_fences) {
+         /* If we've hit this then someone decided to vkWaitForFences before
+          * they've actually submitted any of them to a queue.  This is a
+          * fairly pessimal case, so it's ok to lock here and use a standard
+          * pthreads condition variable.
+          */
+         pthread_mutex_lock(&device->mutex);
+
+         /* It's possible that some of the fences have changed state since the
+          * last time we checked.  Now that we have the lock, check for
+          * pending fences again and don't wait if it's changed.
+          */
+         uint32_t now_pending_fences = 0;
+         for (uint32_t i = 0; i < fenceCount; i++) {
+            ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+            if (fence->state == ANV_FENCE_STATE_RESET)
+               now_pending_fences++;
+         }
+         assert(now_pending_fences <= pending_fences);
+
+         if (now_pending_fences == pending_fences) {
+            struct timespec before;
+            clock_gettime(CLOCK_MONOTONIC, &before);
+
+            uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
+            uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
+                               (timeout / NSEC_PER_SEC);
+            abs_nsec %= NSEC_PER_SEC;
+
+            /* Avoid roll-over in tv_sec on 32-bit systems if the user
+             * provided timeout is UINT64_MAX
+             */
+            struct timespec abstime;
+            abstime.tv_nsec = abs_nsec;
+            abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
+
+            ret = pthread_cond_timedwait(&device->queue_submit,
+                                         &device->mutex, &abstime);
+            assert(ret != EINVAL);
+
+            struct timespec after;
+            clock_gettime(CLOCK_MONOTONIC, &after);
+            uint64_t time_elapsed =
+               ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
+               ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
+
+            if (time_elapsed >= timeout) {
+               pthread_mutex_unlock(&device->mutex);
+               return VK_TIMEOUT;
+            }
+
+            timeout -= time_elapsed;
+         }
 
-   for (uint32_t i = 0; i < fenceCount; i++) {
-      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-      int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
-      if (ret == -1 && errno == ETIME) {
-         return VK_TIMEOUT;
-      } else if (ret == -1) {
-         /* We don't know the real error. */
-         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "gem wait failed: %m");
+         pthread_mutex_unlock(&device->mutex);
       }
    }
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 4ee6118..2cd487af 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -580,6 +580,7 @@ struct anv_device {
     uint32_t                                    default_mocs;
 
     pthread_mutex_t                             mutex;
+    pthread_cond_t                              queue_submit;
 };
 
 void anv_device_get_cache_uuid(void *uuid);
@@ -1254,11 +1255,23 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
 
 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
 
+enum anv_fence_state {
+   /** Indicates that this is a new (or newly reset fence) */
+   ANV_FENCE_STATE_RESET,
+
+   /** Indicates that this fence has been submitted to the GPU but is still
+    * (as far as we know) in use by the GPU.
+    */
+   ANV_FENCE_STATE_SUBMITTED,
+
+   ANV_FENCE_STATE_SIGNALED,
+};
+
 struct anv_fence {
    struct anv_bo bo;
    struct drm_i915_gem_execbuffer2 execbuf;
    struct drm_i915_gem_exec_object2 exec2_objects[1];
-   bool ready;
+   enum anv_fence_state state;
 };
 
 struct anv_event {
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 61d5db0..b95e965 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -334,7 +334,7 @@ VkResult anv_AcquireNextImageKHR(
 
    /* Thanks to implicit sync, the image is ready immediately. */
    if (fence)
-      fence->ready = true;
+      fence->state = ANV_FENCE_STATE_SIGNALED;
 
    return result;
 }