[Mesa-dev] [PATCH 3/3] anv: Rework fences

Chad Versace chadversary at chromium.org
Thu Nov 3 17:59:48 UTC 2016


On Wed 02 Nov 2016, Jason Ekstrand wrote:
> Our previous fence implementation was very simple.  Fences had two states:
> signaled and unsignaled.  However, this didn't properly handle all of the
> edge-cases that we need to handle.  In order to handle the case where the
> client calls vkGetFenceStatus on a fence that has not yet been submitted
> via vkQueueSubmit, we need a three-status system.  In order to handle the
> case where the client calls vkWaitForFences on fences which have not yet
> been submitted, we need more complex logic and a condition variable.  It's
> rather annoying but, so long as the client doesn't do that, we should still
> hit the fast path and use i915_gem_wait to do all our waiting.
> 
> Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
> ---
>  src/intel/vulkan/anv_device.c  | 142 ++++++++++++++++++++++++++++++++++-------
>  src/intel/vulkan/anv_private.h |  16 ++++-
>  src/intel/vulkan/anv_wsi.c     |   2 +-
>  3 files changed, 134 insertions(+), 26 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index a650212..a56c9ad 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -895,6 +895,7 @@ VkResult anv_CreateDevice(
>        pCreateInfo->pEnabledFeatures->robustBufferAccess;
>  
>     pthread_mutex_init(&device->mutex, NULL);
> +   pthread_cond_init(&device->queue_submit, NULL);
>  
>     anv_bo_pool_init(&device->batch_bo_pool, device);
>  
> @@ -1116,6 +1117,11 @@ VkResult anv_QueueSubmit(
>        result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
>        if (result != VK_SUCCESS)
>           goto out;
> +
> +      /* Update the fence and wake up any waiters */
> +      assert(fence->state == ANV_FENCE_STATE_RESET);

I think this assertion is wrong, and I think valid Vulkan usage can hit it.

The patch, as-written, only transitions the fence state from
ANV_FENCE_STATE_SUBMITTED to ANV_FENCE_STATE_SIGNALED if the user calls
vkWaitForFences (or vkGetFenceStatus). But I can find no text in the spec
that requires the
user to call vkWaitForFences. From my reading of the spec, if the user
submits a fence and a batch of work to the queue, and later determines
that all work has completed without using vkWaitForFences on that fence,
then the fence still enters the signaled state, and therefore the user
is allowed to re-submit it. And that would hit the assertion.

Please correct me if I'm wrong. I'm no expert (yet) on the Vulkan spec's
synchronization sections.

> +      fence->state = ANV_FENCE_STATE_SUBMITTED;
> +      pthread_cond_broadcast(&device->queue_submit);
>     }
>  
>  out:
> @@ -1493,7 +1499,7 @@ VkResult anv_CreateFence(
>     fence->execbuf.rsvd1 = device->context_id;
>     fence->execbuf.rsvd2 = 0;
>  
> -   fence->ready = false;
> +   fence->state = ANV_FENCE_STATE_RESET;
>  
>     *pFence = anv_fence_to_handle(fence);
>  
> @@ -1519,7 +1525,7 @@ VkResult anv_ResetFences(
>  {
>     for (uint32_t i = 0; i < fenceCount; i++) {
>        ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
> -      fence->ready = false;
> +      fence->state = ANV_FENCE_STATE_RESET;
>     }
>  
>     return VK_SUCCESS;
> @@ -1534,16 +1540,27 @@ VkResult anv_GetFenceStatus(
>     int64_t t = 0;
>     int ret;
>  
> -   if (fence->ready)
> -      return VK_SUCCESS;
> +   switch (fence->state) {
> +   case ANV_FENCE_STATE_RESET:
> +      /* If it hasn't even been sent off to the GPU yet, it's not ready */
> +      return VK_NOT_READY;
>  
> -   ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
> -   if (ret == 0) {
> -      fence->ready = true;
> +   case ANV_FENCE_STATE_SIGNALED:
> +      /* It's been signaled, return success */
>        return VK_SUCCESS;
> -   }
>  
> -   return VK_NOT_READY;
> +   case ANV_FENCE_STATE_SUBMITTED:
> +      /* It's been submitted to the GPU but we don't know if it's done yet. */
> +      ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
> +      if (ret == 0) {
> +         fence->state = ANV_FENCE_STATE_SIGNALED;
> +         return VK_SUCCESS;
> +      } else {
> +         return VK_NOT_READY;
> +      }
> +   default:
> +      unreachable("Invalid fence status");
> +   }
>  }
>  
>  VkResult anv_WaitForFences(
> @@ -1551,9 +1568,10 @@ VkResult anv_WaitForFences(
>      uint32_t                                    fenceCount,
>      const VkFence*                              pFences,
>      VkBool32                                    waitAll,
> -    uint64_t                                    timeout)
> +    uint64_t                                    _timeout)
>  {
>     ANV_FROM_HANDLE(anv_device, device, _device);
> +   int ret;
>  
>     /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
>      * to block indefinitely timeouts <= 0.  Unfortunately, this was broken
> @@ -1562,22 +1580,98 @@ VkResult anv_WaitForFences(
>      * best we can do is to clamp the timeout to INT64_MAX.  This limits the
>      * maximum timeout from 584 years to 292 years - likely not a big deal.
>      */
> -   if (timeout > INT64_MAX)
> -      timeout = INT64_MAX;
> -
> -   int64_t t = timeout;
> +   int64_t timeout = MIN2(_timeout, INT64_MAX);
> +
> +   uint32_t pending_fences = fenceCount;
> +   while (pending_fences) {
> +      pending_fences = 0;
> +      bool signaled_fences = false;
> +      for (uint32_t i = 0; i < fenceCount; i++) {
> +         ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
> +         switch (fence->state) {
> +         case ANV_FENCE_STATE_RESET:
> +            /* This fence hasn't been submitted yet, we'll catch it the next
> +             * time around.  Yes, this may mean we dead-loop but, short of
> +             * lots of locking and a condition variable, there's not much that
> +             * we can do about that.
> +             */
> +            pending_fences++;
> +            continue;
> +
> +         case ANV_FENCE_STATE_SIGNALED:
> +            /* This fence is not pending.  If waitAll isn't set, we can return
> +             * early.  Otherwise, we have to keep going.
> +             */
> +            if (!waitAll)
> +               return VK_SUCCESS;
> +            continue;
> +
> +         case ANV_FENCE_STATE_SUBMITTED:
> +            /* These are the fences we really care about.  Go ahead and wait
> +             * on it until we hit a timeout.
> +             */
> +            ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout);
> +            if (ret == -1 && errno == ETIME) {
> +               return VK_TIMEOUT;
> +            } else if (ret == -1) {
> +               /* We don't know the real error. */
> +               return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
> +            } else {
> +               fence->state = ANV_FENCE_STATE_SIGNALED;
> +               signaled_fences = true;
> +               if (!waitAll)
> +                  return VK_SUCCESS;
> +               continue;
> +            }
> +         }
> +      }
>  
> -   /* FIXME: handle !waitAll */
> +      if (pending_fences && !signaled_fences) {
> +         /* If we've hit this then someone decided to vkWaitForFences before
> +          * they've actually submitted any of them to a queue.  This is a
> +          * fairly pessimal case, so it's ok to lock here and use a standard
> +          * pthreads condition variable.
> +          */
> +         pthread_mutex_lock(&device->mutex);
> +
> +         /* It's possible that some of the fences have changed state since the
> +          * last time we checked.  Now that we have the lock, check for
> +          * pending fences again and don't wait if it's changed.
> +          */
> +         uint32_t now_pending_fences = 0;
> +         for (uint32_t i = 0; i < fenceCount; i++) {
> +            ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
> +            if (fence->state == ANV_FENCE_STATE_RESET)
> +               now_pending_fences++;
> +         }
> +         assert(now_pending_fences <= pending_fences);
> +
> +         bool timeout = false;
> +         if (now_pending_fences == pending_fences) {
> +            struct timeval before;
> +            gettimeofday(&before, NULL);
> +
> +            struct timespec ts = {
> +               .tv_nsec = timeout % 1000000000,
> +               .tv_sec = timeout / 1000000000,
> +            };
> +            pthread_cond_timedwait(&device->queue_submit, &device->mutex, &ts);
> +
> +            struct timeval after;
> +            gettimeofday(&after, NULL);
> +            uint64_t time_elapsed =
> +               ((uint64_t)after.tv_sec * 1000000000 + after.tv_usec * 1000) -
> +               ((uint64_t)before.tv_sec * 1000000000 + before.tv_usec * 1000);
> +
> +            if (time_elapsed > timeout) {
> +               pthread_mutex_unlock(&device->mutex);
> +               return VK_TIMEOUT;
> +            }
> +
> +            timeout -= time_elapsed;
> +         }
>  
> -   for (uint32_t i = 0; i < fenceCount; i++) {
> -      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
> -      int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
> -      if (ret == -1 && errno == ETIME) {
> -         return VK_TIMEOUT;
> -      } else if (ret == -1) {
> -         /* We don't know the real error. */
> -         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
> -                          "gem wait failed: %m");
> +         pthread_mutex_unlock(&device->mutex);
>        }
>     }
>  
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index fab956b..5b1af1c 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -575,6 +575,8 @@ struct anv_device {
>      uint32_t                                    default_mocs;
>  
>      pthread_mutex_t                             mutex;
> +
> +    pthread_cond_t                              queue_submit;
>  };

Why the extra spacing? Dense is better here.

>  void anv_device_get_cache_uuid(void *uuid);
> @@ -1264,11 +1266,23 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
>  
>  void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
>  
> +enum anv_fence_state {
> +   /** Indicates that this is a new (or newly reset fence) */
> +   ANV_FENCE_STATE_RESET,
> +
> +   /** Indicates that this fence has been submitted to the GPU but is still
> +    * (as far as we know) in use by the GPU.
> +    */
> +   ANV_FENCE_STATE_SUBMITTED,
> +
> +   ANV_FENCE_STATE_SIGNALED,
> +};
> +
>  struct anv_fence {
>     struct anv_bo bo;
>     struct drm_i915_gem_execbuffer2 execbuf;
>     struct drm_i915_gem_exec_object2 exec2_objects[1];
> -   bool ready;
> +   enum anv_fence_state state;
>  };
>  
>  struct anv_event {
> diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
> index 72f79f1..d729051 100644
> --- a/src/intel/vulkan/anv_wsi.c
> +++ b/src/intel/vulkan/anv_wsi.c
> @@ -333,7 +333,7 @@ VkResult anv_AcquireNextImageKHR(
>                                                     semaphore, pImageIndex);
>  
>     /* Thanks to implicit sync, the image is ready immediately. */
> -   fence->ready = true;
> +   fence->state = ANV_FENCE_STATE_SIGNALED;
>  
>     return result;
>  }
> -- 
> 2.5.0.400.gff86faf
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list