[Mesa-dev] [PATCH] anv/query: Busy-wait for available query entries

Wed Apr 5 07:24:02 UTC 2017

On Tue, 2017-04-04 at 19:21 -0700, Jason Ekstrand wrote:
> Before, we were just looking at whether or not the user wanted us to
> wait and waiting on the BO.  This instead makes us busy-loop on each
> query until it's available.  This reduces some of the pipeline
> bubbles
> we were getting and improves performance of The Talos Principle on
> medium settings (where the GPU isn't overloaded) by around 20% on my
> SkyLake gt4.

Do you think busy looping is a good strategy in general? In scenarios
where the GPU is busier, isn't this expected to have a negative effect
on performance?

> ---
>  src/intel/vulkan/genX_query.c | 72
> +++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 66 insertions(+), 6 deletions(-)
> 
> diff --git a/src/intel/vulkan/genX_query.c
> b/src/intel/vulkan/genX_query.c
> index 7ea9404..ebf99d2 100644
> --- a/src/intel/vulkan/genX_query.c
> +++ b/src/intel/vulkan/genX_query.c
> @@ -131,6 +131,64 @@ cpu_write_query_result(void *dst_slot,
> VkQueryResultFlags flags,
>     }
>  }
>  
> +#define NSEC_PER_SEC 1000000000
> +
> +static bool
> +query_is_available(struct anv_device *device, uint64_t *slot)
> +{
> +   if (!device->info.has_llc)
> +      __builtin_ia32_clflush(slot);
> +
> +   return slot[0];
> +}
> +
> +static VkResult
> +wait_for_available(struct anv_device *device,
> +                   struct anv_query_pool *pool, uint64_t *slot)
> +{
> +   while (true) {
> +      struct timespec start;
> +      clock_gettime(CLOCK_MONOTONIC, &start);
> +
> +      while (true) {
> +         if (!device->info.has_llc)
> +            __builtin_ia32_clflush(slot);
> +
> +         if (query_is_available(device, slot))
> +            return VK_SUCCESS;
> +
> +         struct timespec current;
> +         clock_gettime(CLOCK_MONOTONIC, &current);
> +
> +         if (current.tv_nsec < start.tv_nsec) {
> +            current.tv_nsec += NSEC_PER_SEC;
> +            current.tv_sec -= 1;
> +         }
> +
> +         /* If we've been looping for more than 1 ms, break out of
> the busy
> +          * loop and ask the kernel if the buffer is actually busy.
> +          */
> +         if (current.tv_sec > start.tv_sec ||

This should be && instead of ||, right?

> +             current.tv_nsec - start.tv_nsec > 1000000)
> +            break;
> +      }
> +
> +      VkResult result = anv_device_wait(device, &pool->bo, 0);
> +      switch (result) {
> +      case VK_SUCCESS:
> +         /* The BO is no longer busy.  If we haven't seen
> availability yet,
> +          * then we never will.
> +          */
> +         return query_is_available(device, slot) ? VK_SUCCESS :
> VK_NOT_READY;

By the comment above it sounds like returning VK_NOT_READY isn't what
we want here. If we don't expect the query to ever be available at this
point, returning VK_NOT_READY will only prompt the application to call
us again... shouldn't we return an error in this case?

> +      case VK_TIMEOUT:
> +         /* The BO is still busy, keep waiting. */
> +         continue;
> +      default:
> +         return result;
> +      }
> +   }
> +}
> +
>  VkResult genX(GetQueryPoolResults)(
>      VkDevice                                    _device,
>      VkQueryPool                                 queryPool,
> @@ -154,12 +212,6 @@ VkResult genX(GetQueryPoolResults)(
>     if (pData == NULL)
>        return VK_SUCCESS;
>  
> -   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
> -      VkResult result = anv_device_wait(device, &pool->bo,
> INT64_MAX);
> -      if (result != VK_SUCCESS)
> -         return result;
> -   }
> -
>     void *data_end = pData + dataSize;
>  
>     if (!device->info.has_llc) {
> @@ -176,6 +228,14 @@ VkResult genX(GetQueryPoolResults)(
>        /* Availability is always at the start of the slot */
>        bool available = slot[0];
>  
> +      if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
> +         status = wait_for_available(device, pool, slot);
> +         if (status != VK_SUCCESS)
> +            return status;
> +
> +         available = true;
> +      }
> +
>        /* From the Vulkan 1.0.42 spec:
>         *
>         *    "If VK_QUERY_RESULT_WAIT_BIT and
> VK_QUERY_RESULT_PARTIAL_BIT are