[Mesa-dev] [PATCH] anv/query: Busy-wait for available query entries
Iago Toral
itoral at igalia.com
Wed Apr 5 07:24:02 UTC 2017
On Tue, 2017-04-04 at 19:21 -0700, Jason Ekstrand wrote:
> Before, we were just looking at whether or not the user wanted us to
> wait and waiting on the BO. This instead makes us busy-loop on each
> query until it's available. This reduces some of the pipeline
> bubbles
> we were getting and improves performance of The Talos Principle on
> medium settings (where the GPU isn't overloaded) by around 20% on my
> SkyLake gt4.
Do you think busy looping is a good strategy in general? In scenarios
where the GPU is busier, isn't this expected to have a negative effect
on performance?
> ---
> src/intel/vulkan/genX_query.c | 72
> +++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 66 insertions(+), 6 deletions(-)
>
> diff --git a/src/intel/vulkan/genX_query.c
> b/src/intel/vulkan/genX_query.c
> index 7ea9404..ebf99d2 100644
> --- a/src/intel/vulkan/genX_query.c
> +++ b/src/intel/vulkan/genX_query.c
> @@ -131,6 +131,64 @@ cpu_write_query_result(void *dst_slot,
> VkQueryResultFlags flags,
> }
> }
>
> +#define NSEC_PER_SEC 1000000000
> +
> +static bool
> +query_is_available(struct anv_device *device, uint64_t *slot)
> +{
> + if (!device->info.has_llc)
> + __builtin_ia32_clflush(slot);
> +
> + return slot[0];
> +}
> +
> +static VkResult
> +wait_for_available(struct anv_device *device,
> + struct anv_query_pool *pool, uint64_t *slot)
> +{
> + while (true) {
> + struct timespec start;
> + clock_gettime(CLOCK_MONOTONIC, &start);
> +
> + while (true) {
> + if (!device->info.has_llc)
> + __builtin_ia32_clflush(slot);
> +
> + if (query_is_available(device, slot))
> + return VK_SUCCESS;
> +
> + struct timespec current;
> + clock_gettime(CLOCK_MONOTONIC, ¤t);
> +
> + if (current.tv_nsec < start.tv_nsec) {
> + current.tv_nsec += NSEC_PER_SEC;
> + current.tv_sec -= 1;
> + }
> +
> + /* If we've been looping for more than 1 ms, break out of
> the busy
> + * loop and ask the kernel if the buffer is actually busy.
> + */
> + if (current.tv_sec > start.tv_sec ||
This should be && instead of ||, right?
> + current.tv_nsec - start.tv_nsec > 1000000)
> + break;
> + }
> +
> + VkResult result = anv_device_wait(device, &pool->bo, 0);
> + switch (result) {
> + case VK_SUCCESS:
> + /* The BO is no longer busy. If we haven't seen
> availability yet,
> + * then we never will.
> + */
> + return query_is_available(device, slot) ? VK_SUCCESS :
> VK_NOT_READY;
By the comment above it sounds like returning VK_NOT_READY isn't what
we want here. If we don't expect the query to ever be available at this
point, returning VK_NOT_READY will only prompt the application to call
us again... shouldn't we return an error in this case?
> + case VK_TIMEOUT:
> + /* The BO is still busy, keep waiting. */
> + continue;
> + default:
> + return result;
> + }
> + }
> +}
> +
> VkResult genX(GetQueryPoolResults)(
> VkDevice _device,
> VkQueryPool queryPool,
> @@ -154,12 +212,6 @@ VkResult genX(GetQueryPoolResults)(
> if (pData == NULL)
> return VK_SUCCESS;
>
> - if (flags & VK_QUERY_RESULT_WAIT_BIT) {
> - VkResult result = anv_device_wait(device, &pool->bo,
> INT64_MAX);
> - if (result != VK_SUCCESS)
> - return result;
> - }
> -
> void *data_end = pData + dataSize;
>
> if (!device->info.has_llc) {
> @@ -176,6 +228,14 @@ VkResult genX(GetQueryPoolResults)(
> /* Availability is always at the start of the slot */
> bool available = slot[0];
>
> + if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
> + status = wait_for_available(device, pool, slot);
> + if (status != VK_SUCCESS)
> + return status;
> +
> + available = true;
> + }
> +
> /* From the Vulkan 1.0.42 spec:
> *
> * "If VK_QUERY_RESULT_WAIT_BIT and
> VK_QUERY_RESULT_PARTIAL_BIT are
More information about the mesa-dev
mailing list