Mesa (main): venus: add fence feedback
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Jun 16 19:06:12 UTC 2022
Module: Mesa
Branch: main
Commit: d7f2e6c8d033de19a1d473df4fb1a46c7d365159
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d7f2e6c8d033de19a1d473df4fb1a46c7d365159
Author: Yiwei Zhang <zzyiwei at chromium.org>
Date: Wed May 25 07:13:13 2022 +0000
venus: add fence feedback
- intercept to record feedback cmds for:
  - vkQueueSubmit
- add feedback code path for:
  - vkGetFenceStatus
  - vkResetFences
- VN_PERF_NO_FENCE_FEEDBACK can disable fence feedback
Test: dEQP-VK.synchronization.basic.fence.*
Test: dEQP-VK.wsi.android.swapchain.render.basic*
Test: dEQP-VK.api.object_management.*
Test: dEQP-VK.api.external.fence.sync_fd.*
Signed-off-by: Yiwei Zhang <zzyiwei at chromium.org>
Reviewed-by: Ryan Neph <ryanneph at google.com>
Reviewed-by: Chad Versace <chadversary at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16731>
---
src/virtio/vulkan/vn_device.c | 4 +-
src/virtio/vulkan/vn_feedback.c | 106 +++++++++++++++++++
src/virtio/vulkan/vn_feedback.h | 11 ++
src/virtio/vulkan/vn_queue.c | 227 +++++++++++++++++++++++++++++++++++-----
src/virtio/vulkan/vn_queue.h | 6 ++
5 files changed, 324 insertions(+), 30 deletions(-)
diff --git a/src/virtio/vulkan/vn_device.c b/src/virtio/vulkan/vn_device.c
index 5fc070a9ee4..ff17999d70e 100644
--- a/src/virtio/vulkan/vn_device.c
+++ b/src/virtio/vulkan/vn_device.c
@@ -310,7 +310,7 @@ vn_device_feedback_pool_init(struct vn_device *dev)
static const uint32_t pool_size = 4096;
const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
- if (VN_PERF(NO_EVENT_FEEDBACK))
+ if (VN_PERF(NO_EVENT_FEEDBACK) && VN_PERF(NO_FENCE_FEEDBACK))
return VK_SUCCESS;
return vn_feedback_pool_init(dev, &dev->feedback_pool, pool_size, alloc);
@@ -319,7 +319,7 @@ vn_device_feedback_pool_init(struct vn_device *dev)
static inline void
vn_device_feedback_pool_fini(struct vn_device *dev)
{
- if (VN_PERF(NO_EVENT_FEEDBACK))
+ if (VN_PERF(NO_EVENT_FEEDBACK) && VN_PERF(NO_FENCE_FEEDBACK))
return;
vn_feedback_pool_fini(&dev->feedback_pool);
diff --git a/src/virtio/vulkan/vn_feedback.c b/src/virtio/vulkan/vn_feedback.c
index 31d653357de..7e2d1a65950 100644
--- a/src/virtio/vulkan/vn_feedback.c
+++ b/src/virtio/vulkan/vn_feedback.c
@@ -323,6 +323,112 @@ vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
&buf_barrier_after, 0, NULL);
}
+/* Record the fence feedback commands into cmd_handle.
+ *
+ * The recorded sequence is: a pipeline barrier, a 4-byte fill that writes
+ * VK_SUCCESS at slot->offset in slot->buffer, and a second barrier towards
+ * the host stage.
+ *
+ * Returns the vn_BeginCommandBuffer error if beginning fails, otherwise the
+ * result of vn_EndCommandBuffer.
+ */
+static VkResult
+vn_feedback_fence_cmd_record(VkCommandBuffer cmd_handle,
+ struct vn_feedback_slot *slot)
+
+{
+ /* the fill and barrier sizes below are hard-coded to 4 bytes */
+ STATIC_ASSERT(sizeof(*slot->status) == 4);
+
+ static const VkCommandBufferBeginInfo begin_info = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .pNext = NULL,
+ .flags = 0,
+ .pInheritanceInfo = NULL,
+ };
+ VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
+ if (result != VK_SUCCESS)
+ return result;
+
+ static const VkMemoryBarrier mem_barrier_before = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = NULL,
+ /* make pending writes available to stay close to fence signal op */
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ /* no need to make all memory visible for feedback update */
+ .dstAccessMask = 0,
+ };
+ const VkBufferMemoryBarrier buf_barrier_before = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = NULL,
+ /* slot memory has been made available via mem_barrier_before */
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = slot->buffer,
+ .offset = slot->offset,
+ .size = 4,
+ };
+ /* source stage is ALL_COMMANDS and destination stage is TRANSFER, so the
+ * fill below is ordered after the commands ahead of it
+ */
+ vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
+ &mem_barrier_before, 1, &buf_barrier_before, 0,
+ NULL);
+ /* write VK_SUCCESS into the slot's 4-byte status */
+ vn_CmdFillBuffer(cmd_handle, slot->buffer, slot->offset, 4, VK_SUCCESS);
+
+ /* transfer write -> host read/write on the slot range */
+ const VkBufferMemoryBarrier buf_barrier_after = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = NULL,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = slot->buffer,
+ .offset = slot->offset,
+ .size = 4,
+ };
+ vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
+ &buf_barrier_after, 0, NULL);
+
+ return vn_EndCommandBuffer(cmd_handle);
+}
+
+/* Allocate one primary command buffer from the feedback command pool and
+ * record the fence feedback commands for slot into it.
+ *
+ * pool->mutex is held for the whole allocate + record sequence.  On success
+ * the new handle is stored in *out_cmd_handle; on failure *out_cmd_handle is
+ * not written and a command buffer whose recording failed is freed again.
+ */
+VkResult
+vn_feedback_fence_cmd_alloc(VkDevice dev_handle,
+                            struct vn_feedback_cmd_pool *pool,
+                            struct vn_feedback_slot *slot,
+                            VkCommandBuffer *out_cmd_handle)
+{
+   const VkCommandBufferAllocateInfo alloc_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+      .pNext = NULL,
+      .commandPool = pool->pool,
+      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+      .commandBufferCount = 1,
+   };
+   VkCommandBuffer new_cmd;
+   VkResult status;
+
+   simple_mtx_lock(&pool->mutex);
+
+   status = vn_AllocateCommandBuffers(dev_handle, &alloc_info, &new_cmd);
+   if (status == VK_SUCCESS) {
+      status = vn_feedback_fence_cmd_record(new_cmd, slot);
+      if (status == VK_SUCCESS) {
+         *out_cmd_handle = new_cmd;
+      } else {
+         /* recording failed: hand the command buffer back to the pool */
+         vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &new_cmd);
+      }
+   }
+
+   simple_mtx_unlock(&pool->mutex);
+
+   return status;
+}
+
+/* Free a fence feedback command buffer back to pool->pool.
+ *
+ * The free is performed while holding pool->mutex, the same lock that
+ * vn_feedback_fence_cmd_alloc takes around allocation and recording.
+ */
+void
+vn_feedback_fence_cmd_free(VkDevice dev_handle,
+ struct vn_feedback_cmd_pool *pool,
+ VkCommandBuffer cmd_handle)
+{
+ simple_mtx_lock(&pool->mutex);
+ vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);
+ simple_mtx_unlock(&pool->mutex);
+}
+
VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev)
{
diff --git a/src/virtio/vulkan/vn_feedback.h b/src/virtio/vulkan/vn_feedback.h
index c391ed8d78e..8f7870c894b 100644
--- a/src/virtio/vulkan/vn_feedback.h
+++ b/src/virtio/vulkan/vn_feedback.h
@@ -113,6 +113,17 @@ vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
VkPipelineStageFlags stage_mask,
VkResult status);
+/* Allocate a primary command buffer from pool and record the fence feedback
+ * commands for slot into it.  On success the handle is returned through
+ * out_cmd_handle; on failure out_cmd_handle is not written.
+ */
+VkResult
+vn_feedback_fence_cmd_alloc(VkDevice dev_handle,
+ struct vn_feedback_cmd_pool *pool,
+ struct vn_feedback_slot *slot,
+ VkCommandBuffer *out_cmd_handle);
+
+/* Free a command buffer obtained from vn_feedback_fence_cmd_alloc back to
+ * the same pool.
+ */
+void
+vn_feedback_fence_cmd_free(VkDevice dev_handle,
+ struct vn_feedback_cmd_pool *pool,
+ VkCommandBuffer cmd_handle);
+
VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev);
diff --git a/src/virtio/vulkan/vn_queue.c b/src/virtio/vulkan/vn_queue.c
index e50d28b2eaf..7408ba50941 100644
--- a/src/virtio/vulkan/vn_queue.c
+++ b/src/virtio/vulkan/vn_queue.c
@@ -312,6 +312,38 @@ vn_queue_submission_cleanup(struct vn_queue_submission *submit)
vk_free(alloc, submit->temp.storage);
}
+/* Return the position of the queue's family within the device's
+ * queue_families array.  A queue whose family is not listed is treated as
+ * unreachable.
+ */
+static inline uint32_t
+vn_queue_family_array_index(struct vn_queue *queue)
+{
+   struct vn_device *owner = queue->device;
+   uint32_t idx = 0;
+
+   while (idx < owner->queue_family_count) {
+      if (owner->queue_families[idx] == queue->family)
+         return idx;
+      idx++;
+   }
+
+   unreachable("invalid queue");
+}
+
+/* Forward a queue submission to the renderer.
+ *
+ * A submission with no batches and no fence is dropped and reported as
+ * VK_SUCCESS.  With sync_submit set, the result of the synchronous
+ * vn_call_vkQueueSubmit is returned; otherwise the submission is issued via
+ * vn_async_vkQueueSubmit and VK_SUCCESS is returned.
+ */
+static VkResult
+vn_queue_submit(struct vn_instance *instance,
+                VkQueue queue_handle,
+                uint32_t batch_count,
+                const VkSubmitInfo *batches,
+                VkFence fence_handle,
+                bool sync_submit)
+{
+   const bool is_noop = batch_count == 0 && fence_handle == VK_NULL_HANDLE;
+
+   /* skip no-op submit */
+   if (is_noop)
+      return VK_SUCCESS;
+
+   if (!sync_submit) {
+      vn_async_vkQueueSubmit(instance, queue_handle, batch_count, batches,
+                             fence_handle);
+      return VK_SUCCESS;
+   }
+
+   return vn_call_vkQueueSubmit(instance, queue_handle, batch_count, batches,
+                                fence_handle);
+}
+
VkResult
vn_QueueSubmit(VkQueue _queue,
uint32_t submitCount,
@@ -322,15 +354,18 @@ vn_QueueSubmit(VkQueue _queue,
struct vn_queue *queue = vn_queue_from_handle(_queue);
struct vn_device *dev = queue->device;
struct vn_fence *fence = vn_fence_from_handle(_fence);
- const bool is_fence_external = fence && fence->is_external;
-
+ const bool external_fence = fence && fence->is_external;
+ const bool feedback_fence = fence && fence->feedback.slot;
struct vn_queue_submission submit;
- VkResult result = vn_queue_submission_prepare_submit(
- &submit, _queue, submitCount, pSubmits, _fence);
+ const struct vn_device_memory *wsi_mem = NULL;
+ bool sync_submit;
+ VkResult result;
+
+ result = vn_queue_submission_prepare_submit(&submit, _queue, submitCount,
+ pSubmits, _fence);
if (result != VK_SUCCESS)
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- const struct vn_device_memory *wsi_mem = NULL;
if (submit.batch_count == 1) {
const struct wsi_memory_signal_submit_info *info = vk_find_struct_const(
submit.submit_batches[0].pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
@@ -340,22 +375,51 @@ vn_QueueSubmit(VkQueue _queue,
}
}
- /* TODO defer roundtrip for external fence until the next sync operation */
- if (!wsi_mem && !is_fence_external && !VN_PERF(NO_ASYNC_QUEUE_SUBMIT)) {
- vn_async_vkQueueSubmit(dev->instance, submit.queue, submit.batch_count,
- submit.submit_batches, submit.fence);
- vn_queue_submission_cleanup(&submit);
- return VK_SUCCESS;
- }
-
- result =
- vn_call_vkQueueSubmit(dev->instance, submit.queue, submit.batch_count,
- submit.submit_batches, submit.fence);
+ /* force synchronous submission if any of the below applies:
+ * - struct wsi_memory_signal_submit_info
+ * - fence is an external fence
+ * - NO_ASYNC_QUEUE_SUBMIT perf option enabled
+ */
+ sync_submit = wsi_mem || external_fence || VN_PERF(NO_ASYNC_QUEUE_SUBMIT);
+
+ /* if the original submission involves a feedback fence:
+ * - defer the feedback fence to another submit to avoid deep copy
+ * - defer the potential sync_submit to the feedback fence submission
+ */
+ result = vn_queue_submit(dev->instance, submit.queue, submit.batch_count,
+ submit.submit_batches,
+ feedback_fence ? VK_NULL_HANDLE : submit.fence,
+ !feedback_fence && sync_submit);
if (result != VK_SUCCESS) {
vn_queue_submission_cleanup(&submit);
return vn_error(dev->instance, result);
}
+ /* TODO intercept original submit batches to append the fence feedback cmd
+ * with a per-queue cached submission builder to avoid transient allocs.
+ *
+ * vn_queue_submission bits must be fixed for VkTimelineSemaphoreSubmitInfo
+ * before adding timeline semaphore feedback.
+ */
+ if (feedback_fence) {
+ const uint32_t feedback_cmd_index = vn_queue_family_array_index(queue);
+ const VkSubmitInfo info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = NULL,
+ .waitSemaphoreCount = 0,
+ .pWaitSemaphores = NULL,
+ .pWaitDstStageMask = NULL,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &fence->feedback.commands[feedback_cmd_index],
+ };
+ result = vn_queue_submit(dev->instance, submit.queue, 1, &info,
+ submit.fence, sync_submit);
+ if (result != VK_SUCCESS) {
+ vn_queue_submission_cleanup(&submit);
+ return vn_error(dev->instance, result);
+ }
+ }
+
if (wsi_mem) {
/* XXX this is always false and kills the performance */
if (dev->instance->renderer->info.has_implicit_fencing) {
@@ -463,6 +527,84 @@ vn_fence_signal_wsi(struct vn_device *dev, struct vn_fence *fence)
fence->payload = temp;
}
+/* Set up the fence feedback state of a fence.
+ *
+ * Allocates a feedback slot from dev->feedback_pool, seeds it with VK_SUCCESS
+ * or VK_NOT_READY depending on signaled, and allocates one pre-recorded
+ * feedback command buffer per device queue family (indexed like
+ * dev->cmd_pools).  On success fence->feedback.slot and
+ * fence->feedback.commands are filled in.
+ *
+ * Returns VK_SUCCESS without touching fence->feedback when fence feedback is
+ * not used (renderer lacks allow_vk_wait_syncs, or VN_PERF(NO_FENCE_FEEDBACK)
+ * is set).  On failure everything allocated here is released and the error
+ * is returned.
+ */
+static VkResult
+vn_fence_feedback_init(struct vn_device *dev,
+                       struct vn_fence *fence,
+                       bool signaled,
+                       const VkAllocationCallbacks *alloc)
+{
+   VkDevice dev_handle = vn_device_to_handle(dev);
+   struct vn_feedback_slot *slot;
+   VkCommandBuffer *cmd_handles;
+   /* Start from VK_SUCCESS so the check after the loop never reads an
+    * indeterminate value, even if the loop body does not execute.
+    */
+   VkResult result = VK_SUCCESS;
+
+   /* Fence feedback implementation relies on vkWaitForFences to cover the gap
+    * between feedback slot signaling and the actual fence signal operation.
+    */
+   if (unlikely(!dev->instance->renderer->info.allow_vk_wait_syncs))
+      return VK_SUCCESS;
+
+   if (VN_PERF(NO_FENCE_FEEDBACK))
+      return VK_SUCCESS;
+
+   slot = vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_FENCE);
+   if (!slot)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   vn_feedback_set_status(slot, signaled ? VK_SUCCESS : VK_NOT_READY);
+
+   cmd_handles =
+      vk_zalloc(alloc, sizeof(*cmd_handles) * dev->queue_family_count,
+                VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!cmd_handles) {
+      vn_feedback_pool_free(&dev->feedback_pool, slot);
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   }
+
+   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
+      result = vn_feedback_fence_cmd_alloc(dev_handle, &dev->cmd_pools[i],
+                                           slot, &cmd_handles[i]);
+      if (result != VK_SUCCESS) {
+         /* unwind the command buffers allocated for earlier families */
+         for (uint32_t j = 0; j < i; j++) {
+            vn_feedback_fence_cmd_free(dev_handle, &dev->cmd_pools[j],
+                                       cmd_handles[j]);
+         }
+         break;
+      }
+   }
+
+   if (result != VK_SUCCESS) {
+      vk_free(alloc, cmd_handles);
+      vn_feedback_pool_free(&dev->feedback_pool, slot);
+      return result;
+   }
+
+   fence->feedback.slot = slot;
+   fence->feedback.commands = cmd_handles;
+
+   return VK_SUCCESS;
+}
+
+/* Release the fence feedback state of a fence.
+ *
+ * Does nothing when the fence has no feedback slot.  Otherwise frees the
+ * per-queue-family feedback command buffers, returns the slot to
+ * dev->feedback_pool, and frees the command handle array with alloc.
+ */
+static void
+vn_fence_feedback_fini(struct vn_device *dev,
+                       struct vn_fence *fence,
+                       const VkAllocationCallbacks *alloc)
+{
+   VkDevice dev_handle = vn_device_to_handle(dev);
+   struct vn_feedback_slot *fb_slot = fence->feedback.slot;
+
+   if (fb_slot) {
+      VkCommandBuffer *fb_cmds = fence->feedback.commands;
+      uint32_t family = 0;
+
+      while (family < dev->queue_family_count) {
+         vn_feedback_fence_cmd_free(dev_handle, &dev->cmd_pools[family],
+                                    fb_cmds[family]);
+         family++;
+      }
+
+      vn_feedback_pool_free(&dev->feedback_pool, fb_slot);
+
+      vk_free(alloc, fb_cmds);
+   }
+}
+
VkResult
vn_CreateFence(VkDevice device,
const VkFenceCreateInfo *pCreateInfo,
@@ -472,6 +614,8 @@ vn_CreateFence(VkDevice device,
struct vn_device *dev = vn_device_from_handle(device);
const VkAllocationCallbacks *alloc =
pAllocator ? pAllocator : &dev->base.base.alloc;
+ const bool signaled = pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT;
+ VkResult result;
struct vn_fence *fence = vk_zalloc(alloc, sizeof(*fence), VN_DEFAULT_ALIGN,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -491,21 +635,27 @@ vn_CreateFence(VkDevice device,
fence->is_external = !!export_info->handleTypes;
}
- VkResult result = vn_fence_init_payloads(
- dev, fence, pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT, alloc);
- if (result != VK_SUCCESS) {
- vn_object_base_fini(&fence->base);
- vk_free(alloc, fence);
- return vn_error(dev->instance, result);
- }
+ result = vn_fence_init_payloads(dev, fence, signaled, alloc);
+ if (result != VK_SUCCESS)
+ goto out_object_base_fini;
- VkFence fence_handle = vn_fence_to_handle(fence);
- vn_async_vkCreateFence(dev->instance, device, pCreateInfo, NULL,
- &fence_handle);
+ result = vn_fence_feedback_init(dev, fence, signaled, alloc);
+ if (result != VK_SUCCESS)
+ goto out_payloads_fini;
- *pFence = fence_handle;
+ *pFence = vn_fence_to_handle(fence);
+ vn_async_vkCreateFence(dev->instance, device, pCreateInfo, NULL, pFence);
return VK_SUCCESS;
+
+out_payloads_fini:
+ vn_sync_payload_release(dev, &fence->permanent);
+ vn_sync_payload_release(dev, &fence->temporary);
+
+out_object_base_fini:
+ vn_object_base_fini(&fence->base);
+ vk_free(alloc, fence);
+ return vn_error(dev->instance, result);
}
void
@@ -523,6 +673,8 @@ vn_DestroyFence(VkDevice device,
vn_async_vkDestroyFence(dev->instance, device, _fence, NULL);
+ vn_fence_feedback_fini(dev, fence, alloc);
+
vn_sync_payload_release(dev, &fence->permanent);
vn_sync_payload_release(dev, &fence->temporary);
@@ -549,6 +701,9 @@ vn_ResetFences(VkDevice device, uint32_t fenceCount, const VkFence *pFences)
assert(perm->type == VN_SYNC_TYPE_DEVICE_ONLY);
fence->payload = perm;
+
+ if (fence->feedback.slot)
+ vn_feedback_reset_status(fence->feedback.slot);
}
return VK_SUCCESS;
@@ -564,7 +719,23 @@ vn_GetFenceStatus(VkDevice device, VkFence _fence)
VkResult result;
switch (payload->type) {
case VN_SYNC_TYPE_DEVICE_ONLY:
- result = vn_call_vkGetFenceStatus(dev->instance, device, _fence);
+ if (fence->feedback.slot) {
+ result = vn_feedback_get_status(fence->feedback.slot);
+ if (result == VK_SUCCESS) {
+ /* When fence feedback slot gets signaled, the real fence
+ * signal operation follows after but the signaling isr can be
+ * deferred or preempted. To avoid theoretical racing, we let
+ * the renderer wait for the fence. This also helps resolve
+ * synchronization validation errors, because the layer no
+ * longer sees any fence status checks and falsely believes the
+ * caller does not sync.
+ */
+ vn_async_vkWaitForFences(dev->instance, device, 1, &_fence,
+ VK_TRUE, UINT64_MAX);
+ }
+ } else {
+ result = vn_call_vkGetFenceStatus(dev->instance, device, _fence);
+ }
break;
case VN_SYNC_TYPE_WSI_SIGNALED:
result = VK_SUCCESS;
diff --git a/src/virtio/vulkan/vn_queue.h b/src/virtio/vulkan/vn_queue.h
index a66697b4f9c..594ca226e53 100644
--- a/src/virtio/vulkan/vn_queue.h
+++ b/src/virtio/vulkan/vn_queue.h
@@ -50,6 +50,12 @@ struct vn_fence {
struct vn_sync_payload permanent;
struct vn_sync_payload temporary;
+ struct {
+ /* non-NULL only when fence feedback is in use: VN_PERF_NO_FENCE_FEEDBACK
+  * is not set and the renderer reports allow_vk_wait_syncs
+  */
+ struct vn_feedback_slot *slot;
+ VkCommandBuffer *commands;
+ } feedback;
+
bool is_external;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(vn_fence,
More information about the mesa-commit
mailing list