Mesa (staging/21.3): anv: fix multiple wait/signal on same binary semaphore

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Nov 12 19:30:12 UTC 2021


Module: Mesa
Branch: staging/21.3
Commit: 3b502385844d04f68f2e3d43ce7ece65fe319a70
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3b502385844d04f68f2e3d43ce7ece65fe319a70

Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date:   Tue Nov  9 10:24:32 2021 +0200

anv: fix multiple wait/signal on same binary semaphore

We need to guarantee that when vkQueueSubmit() returns the application
can actually wait on a signaled semaphore/syncobj.

When using a thread to do the submission to i915, this gets a bit
tricky in the following case :

   A syncobj is used both as a wait & signal semaphore and has been
   signaled once already. It contains a fence before entering
   vkQueueSubmit().

   This means we need to reset the syncobj to ensure when we return
   from vkQueueSubmit(), the syncobj contains no stale fence.

   Currently in the Anv, the submission thread is in charge of putting
   the new fence in the syncobj and also picks up the wait fence
   directly from the syncobj. This means we can't reset the syncobj
   from vkQueueSubmit().

The solution to this has been pointed by Bas & Jason :

   In vkQueueSubmit(), clone the wait syncobj fence into a new
   temporary syncobj that will be destroy after submission and use
   this temporary syncobj as a wait fence for i915. This allows us to
   reset the original syncobj in vkQueueSubmit().

   For this to work with wait_before_signal behavior, we also need to
   do a wait-on-materialize on binary semaphores from vkQueueSubmit().
   Otherwise the application thread calling vkQueueSubmit() could race
   the submission thread and pick up the wrong fence when cloing.

v2: Use copy semantic for clone_syncobj_dma_fence() (Jason)
    Do the cloning prior to adding the syncobj to anv_queue_submit so
    that if the cloning fails don't have an invalid syncobj in
    anv_queue_submit (Jason)

v3: Fix another syncobj leak (Jason)

v4: Fix invalid argument order (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4945
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11474>
(cherry picked from commit d2ff2b9e4a22a83d59caf56052fdda1fd4493ca1)

---

 .pick_status.json            |   2 +-
 src/intel/vulkan/anv_queue.c | 148 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 130 insertions(+), 20 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index b66596c325a..3f0dad800d5 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -382,7 +382,7 @@
         "description": "anv: fix multiple wait/signal on same binary semaphore",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index d485aeeb51a..ed17729226d 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -829,26 +829,12 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
    return result;
 }
 
-/* Transfer ownership of temporary semaphores from the VkSemaphore object to
- * the anv_queue_submit object. Those temporary semaphores are then freed in
- * anv_queue_submit_free() once the driver is finished with them.
- */
 static VkResult
-maybe_transfer_temporary_semaphore(struct anv_queue *queue,
-                                   struct anv_queue_submit *submit,
-                                   struct anv_semaphore *semaphore,
-                                   struct anv_semaphore_impl **out_impl)
+add_temporary_semaphore(struct anv_queue *queue,
+                        struct anv_queue_submit *submit,
+                        struct anv_semaphore_impl *impl,
+                        struct anv_semaphore_impl **out_impl)
 {
-   struct anv_semaphore_impl *impl = &semaphore->temporary;
-
-   if (impl->type == ANV_SEMAPHORE_TYPE_NONE) {
-      *out_impl = &semaphore->permanent;
-      return VK_SUCCESS;
-   }
-
-   /* BO backed timeline semaphores cannot be temporary. */
-   assert(impl->type != ANV_SEMAPHORE_TYPE_TIMELINE);
-
    /*
     * There is a requirement to reset semaphore to their permanent state after
     * submission. From the Vulkan 1.0.53 spec:
@@ -883,6 +869,109 @@ maybe_transfer_temporary_semaphore(struct anv_queue *queue,
    submit->temporary_semaphores[submit->temporary_semaphore_count++] = *impl;
    *out_impl = &submit->temporary_semaphores[submit->temporary_semaphore_count - 1];
 
+   return VK_SUCCESS;
+}
+
+static VkResult
+clone_syncobj_dma_fence(struct anv_queue *queue,
+                        struct anv_semaphore_impl *out,
+                        const struct anv_semaphore_impl *in)
+{
+   struct anv_device *device = queue->device;
+
+   out->syncobj = anv_gem_syncobj_create(device, 0);
+   if (!out->syncobj)
+      return vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+   int fd = anv_gem_syncobj_export_sync_file(device, in->syncobj);
+   if (fd < 0) {
+      anv_gem_syncobj_destroy(device, out->syncobj);
+      return vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   }
+
+   int ret = anv_gem_syncobj_import_sync_file(device,
+                                              out->syncobj,
+                                              fd);
+   close(fd);
+   if (ret < 0) {
+      anv_gem_syncobj_destroy(device, out->syncobj);
+      return vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Clone semaphore in the following cases :
+ *
+ *   - We're dealing with a temporary semaphore that needs to be reset to
+ *     follow the Vulkan spec requirements.
+ *
+ *   - We're dealing with a syncobj semaphore and are using threaded
+ *     submission to i915. Because we might want to export the semaphore right
+ *     after calling vkQueueSubmit, we need to make sure it doesn't contain a
+ *     staled DMA fence. In this case we reset the original syncobj, but make
+ *     a clone of the contained DMA fence into another syncobj for submission
+ *     to i915.
+ *
+ * Those temporary semaphores are later freed in anv_queue_submit_free().
+ */
+static VkResult
+maybe_transfer_temporary_semaphore(struct anv_queue *queue,
+                                   struct anv_queue_submit *submit,
+                                   struct anv_semaphore *semaphore,
+                                   struct anv_semaphore_impl **out_impl)
+{
+   struct anv_semaphore_impl *impl = &semaphore->temporary;
+   VkResult result;
+
+   if (impl->type == ANV_SEMAPHORE_TYPE_NONE) {
+      /* No temporary, use the permanent semaphore. */
+      impl = &semaphore->permanent;
+
+      /* We need to reset syncobj before submission so that they do not
+       * contain a stale DMA fence. When using a submission thread this is
+       * problematic because the i915 EXECBUF ioctl happens after
+       * vkQueueSubmit has returned. A subsequent vkQueueSubmit() call could
+       * reset the syncobj that i915 is about to see from the submission
+       * thread.
+       *
+       * To avoid this, clone the DMA fence in the semaphore, into a another
+       * syncobj that the submission thread will destroy when it's done with
+       * it.
+       */
+      if (queue->device->physical->has_thread_submit &&
+          impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ) {
+         struct anv_semaphore_impl template = {
+            .type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
+         };
+
+         /* Put the fence into a new syncobj so the old one can be reset. */
+         result = clone_syncobj_dma_fence(queue, &template, impl);
+         if (result != VK_SUCCESS)
+            return result;
+
+         /* Create a copy of the anv_semaphore structure. */
+         result = add_temporary_semaphore(queue, submit, &template, out_impl);
+         if (result != VK_SUCCESS) {
+            anv_gem_syncobj_destroy(queue->device, template.syncobj);
+            return result;
+         }
+
+         return VK_SUCCESS;
+      }
+
+      *out_impl = impl;
+      return VK_SUCCESS;
+   }
+
+   /* BO backed timeline semaphores cannot be temporary. */
+   assert(impl->type != ANV_SEMAPHORE_TYPE_TIMELINE);
+
+   /* Copy anv_semaphore_impl into anv_queue_submit. */
+   result = add_temporary_semaphore(queue, submit, impl, out_impl);
+   if (result != VK_SUCCESS)
+      return result;
+
    /* Clear the incoming semaphore */
    impl->type = ANV_SEMAPHORE_TYPE_NONE;
 
@@ -896,9 +985,30 @@ anv_queue_submit_add_in_semaphore(struct anv_queue *queue,
                                   const uint64_t value)
 {
    ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
-   struct anv_semaphore_impl *impl;
+   struct anv_semaphore_impl *impl =
+      semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
+      &semaphore->temporary : &semaphore->permanent;
    VkResult result;
 
+   /* When using a binary semaphore with threaded submission, wait for the
+    * dma-fence to materialize in the syncobj. This is needed to be able to
+    * clone in maybe_transfer_temporary_semaphore().
+    */
+   if (queue->device->has_thread_submit &&
+       impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ) {
+      uint64_t value = 0;
+      int ret =
+         anv_gem_syncobj_timeline_wait(queue->device,
+                                       &impl->syncobj, &value, 1,
+                                       anv_get_absolute_timeout(INT64_MAX),
+                                       true /* wait_all */,
+                                       true /* wait_materialize */);
+      if (ret != 0) {
+         return anv_queue_set_lost(queue,
+                                   "unable to wait on syncobj to materialize");
+      }
+   }
+
    result = maybe_transfer_temporary_semaphore(queue, submit, semaphore, &impl);
    if (result != VK_SUCCESS)
       return result;



More information about the mesa-commit mailing list