Mesa (main): anv: implement VK_KHR_synchronization2

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Oct 6 20:51:01 UTC 2021


Module: Mesa
Branch: main
Commit: b996fa8efaa42558bda2c52377561d7eb3c4127e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b996fa8efaa42558bda2c52377561d7eb3c4127e

Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date:   Wed Nov 11 21:38:25 2020 +0200

anv: implement VK_KHR_synchronization2

v2: Use u_foreach_bit64() (Samuel)

v3: Add missing handling of VkMemoryBarrier2KHR in pNext of
    VkSubpassDependency2KHR (Samuel)

v4: Remove unused ANV_PIPELINE_STAGE_PIPELINED_BITS (Ivan)

v5: fix missing anv_measure_submit() (Jason)
    constify anv_pipeline_stage_pipelined_bits (Jason)

v6: Split flushes & invalidation emissions on
    vkCmdSetEvent2KHR()/vkCmdWaitEvents2KHR() (Jason)

v7: Only apply flushes once on events (Jason)

v8: Drop split flushes for this patch

v9: Add comment about ignoring some fields of VkMemoryBarrier2 in
    VkSubpassDependency2KHR (Jason)
    Drop spurious PIPE_CONTROL change s/,/;/ (Jason)

v10: Fix build issue on Android (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9045>

---

 docs/relnotes/new_features.txt     |   1 +
 src/intel/vulkan/anv_android.c     |   3 +-
 src/intel/vulkan/anv_device.c      |   8 +++
 src/intel/vulkan/anv_pass.c        |  22 +++++--
 src/intel/vulkan/anv_private.h     |  54 ++++++++--------
 src/intel/vulkan/anv_queue.c       |  35 ++++------
 src/intel/vulkan/genX_cmd_buffer.c | 128 +++++++++++++++++++------------------
 src/intel/vulkan/genX_query.c      |  12 ++--
 8 files changed, 139 insertions(+), 124 deletions(-)

diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt
index d7ed2865292..3f0d07e81af 100644
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -15,3 +15,4 @@ VK_KHR_shader_float16_int8 on lavapipe
 VK_KHR_shader_subgroup_extended_types on lavapipe
 VK_KHR_spirv_1_4 on lavapipe
 Experimental raytracing support on RADV
+VK_KHR_synchronization2 on Intel
diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c
index b3b102d4202..c87bfb9cefd 100644
--- a/src/intel/vulkan/anv_android.c
+++ b/src/intel/vulkan/anv_android.c
@@ -34,6 +34,7 @@
 #include <sync/sync.h>
 
 #include "anv_private.h"
+#include "vk_common_entrypoints.h"
 #include "vk_util.h"
 
 static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
@@ -875,7 +876,7 @@ anv_QueueSignalReleaseImageANDROID(
    if (waitSemaphoreCount == 0)
       goto done;
 
-   result = anv_QueueSubmit(queue, 1,
+   result = vk_common_QueueSubmit(queue, 1,
       &(VkSubmitInfo) {
             .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
             .waitSemaphoreCount = 1,
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 8c334302bd1..08e9e05252a 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -230,6 +230,7 @@ get_device_extensions(const struct anv_physical_device *device,
       .KHR_swapchain                         = true,
       .KHR_swapchain_mutable_format          = true,
 #endif
+      .KHR_synchronization2                  = true,
       .KHR_timeline_semaphore                = true,
       .KHR_uniform_buffer_standard_layout    = true,
       .KHR_variable_pointers                 = true,
@@ -1689,6 +1690,13 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR: {
+         VkPhysicalDeviceSynchronization2FeaturesKHR *features =
+            (VkPhysicalDeviceSynchronization2FeaturesKHR *)ext;
+         features->synchronization2 = true;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
          VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
             (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index 634a3a3e24e..18c689ac864 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -31,22 +31,36 @@ anv_render_pass_add_subpass_dep(struct anv_device *device,
                                 struct anv_render_pass *pass,
                                 const VkSubpassDependency2KHR *dep)
 {
+   /* From the Vulkan 1.2.195 spec:
+    *
+    *    "If an instance of VkMemoryBarrier2 is included in the pNext chain,
+    *    srcStageMask, dstStageMask, srcAccessMask, and dstAccessMask
+    *    parameters are ignored. The synchronization and access scopes instead
+    *    are defined by the parameters of VkMemoryBarrier2."
+    */
+   const VkMemoryBarrier2KHR *barrier =
+      vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2_KHR);
+   VkAccessFlags2KHR src_access_mask =
+      barrier ? barrier->srcAccessMask : dep->srcAccessMask;
+   VkAccessFlags2KHR dst_access_mask =
+      barrier ? barrier->dstAccessMask : dep->dstAccessMask;
+
    if (dep->dstSubpass == VK_SUBPASS_EXTERNAL) {
       pass->subpass_flushes[pass->subpass_count] |=
-         anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
+         anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
    } else {
       assert(dep->dstSubpass < pass->subpass_count);
       pass->subpass_flushes[dep->dstSubpass] |=
-         anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
+         anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
    }
 
    if (dep->srcSubpass == VK_SUBPASS_EXTERNAL) {
       pass->subpass_flushes[0] |=
-         anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
+         anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
    } else {
       assert(dep->srcSubpass < pass->subpass_count);
       pass->subpass_flushes[dep->srcSubpass + 1] |=
-         anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
+         anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
    }
 }
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 09567490f30..8b8711d7d7b 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -66,6 +66,7 @@
 #include "vk_alloc.h"
 #include "vk_debug_report.h"
 #include "vk_device.h"
+#include "vk_enum_defines.h"
 #include "vk_image.h"
 #include "vk_instance.h"
 #include "vk_physical_device.h"
@@ -2538,34 +2539,35 @@ enum anv_pipe_bits {
 
 static inline enum anv_pipe_bits
 anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
-                                     VkAccessFlags flags)
+                                     VkAccessFlags2KHR flags)
 {
    enum anv_pipe_bits pipe_bits = 0;
 
-   u_foreach_bit(b, flags) {
-      switch ((VkAccessFlagBits)(1 << b)) {
-      case VK_ACCESS_SHADER_WRITE_BIT:
+   u_foreach_bit64(b, flags) {
+      switch ((VkAccessFlags2KHR)(1 << b)) {
+      case VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
+      case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as write
           * destination through the data port. To make its content available
           * to future operations, flush the hdc pipeline.
           */
          pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
          break;
-      case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+      case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as render
           * target. To make its content available to future operations, flush
           * the render target cache.
           */
          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+      case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as depth
           * buffer. To make its content available to future operations, flush
           * the depth cache.
           */
          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_TRANSFER_WRITE_BIT:
+      case VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as a
           * transfer write destination. Generic write operations include color
           * & depth operations as well as buffer operations like :
@@ -2582,13 +2584,13 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_MEMORY_WRITE_BIT:
+      case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
          /* We're transitioning a buffer for generic write operations. Flush
           * all the caches.
           */
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          break;
-      case VK_ACCESS_HOST_WRITE_BIT:
+      case VK_ACCESS_2_HOST_WRITE_BIT_KHR:
          /* We're transitioning a buffer for access by CPU. Invalidate
           * all the caches. Since data and tile caches don't have invalidate,
           * we are forced to flush those as well.
@@ -2596,8 +2598,8 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
          break;
-      case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
-      case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
+      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
+      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
          /* We're transitioning a buffer written either from VS stage or from
           * the command streamer (see CmdEndTransformFeedbackEXT), we just
           * need to stall the CS.
@@ -2614,13 +2616,13 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
 
 static inline enum anv_pipe_bits
 anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
-                                          VkAccessFlags flags)
+                                          VkAccessFlags2KHR flags)
 {
    enum anv_pipe_bits pipe_bits = 0;
 
-   u_foreach_bit(b, flags) {
-      switch ((VkAccessFlagBits)(1 << b)) {
-      case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+   u_foreach_bit64(b, flags) {
+      switch ((VkAccessFlags2KHR)(1 << b)) {
+      case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR:
          /* Indirect draw commands take a buffer as input that we're going to
           * read from the command streamer to load some of the HW registers
           * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
@@ -2642,15 +2644,15 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
           */
          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_INDEX_READ_BIT:
-      case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+      case VK_ACCESS_2_INDEX_READ_BIT_KHR:
+      case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR:
          /* We transitioning a buffer to be used for as input for vkCmdDraw*
           * commands, so we invalidate the VF cache to make sure there is no
           * stale data when we start rendering.
           */
          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
          break;
-      case VK_ACCESS_UNIFORM_READ_BIT:
+      case VK_ACCESS_2_UNIFORM_READ_BIT_KHR:
          /* We transitioning a buffer to be used as uniform data. Because
           * uniform is accessed through the data port & sampler, we need to
           * invalidate the texture cache (sampler) & constant cache (data
@@ -2662,28 +2664,28 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
          else
             pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
          break;
-      case VK_ACCESS_SHADER_READ_BIT:
-      case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
-      case VK_ACCESS_TRANSFER_READ_BIT:
+      case VK_ACCESS_2_SHADER_READ_BIT_KHR:
+      case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR:
+      case VK_ACCESS_2_TRANSFER_READ_BIT_KHR:
          /* Transitioning a buffer to be read through the sampler, so
           * invalidate the texture cache, we don't want any stale data.
           */
          pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
          break;
-      case VK_ACCESS_MEMORY_READ_BIT:
+      case VK_ACCESS_2_MEMORY_READ_BIT_KHR:
          /* Transitioning a buffer for generic read, invalidate all the
           * caches.
           */
          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
          break;
-      case VK_ACCESS_MEMORY_WRITE_BIT:
+      case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
          /* Generic write, make sure all previously written things land in
           * memory.
           */
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          break;
-      case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
-      case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
+      case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
+      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
          /* Transitioning a buffer for conditional rendering or transform
           * feedback. We'll load the content of this buffer into HW registers
           * using the command streamer, so we need to stall the command
@@ -2694,7 +2696,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
          pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_HOST_READ_BIT:
+      case VK_ACCESS_2_HOST_READ_BIT_KHR:
          /* We're transitioning a buffer that was written by CPU.  Flush
           * all the caches.
           */
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index ba170e38931..368dab8dd94 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -1206,10 +1206,10 @@ anv_queue_submit_post_and_alloc_new(struct anv_queue *queue,
    return VK_SUCCESS;
 }
 
-VkResult anv_QueueSubmit(
+VkResult anv_QueueSubmit2KHR(
     VkQueue                                     _queue,
     uint32_t                                    submitCount,
-    const VkSubmitInfo*                         pSubmits,
+    const VkSubmitInfo2KHR*                     pSubmits,
     VkFence                                     _fence)
 {
    ANV_FROM_HANDLE(anv_queue, queue, _queue);
@@ -1242,23 +1242,14 @@ VkResult anv_QueueSubmit(
          mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
          anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;
 
-      const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
-         vk_find_struct_const(pSubmits[i].pNext,
-                              TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
       const VkPerformanceQuerySubmitInfoKHR *perf_info =
          vk_find_struct_const(pSubmits[i].pNext,
                               PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
       const int perf_pass = perf_info ? perf_info->counterPassIndex : 0;
-      const uint64_t *wait_values =
-         timeline_info && timeline_info->waitSemaphoreValueCount ?
-         timeline_info->pWaitSemaphoreValues : NULL;
-      const uint64_t *signal_values =
-         timeline_info && timeline_info->signalSemaphoreValueCount ?
-         timeline_info->pSignalSemaphoreValues : NULL;
 
       if (!anv_queue_submit_can_add_submit(submit,
-                                           pSubmits[i].waitSemaphoreCount,
-                                           pSubmits[i].signalSemaphoreCount,
+                                           pSubmits[i].waitSemaphoreInfoCount,
+                                           pSubmits[i].signalSemaphoreInfoCount,
                                            perf_pass)) {
          result = anv_queue_submit_post_and_alloc_new(queue, &submit);
          if (result != VK_SUCCESS)
@@ -1266,19 +1257,19 @@ VkResult anv_QueueSubmit(
       }
 
       /* Wait semaphores */
-      for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreCount; j++) {
+      for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreInfoCount; j++) {
          result = anv_queue_submit_add_in_semaphore(submit,
                                                     device,
-                                                    pSubmits[i].pWaitSemaphores[j],
-                                                    wait_values ? wait_values[j] : 0);
+                                                    pSubmits[i].pWaitSemaphoreInfos[j].semaphore,
+                                                    pSubmits[i].pWaitSemaphoreInfos[j].value);
          if (result != VK_SUCCESS)
             goto out;
       }
 
       /* Command buffers */
-      for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
+      for (uint32_t j = 0; j < pSubmits[i].commandBufferInfoCount; j++) {
          ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
-                         pSubmits[i].pCommandBuffers[j]);
+                         pSubmits[i].pCommandBufferInfos[j].commandBuffer);
          assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
          assert(!anv_batch_has_error(&cmd_buffer->batch));
          anv_measure_submit(cmd_buffer);
@@ -1298,11 +1289,11 @@ VkResult anv_QueueSubmit(
       }
 
       /* Signal semaphores */
-      for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreCount; j++) {
+      for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreInfoCount; j++) {
          result = anv_queue_submit_add_out_semaphore(submit,
                                                      device,
-                                                     pSubmits[i].pSignalSemaphores[j],
-                                                     signal_values ? signal_values[j] : 0);
+                                                     pSubmits[i].pSignalSemaphoreInfos[j].semaphore,
+                                                     pSubmits[i].pSignalSemaphoreInfos[j].value);
          if (result != VK_SUCCESS)
             goto out;
       }
@@ -1350,7 +1341,7 @@ out:
        * anv_device_set_lost() would have been called already by a callee of
        * anv_queue_submit().
        */
-      result = anv_device_set_lost(device, "vkQueueSubmit() failed");
+      result = anv_device_set_lost(device, "vkQueueSubmit2KHR() failed");
    }
 
    return result;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index fe450bd5888..93b8fc99d91 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2403,43 +2403,37 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
    cmd_buffer->state.pending_pipe_bits = bits;
 }
 
-void genX(CmdPipelineBarrier)(
-    VkCommandBuffer                             commandBuffer,
-    VkPipelineStageFlags                        srcStageMask,
-    VkPipelineStageFlags                        destStageMask,
-    VkBool32                                    byRegion,
-    uint32_t                                    memoryBarrierCount,
-    const VkMemoryBarrier*                      pMemoryBarriers,
-    uint32_t                                    bufferMemoryBarrierCount,
-    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
-    uint32_t                                    imageMemoryBarrierCount,
-    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
+static void
+cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
+                   const VkDependencyInfoKHR *dep_info,
+                   const char *reason)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-
    /* XXX: Right now, we're really dumb and just flush whatever categories
     * the app asks for.  One of these days we may make this a bit better
     * but right now that's all the hardware allows for in most areas.
     */
-   VkAccessFlags src_flags = 0;
-   VkAccessFlags dst_flags = 0;
+   VkAccessFlags2KHR src_flags = 0;
+   VkAccessFlags2KHR dst_flags = 0;
 
-   for (uint32_t i = 0; i < memoryBarrierCount; i++) {
-      src_flags |= pMemoryBarriers[i].srcAccessMask;
-      dst_flags |= pMemoryBarriers[i].dstAccessMask;
+   for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
+      src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask;
+      dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask;
    }
 
-   for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
-      src_flags |= pBufferMemoryBarriers[i].srcAccessMask;
-      dst_flags |= pBufferMemoryBarriers[i].dstAccessMask;
+   for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
+      src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask;
+      dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask;
    }
 
-   for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
-      src_flags |= pImageMemoryBarriers[i].srcAccessMask;
-      dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
-      ANV_FROM_HANDLE(anv_image, image, pImageMemoryBarriers[i].image);
-      const VkImageSubresourceRange *range =
-         &pImageMemoryBarriers[i].subresourceRange;
+   for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
+      const VkImageMemoryBarrier2KHR *img_barrier =
+         &dep_info->pImageMemoryBarriers[i];
+
+      src_flags |= img_barrier->srcAccessMask;
+      dst_flags |= img_barrier->dstAccessMask;
+
+      ANV_FROM_HANDLE(anv_image, image, img_barrier->image);
+      const VkImageSubresourceRange *range = &img_barrier->subresourceRange;
 
       uint32_t base_layer, layer_count;
       if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
@@ -2455,8 +2449,8 @@ void genX(CmdPipelineBarrier)(
       if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
          transition_depth_buffer(cmd_buffer, image,
                                  base_layer, layer_count,
-                                 pImageMemoryBarriers[i].oldLayout,
-                                 pImageMemoryBarriers[i].newLayout,
+                                 img_barrier->oldLayout,
+                                 img_barrier->newLayout,
                                  false /* will_full_fast_clear */);
       }
 
@@ -2464,8 +2458,8 @@ void genX(CmdPipelineBarrier)(
          transition_stencil_buffer(cmd_buffer, image,
                                    range->baseMipLevel, level_count,
                                    base_layer, layer_count,
-                                   pImageMemoryBarriers[i].oldLayout,
-                                   pImageMemoryBarriers[i].newLayout,
+                                   img_barrier->oldLayout,
+                                   img_barrier->newLayout,
                                    false /* will_full_fast_clear */);
       }
 
@@ -2476,19 +2470,29 @@ void genX(CmdPipelineBarrier)(
             transition_color_buffer(cmd_buffer, image, 1UL << aspect_bit,
                                     range->baseMipLevel, level_count,
                                     base_layer, layer_count,
-                                    pImageMemoryBarriers[i].oldLayout,
-                                    pImageMemoryBarriers[i].newLayout,
-                                    pImageMemoryBarriers[i].srcQueueFamilyIndex,
-                                    pImageMemoryBarriers[i].dstQueueFamilyIndex,
+                                    img_barrier->oldLayout,
+                                    img_barrier->newLayout,
+                                    img_barrier->srcQueueFamilyIndex,
+                                    img_barrier->dstQueueFamilyIndex,
                                     false /* will_full_fast_clear */);
          }
       }
    }
 
-   anv_add_pending_pipe_bits(cmd_buffer,
-                             anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
-                             anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags),
-                             "pipe barrier");
+   enum anv_pipe_bits bits =
+      anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
+      anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
+
+   anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
+}
+
+void genX(CmdPipelineBarrier2KHR)(
+    VkCommandBuffer                             commandBuffer,
+    const VkDependencyInfoKHR*                  pDependencyInfo)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer_barrier(cmd_buffer, pDependencyInfo, "pipe barrier");
 }
 
 static void
@@ -6866,24 +6870,33 @@ void genX(CmdEndConditionalRenderingEXT)(
  * by the command streamer for later execution.
  */
 #define ANV_PIPELINE_STAGE_PIPELINED_BITS \
-   ~(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | \
-     VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | \
-     VK_PIPELINE_STAGE_HOST_BIT | \
-     VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT)
+   ~(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | \
+     VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR | \
+     VK_PIPELINE_STAGE_2_HOST_BIT_KHR | \
+     VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
 
-void genX(CmdSetEvent)(
+void genX(CmdSetEvent2KHR)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     _event,
-    VkPipelineStageFlags                        stageMask)
+    const VkDependencyInfoKHR*                  pDependencyInfo)
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_event, event, _event);
 
+   VkPipelineStageFlags2KHR src_stages = 0;
+
+   for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++)
+      src_stages |= pDependencyInfo->pMemoryBarriers[i].srcStageMask;
+   for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++)
+      src_stages |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask;
+   for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++)
+      src_stages |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask;
+
    cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-      if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
+      if (src_stages & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
          pc.StallAtPixelScoreboard = true;
          pc.CommandStreamerStallEnable = true;
       }
@@ -6899,10 +6912,10 @@ void genX(CmdSetEvent)(
    }
 }
 
-void genX(CmdResetEvent)(
+void genX(CmdResetEvent2KHR)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     _event,
-    VkPipelineStageFlags                        stageMask)
+    VkPipelineStageFlags2KHR                    stageMask)
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_event, event, _event);
@@ -6927,22 +6940,15 @@ void genX(CmdResetEvent)(
    }
 }
 
-void genX(CmdWaitEvents)(
+void genX(CmdWaitEvents2KHR)(
     VkCommandBuffer                             commandBuffer,
     uint32_t                                    eventCount,
     const VkEvent*                              pEvents,
-    VkPipelineStageFlags                        srcStageMask,
-    VkPipelineStageFlags                        destStageMask,
-    uint32_t                                    memoryBarrierCount,
-    const VkMemoryBarrier*                      pMemoryBarriers,
-    uint32_t                                    bufferMemoryBarrierCount,
-    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
-    uint32_t                                    imageMemoryBarrierCount,
-    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
+    const VkDependencyInfoKHR*                  pDependencyInfos)
 {
-#if GFX_VER >= 8
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
+#if GFX_VER >= 8
    for (uint32_t i = 0; i < eventCount; i++) {
       ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
 
@@ -6960,11 +6966,7 @@ void genX(CmdWaitEvents)(
    anv_finishme("Implement events on gfx7");
 #endif
 
-   genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
-                            false, /* byRegion */
-                            memoryBarrierCount, pMemoryBarriers,
-                            bufferMemoryBarrierCount, pBufferMemoryBarriers,
-                            imageMemoryBarrierCount, pImageMemoryBarriers);
+   cmd_buffer_barrier(cmd_buffer, pDependencyInfos, "wait event");
 }
 
 VkResult genX(CmdSetPerformanceOverrideINTEL)(
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 8978f5843a9..c45f4630990 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -1226,9 +1226,9 @@ void genX(CmdEndQueryIndexedEXT)(
 
 #define TIMESTAMP 0x2358
 
-void genX(CmdWriteTimestamp)(
+void genX(CmdWriteTimestamp2KHR)(
     VkCommandBuffer                             commandBuffer,
-    VkPipelineStageFlagBits                     pipelineStage,
+    VkPipelineStageFlags2KHR                    stage,
     VkQueryPool                                 queryPool,
     uint32_t                                    query)
 {
@@ -1241,13 +1241,10 @@ void genX(CmdWriteTimestamp)(
    struct mi_builder b;
    mi_builder_init(&b, &cmd_buffer->device->info, &cmd_buffer->batch);
 
-   switch (pipelineStage) {
-   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+   if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR) {
       mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
                    mi_reg64(TIMESTAMP));
-      break;
-
-   default:
+   } else {
       /* Everything else is bottom-of-pipe */
       cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@@ -1260,7 +1257,6 @@ void genX(CmdWriteTimestamp)(
          if (GFX_VER == 9 && cmd_buffer->device->info.gt == 4)
             pc.CommandStreamerStallEnable = true;
       }
-      break;
    }
 
    emit_query_pc_availability(cmd_buffer, query_addr, true);



More information about the mesa-commit mailing list