Mesa (main): anv: implement VK_KHR_synchronization2
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Oct 6 20:51:01 UTC 2021
Module: Mesa
Branch: main
Commit: b996fa8efaa42558bda2c52377561d7eb3c4127e
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b996fa8efaa42558bda2c52377561d7eb3c4127e
Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date: Wed Nov 11 21:38:25 2020 +0200
anv: implement VK_KHR_synchronization2
v2: Use u_foreach_bit64() (Samuel)
v3: Add missing handling of VkMemoryBarrier2KHR in pNext of
VkSubpassDependency2KHR (Samuel)
v4: Remove unused ANV_PIPELINE_STAGE_PIPELINED_BITS (Ivan)
v5: fix missing anv_measure_submit() (Jason)
constify anv_pipeline_stage_pipelined_bits (Jason)
v6: Split flushes & invalidation emissions on
vkCmdSetEvent2KHR()/vkCmdWaitEvents2KHR() (Jason)
v7: Only apply flushes once on events (Jason)
v8: Drop split flushes for this patch
v9: Add comment about ignoring some fields of VkMemoryBarrier2 in
VkSubpassDependency2KHR (Jason)
Drop spurious PIPE_CONTROL change s/,/;/ (Jason)
v10: Fix build issue on Android (Lionel)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9045>
---
docs/relnotes/new_features.txt | 1 +
src/intel/vulkan/anv_android.c | 3 +-
src/intel/vulkan/anv_device.c | 8 +++
src/intel/vulkan/anv_pass.c | 22 +++++--
src/intel/vulkan/anv_private.h | 54 ++++++++--------
src/intel/vulkan/anv_queue.c | 35 ++++------
src/intel/vulkan/genX_cmd_buffer.c | 128 +++++++++++++++++++------------------
src/intel/vulkan/genX_query.c | 12 ++--
8 files changed, 139 insertions(+), 124 deletions(-)
diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt
index d7ed2865292..3f0d07e81af 100644
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -15,3 +15,4 @@ VK_KHR_shader_float16_int8 on lavapipe
VK_KHR_shader_subgroup_extended_types on lavapipe
VK_KHR_spirv_1_4 on lavapipe
Experimental raytracing support on RADV
+VK_KHR_synchronization2 on Intel
diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c
index b3b102d4202..c87bfb9cefd 100644
--- a/src/intel/vulkan/anv_android.c
+++ b/src/intel/vulkan/anv_android.c
@@ -34,6 +34,7 @@
#include <sync/sync.h>
#include "anv_private.h"
+#include "vk_common_entrypoints.h"
#include "vk_util.h"
static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
@@ -875,7 +876,7 @@ anv_QueueSignalReleaseImageANDROID(
if (waitSemaphoreCount == 0)
goto done;
- result = anv_QueueSubmit(queue, 1,
+ result = vk_common_QueueSubmit(queue, 1,
&(VkSubmitInfo) {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.waitSemaphoreCount = 1,
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 8c334302bd1..08e9e05252a 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -230,6 +230,7 @@ get_device_extensions(const struct anv_physical_device *device,
.KHR_swapchain = true,
.KHR_swapchain_mutable_format = true,
#endif
+ .KHR_synchronization2 = true,
.KHR_timeline_semaphore = true,
.KHR_uniform_buffer_standard_layout = true,
.KHR_variable_pointers = true,
@@ -1689,6 +1690,13 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR: {
+ VkPhysicalDeviceSynchronization2FeaturesKHR *features =
+ (VkPhysicalDeviceSynchronization2FeaturesKHR *)ext;
+ features->synchronization2 = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index 634a3a3e24e..18c689ac864 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -31,22 +31,36 @@ anv_render_pass_add_subpass_dep(struct anv_device *device,
struct anv_render_pass *pass,
const VkSubpassDependency2KHR *dep)
{
+ /* From the Vulkan 1.2.195 spec:
+ *
+ * "If an instance of VkMemoryBarrier2 is included in the pNext chain,
+ * srcStageMask, dstStageMask, srcAccessMask, and dstAccessMask
+ * parameters are ignored. The synchronization and access scopes instead
+ * are defined by the parameters of VkMemoryBarrier2."
+ */
+ const VkMemoryBarrier2KHR *barrier =
+ vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2_KHR);
+ VkAccessFlags2KHR src_access_mask =
+ barrier ? barrier->srcAccessMask : dep->srcAccessMask;
+ VkAccessFlags2KHR dst_access_mask =
+ barrier ? barrier->dstAccessMask : dep->dstAccessMask;
+
if (dep->dstSubpass == VK_SUBPASS_EXTERNAL) {
pass->subpass_flushes[pass->subpass_count] |=
- anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
+ anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
} else {
assert(dep->dstSubpass < pass->subpass_count);
pass->subpass_flushes[dep->dstSubpass] |=
- anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
+ anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
}
if (dep->srcSubpass == VK_SUBPASS_EXTERNAL) {
pass->subpass_flushes[0] |=
- anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
+ anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
} else {
assert(dep->srcSubpass < pass->subpass_count);
pass->subpass_flushes[dep->srcSubpass + 1] |=
- anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
+ anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
}
}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 09567490f30..8b8711d7d7b 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -66,6 +66,7 @@
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_device.h"
+#include "vk_enum_defines.h"
#include "vk_image.h"
#include "vk_instance.h"
#include "vk_physical_device.h"
@@ -2538,34 +2539,35 @@ enum anv_pipe_bits {
static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
- VkAccessFlags flags)
+ VkAccessFlags2KHR flags)
{
enum anv_pipe_bits pipe_bits = 0;
- u_foreach_bit(b, flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_SHADER_WRITE_BIT:
+ u_foreach_bit64(b, flags) {
+ switch ((VkAccessFlags2KHR)(1 << b)) {
+ case VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
+ case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as write
* destination through the data port. To make its content available
* to future operations, flush the hdc pipeline.
*/
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
break;
- case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+ case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as render
* target. To make its content available to future operations, flush
* the render target cache.
*/
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
break;
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as depth
* buffer. To make its content available to future operations, flush
* the depth cache.
*/
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
- case VK_ACCESS_TRANSFER_WRITE_BIT:
+ case VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as a
* transfer write destination. Generic write operations include color
* & depth operations as well as buffer operations like :
@@ -2582,13 +2584,13 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
- case VK_ACCESS_MEMORY_WRITE_BIT:
+ case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
/* We're transitioning a buffer for generic write operations. Flush
* all the caches.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
- case VK_ACCESS_HOST_WRITE_BIT:
+ case VK_ACCESS_2_HOST_WRITE_BIT_KHR:
/* We're transitioning a buffer for access by CPU. Invalidate
* all the caches. Since data and tile caches don't have invalidate,
* we are forced to flush those as well.
@@ -2596,8 +2598,8 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
pipe_bits |= ANV_PIPE_FLUSH_BITS;
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
- case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
- case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
+ case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
+ case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
/* We're transitioning a buffer written either from VS stage or from
* the command streamer (see CmdEndTransformFeedbackEXT), we just
* need to stall the CS.
@@ -2614,13 +2616,13 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
- VkAccessFlags flags)
+ VkAccessFlags2KHR flags)
{
enum anv_pipe_bits pipe_bits = 0;
- u_foreach_bit(b, flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+ u_foreach_bit64(b, flags) {
+ switch ((VkAccessFlags2KHR)(1 << b)) {
+ case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR:
/* Indirect draw commands take a buffer as input that we're going to
* read from the command streamer to load some of the HW registers
* (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
@@ -2642,15 +2644,15 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
*/
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
break;
- case VK_ACCESS_INDEX_READ_BIT:
- case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+ case VK_ACCESS_2_INDEX_READ_BIT_KHR:
+ case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR:
/* We transitioning a buffer to be used for as input for vkCmdDraw*
* commands, so we invalidate the VF cache to make sure there is no
* stale data when we start rendering.
*/
pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
break;
- case VK_ACCESS_UNIFORM_READ_BIT:
+ case VK_ACCESS_2_UNIFORM_READ_BIT_KHR:
/* We transitioning a buffer to be used as uniform data. Because
* uniform is accessed through the data port & sampler, we need to
* invalidate the texture cache (sampler) & constant cache (data
@@ -2662,28 +2664,28 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
else
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
break;
- case VK_ACCESS_SHADER_READ_BIT:
- case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
- case VK_ACCESS_TRANSFER_READ_BIT:
+ case VK_ACCESS_2_SHADER_READ_BIT_KHR:
+ case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR:
+ case VK_ACCESS_2_TRANSFER_READ_BIT_KHR:
/* Transitioning a buffer to be read through the sampler, so
* invalidate the texture cache, we don't want any stale data.
*/
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
- case VK_ACCESS_MEMORY_READ_BIT:
+ case VK_ACCESS_2_MEMORY_READ_BIT_KHR:
/* Transitioning a buffer for generic read, invalidate all the
* caches.
*/
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
- case VK_ACCESS_MEMORY_WRITE_BIT:
+ case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
/* Generic write, make sure all previously written things land in
* memory.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
- case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
- case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
+ case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
+ case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
/* Transitioning a buffer for conditional rendering or transform
* feedback. We'll load the content of this buffer into HW registers
* using the command streamer, so we need to stall the command
@@ -2694,7 +2696,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
break;
- case VK_ACCESS_HOST_READ_BIT:
+ case VK_ACCESS_2_HOST_READ_BIT_KHR:
/* We're transitioning a buffer that was written by CPU. Flush
* all the caches.
*/
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index ba170e38931..368dab8dd94 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -1206,10 +1206,10 @@ anv_queue_submit_post_and_alloc_new(struct anv_queue *queue,
return VK_SUCCESS;
}
-VkResult anv_QueueSubmit(
+VkResult anv_QueueSubmit2KHR(
VkQueue _queue,
uint32_t submitCount,
- const VkSubmitInfo* pSubmits,
+ const VkSubmitInfo2KHR* pSubmits,
VkFence _fence)
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
@@ -1242,23 +1242,14 @@ VkResult anv_QueueSubmit(
mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;
- const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
- vk_find_struct_const(pSubmits[i].pNext,
- TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
const VkPerformanceQuerySubmitInfoKHR *perf_info =
vk_find_struct_const(pSubmits[i].pNext,
PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
const int perf_pass = perf_info ? perf_info->counterPassIndex : 0;
- const uint64_t *wait_values =
- timeline_info && timeline_info->waitSemaphoreValueCount ?
- timeline_info->pWaitSemaphoreValues : NULL;
- const uint64_t *signal_values =
- timeline_info && timeline_info->signalSemaphoreValueCount ?
- timeline_info->pSignalSemaphoreValues : NULL;
if (!anv_queue_submit_can_add_submit(submit,
- pSubmits[i].waitSemaphoreCount,
- pSubmits[i].signalSemaphoreCount,
+ pSubmits[i].waitSemaphoreInfoCount,
+ pSubmits[i].signalSemaphoreInfoCount,
perf_pass)) {
result = anv_queue_submit_post_and_alloc_new(queue, &submit);
if (result != VK_SUCCESS)
@@ -1266,19 +1257,19 @@ VkResult anv_QueueSubmit(
}
/* Wait semaphores */
- for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreCount; j++) {
+ for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreInfoCount; j++) {
result = anv_queue_submit_add_in_semaphore(submit,
device,
- pSubmits[i].pWaitSemaphores[j],
- wait_values ? wait_values[j] : 0);
+ pSubmits[i].pWaitSemaphoreInfos[j].semaphore,
+ pSubmits[i].pWaitSemaphoreInfos[j].value);
if (result != VK_SUCCESS)
goto out;
}
/* Command buffers */
- for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
+ for (uint32_t j = 0; j < pSubmits[i].commandBufferInfoCount; j++) {
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
- pSubmits[i].pCommandBuffers[j]);
+ pSubmits[i].pCommandBufferInfos[j].commandBuffer);
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
assert(!anv_batch_has_error(&cmd_buffer->batch));
anv_measure_submit(cmd_buffer);
@@ -1298,11 +1289,11 @@ VkResult anv_QueueSubmit(
}
/* Signal semaphores */
- for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreCount; j++) {
+ for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreInfoCount; j++) {
result = anv_queue_submit_add_out_semaphore(submit,
device,
- pSubmits[i].pSignalSemaphores[j],
- signal_values ? signal_values[j] : 0);
+ pSubmits[i].pSignalSemaphoreInfos[j].semaphore,
+ pSubmits[i].pSignalSemaphoreInfos[j].value);
if (result != VK_SUCCESS)
goto out;
}
@@ -1350,7 +1341,7 @@ out:
* anv_device_set_lost() would have been called already by a callee of
* anv_queue_submit().
*/
- result = anv_device_set_lost(device, "vkQueueSubmit() failed");
+ result = anv_device_set_lost(device, "vkQueueSubmit2KHR() failed");
}
return result;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index fe450bd5888..93b8fc99d91 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2403,43 +2403,37 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.pending_pipe_bits = bits;
}
-void genX(CmdPipelineBarrier)(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags destStageMask,
- VkBool32 byRegion,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
+static void
+cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
+ const VkDependencyInfoKHR *dep_info,
+ const char *reason)
{
- ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-
/* XXX: Right now, we're really dumb and just flush whatever categories
* the app asks for. One of these days we may make this a bit better
* but right now that's all the hardware allows for in most areas.
*/
- VkAccessFlags src_flags = 0;
- VkAccessFlags dst_flags = 0;
+ VkAccessFlags2KHR src_flags = 0;
+ VkAccessFlags2KHR dst_flags = 0;
- for (uint32_t i = 0; i < memoryBarrierCount; i++) {
- src_flags |= pMemoryBarriers[i].srcAccessMask;
- dst_flags |= pMemoryBarriers[i].dstAccessMask;
+ for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
+ src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask;
+ dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask;
}
- for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
- src_flags |= pBufferMemoryBarriers[i].srcAccessMask;
- dst_flags |= pBufferMemoryBarriers[i].dstAccessMask;
+ for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
+ src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask;
+ dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask;
}
- for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
- src_flags |= pImageMemoryBarriers[i].srcAccessMask;
- dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
- ANV_FROM_HANDLE(anv_image, image, pImageMemoryBarriers[i].image);
- const VkImageSubresourceRange *range =
- &pImageMemoryBarriers[i].subresourceRange;
+ for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
+ const VkImageMemoryBarrier2KHR *img_barrier =
+ &dep_info->pImageMemoryBarriers[i];
+
+ src_flags |= img_barrier->srcAccessMask;
+ dst_flags |= img_barrier->dstAccessMask;
+
+ ANV_FROM_HANDLE(anv_image, image, img_barrier->image);
+ const VkImageSubresourceRange *range = &img_barrier->subresourceRange;
uint32_t base_layer, layer_count;
if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
@@ -2455,8 +2449,8 @@ void genX(CmdPipelineBarrier)(
if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
transition_depth_buffer(cmd_buffer, image,
base_layer, layer_count,
- pImageMemoryBarriers[i].oldLayout,
- pImageMemoryBarriers[i].newLayout,
+ img_barrier->oldLayout,
+ img_barrier->newLayout,
false /* will_full_fast_clear */);
}
@@ -2464,8 +2458,8 @@ void genX(CmdPipelineBarrier)(
transition_stencil_buffer(cmd_buffer, image,
range->baseMipLevel, level_count,
base_layer, layer_count,
- pImageMemoryBarriers[i].oldLayout,
- pImageMemoryBarriers[i].newLayout,
+ img_barrier->oldLayout,
+ img_barrier->newLayout,
false /* will_full_fast_clear */);
}
@@ -2476,19 +2470,29 @@ void genX(CmdPipelineBarrier)(
transition_color_buffer(cmd_buffer, image, 1UL << aspect_bit,
range->baseMipLevel, level_count,
base_layer, layer_count,
- pImageMemoryBarriers[i].oldLayout,
- pImageMemoryBarriers[i].newLayout,
- pImageMemoryBarriers[i].srcQueueFamilyIndex,
- pImageMemoryBarriers[i].dstQueueFamilyIndex,
+ img_barrier->oldLayout,
+ img_barrier->newLayout,
+ img_barrier->srcQueueFamilyIndex,
+ img_barrier->dstQueueFamilyIndex,
false /* will_full_fast_clear */);
}
}
}
- anv_add_pending_pipe_bits(cmd_buffer,
- anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
- anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags),
- "pipe barrier");
+ enum anv_pipe_bits bits =
+ anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
+ anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
+
+ anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
+}
+
+void genX(CmdPipelineBarrier2KHR)(
+ VkCommandBuffer commandBuffer,
+ const VkDependencyInfoKHR* pDependencyInfo)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ cmd_buffer_barrier(cmd_buffer, pDependencyInfo, "pipe barrier");
}
static void
@@ -6866,24 +6870,33 @@ void genX(CmdEndConditionalRenderingEXT)(
* by the command streamer for later execution.
*/
#define ANV_PIPELINE_STAGE_PIPELINED_BITS \
- ~(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | \
- VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | \
- VK_PIPELINE_STAGE_HOST_BIT | \
- VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT)
+ ~(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | \
+ VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR | \
+ VK_PIPELINE_STAGE_2_HOST_BIT_KHR | \
+ VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
-void genX(CmdSetEvent)(
+void genX(CmdSetEvent2KHR)(
VkCommandBuffer commandBuffer,
VkEvent _event,
- VkPipelineStageFlags stageMask)
+ const VkDependencyInfoKHR* pDependencyInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_event, event, _event);
+ VkPipelineStageFlags2KHR src_stages = 0;
+
+ for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++)
+ src_stages |= pDependencyInfo->pMemoryBarriers[i].srcStageMask;
+ for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++)
+ src_stages |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask;
+ for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++)
+ src_stages |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask;
+
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
+ if (src_stages & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
pc.StallAtPixelScoreboard = true;
pc.CommandStreamerStallEnable = true;
}
@@ -6899,10 +6912,10 @@ void genX(CmdSetEvent)(
}
}
-void genX(CmdResetEvent)(
+void genX(CmdResetEvent2KHR)(
VkCommandBuffer commandBuffer,
VkEvent _event,
- VkPipelineStageFlags stageMask)
+ VkPipelineStageFlags2KHR stageMask)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_event, event, _event);
@@ -6927,22 +6940,15 @@ void genX(CmdResetEvent)(
}
}
-void genX(CmdWaitEvents)(
+void genX(CmdWaitEvents2KHR)(
VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent* pEvents,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags destStageMask,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
+ const VkDependencyInfoKHR* pDependencyInfos)
{
-#if GFX_VER >= 8
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+#if GFX_VER >= 8
for (uint32_t i = 0; i < eventCount; i++) {
ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
@@ -6960,11 +6966,7 @@ void genX(CmdWaitEvents)(
anv_finishme("Implement events on gfx7");
#endif
- genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
- false, /* byRegion */
- memoryBarrierCount, pMemoryBarriers,
- bufferMemoryBarrierCount, pBufferMemoryBarriers,
- imageMemoryBarrierCount, pImageMemoryBarriers);
+ cmd_buffer_barrier(cmd_buffer, pDependencyInfos, "wait event");
}
VkResult genX(CmdSetPerformanceOverrideINTEL)(
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 8978f5843a9..c45f4630990 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -1226,9 +1226,9 @@ void genX(CmdEndQueryIndexedEXT)(
#define TIMESTAMP 0x2358
-void genX(CmdWriteTimestamp)(
+void genX(CmdWriteTimestamp2KHR)(
VkCommandBuffer commandBuffer,
- VkPipelineStageFlagBits pipelineStage,
+ VkPipelineStageFlags2KHR stage,
VkQueryPool queryPool,
uint32_t query)
{
@@ -1241,13 +1241,10 @@ void genX(CmdWriteTimestamp)(
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->device->info, &cmd_buffer->batch);
- switch (pipelineStage) {
- case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+ if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR) {
mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
mi_reg64(TIMESTAMP));
- break;
-
- default:
+ } else {
/* Everything else is bottom-of-pipe */
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@@ -1260,7 +1257,6 @@ void genX(CmdWriteTimestamp)(
if (GFX_VER == 9 && cmd_buffer->device->info.gt == 4)
pc.CommandStreamerStallEnable = true;
}
- break;
}
emit_query_pc_availability(cmd_buffer, query_addr, true);
More information about the mesa-commit
mailing list