[Mesa-dev] [PATCH v2 13/32] vulkan/wsi: Implement prime in a completely generic way

Jason Ekstrand jason at jlekstrand.net
Wed Nov 29 00:28:25 UTC 2017


---
 src/amd/vulkan/radv_wsi.c           | 137 +++------------
 src/intel/vulkan/anv_wsi.c          |  14 +-
 src/vulkan/wsi/wsi_common.c         | 341 +++++++++++++++++++++++++++++++++++-
 src/vulkan/wsi/wsi_common.h         |  54 +++++-
 src/vulkan/wsi/wsi_common_private.h |  16 ++
 src/vulkan/wsi/wsi_common_wayland.c |   6 +-
 src/vulkan/wsi/wsi_common_x11.c     |  87 +++++----
 7 files changed, 475 insertions(+), 180 deletions(-)

diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 247f7cc..589eb5c 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -40,6 +40,13 @@ radv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
 	return radv_lookup_entrypoint(pName);
 }
 
+/*
+ * Callback installed as wsi_device.queue_get_family_index by
+ * radv_init_wsi(): returns the queue_family_index stored in the radv_queue
+ * behind the given VkQueue handle.
+ *
+ * NOTE(review): the anv_ prefix looks copied from the Intel driver; the
+ * rest of this file uses radv_.  A rename must also update the assignment
+ * in radv_init_wsi().
+ */
+static uint32_t
+anv_wsi_queue_get_family_index(VkQueue _queue)
+{
+	RADV_FROM_HANDLE(radv_queue, queue, _queue);
+	return queue->queue_family_index;
+}
+
 VkResult
 radv_init_wsi(struct radv_physical_device *physical_device)
 {
@@ -49,6 +56,9 @@ radv_init_wsi(struct radv_physical_device *physical_device)
 			radv_physical_device_to_handle(physical_device),
 			radv_wsi_proc_addr);
 
+	physical_device->wsi_device.queue_get_family_index =
+		anv_wsi_queue_get_family_index;
+
 #ifdef VK_USE_PLATFORM_XCB_KHR
 	result = wsi_x11_init_wsi(&physical_device->wsi_device, &physical_device->instance->alloc);
 	if (result != VK_SUCCESS)
@@ -151,8 +161,6 @@ static VkResult
 radv_wsi_image_create(VkDevice device_h,
 		      const VkSwapchainCreateInfoKHR *pCreateInfo,
 		      const VkAllocationCallbacks* pAllocator,
-		      bool needs_linear_copy,
-		      bool linear,
 		      struct wsi_image *wsi_image)
 {
 	VkResult result = VK_SUCCESS;
@@ -178,7 +186,7 @@ radv_wsi_image_create(VkDevice device_h,
 						   .arrayLayers = 1,
 						   .samples = 1,
 						   /* FIXME: Need a way to use X tiling to allow scanout */
-						   .tiling = linear ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
+						   .tiling = VK_IMAGE_TILING_OPTIMAL,
 						   .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
 						   .flags = 0,
 					   },
@@ -203,7 +211,7 @@ radv_wsi_image_create(VkDevice device_h,
 	int memory_type_index = -1;
 	for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
 		bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
-		if ((linear && !is_local) || (!linear && is_local)) {
+		if (is_local) {
 			memory_type_index = i;
 			break;
 		}
@@ -228,16 +236,10 @@ radv_wsi_image_create(VkDevice device_h,
 
 	radv_BindImageMemory(device_h, image_h, memory_h, 0);
 
-	/*
-	 * return the fd for the image in the no copy mode,
-	 * or the fd for the linear image if a copy is required.
-	 */
-	if (!needs_linear_copy || (needs_linear_copy && linear)) {
-		RADV_FROM_HANDLE(radv_device_memory, memory, memory_h);
-		if (!radv_get_memory_fd(device, memory, &fd))
-			goto fail_alloc_memory;
-		wsi_image->fd = fd;
-	}
+	RADV_FROM_HANDLE(radv_device_memory, memory, memory_h);
+	if (!radv_get_memory_fd(device, memory, &fd))
+		goto fail_alloc_memory;
+	wsi_image->fd = fd;
 
 	surface = &image->surface;
 
@@ -277,94 +279,6 @@ static const struct wsi_image_fns radv_wsi_image_fns = {
    .free_wsi_image = radv_wsi_image_free,
 };
 
-#define NUM_PRIME_POOLS RADV_QUEUE_TRANSFER
-static void
-radv_wsi_free_prime_command_buffers(struct radv_device *device,
-				    struct wsi_swapchain *swapchain)
-{
-	const int num_pools = NUM_PRIME_POOLS;
-	const int num_images = swapchain->image_count;
-	int i;
-	for (i = 0; i < num_pools; i++) {
-		radv_FreeCommandBuffers(radv_device_to_handle(device),
-				     swapchain->cmd_pools[i],
-				     swapchain->image_count,
-				     &swapchain->cmd_buffers[i * num_images]);
-
-		radv_DestroyCommandPool(radv_device_to_handle(device),
-				     swapchain->cmd_pools[i],
-				     &swapchain->alloc);
-	}
-}
-
-static VkResult
-radv_wsi_create_prime_command_buffers(struct radv_device *device,
-				      const VkAllocationCallbacks *alloc,
-				      struct wsi_swapchain *swapchain)
-{
-	const int num_pools = NUM_PRIME_POOLS;
-	const int num_images = swapchain->image_count;
-	int num_cmd_buffers = num_images * num_pools; //TODO bump to MAX_QUEUE_FAMILIES
-	VkResult result;
-	int i, j;
-
-	swapchain->cmd_buffers = vk_alloc(alloc, (sizeof(VkCommandBuffer) * num_cmd_buffers), 8,
-					  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
-	if (!swapchain->cmd_buffers)
-		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-	memset(swapchain->cmd_buffers, 0, sizeof(VkCommandBuffer) * num_cmd_buffers);
-	memset(swapchain->cmd_pools, 0, sizeof(VkCommandPool) * num_pools);
-	for (i = 0; i < num_pools; i++) {
-		VkCommandPoolCreateInfo pool_create_info;
-
-		pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
-		pool_create_info.pNext = NULL;
-		pool_create_info.flags = 0;
-		pool_create_info.queueFamilyIndex = i;
-
-		result = radv_CreateCommandPool(radv_device_to_handle(device),
-						&pool_create_info, alloc,
-						&swapchain->cmd_pools[i]);
-		if (result != VK_SUCCESS)
-			goto fail;
-
-		VkCommandBufferAllocateInfo cmd_buffer_info;
-		cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
-		cmd_buffer_info.pNext = NULL;
-		cmd_buffer_info.commandPool = swapchain->cmd_pools[i];
-		cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
-		cmd_buffer_info.commandBufferCount = num_images;
-
-		result = radv_AllocateCommandBuffers(radv_device_to_handle(device),
-						     &cmd_buffer_info,
-						     &swapchain->cmd_buffers[i * num_images]);
-		if (result != VK_SUCCESS)
-			goto fail;
-		for (j = 0; j < num_images; j++) {
-			VkImage image, linear_image;
-			int idx = (i * num_images) + j;
-
-			swapchain->get_image_and_linear(swapchain, j, &image, &linear_image);
-			VkCommandBufferBeginInfo begin_info = {0};
-
-			begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-
-			radv_BeginCommandBuffer(swapchain->cmd_buffers[idx], &begin_info);
-
-			radv_blit_to_prime_linear(radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx]),
-						  radv_image_from_handle(image),
-						  radv_image_from_handle(linear_image));
-
-			radv_EndCommandBuffer(swapchain->cmd_buffers[idx]);
-		}
-	}
-	return VK_SUCCESS;
-fail:
-	radv_wsi_free_prime_command_buffers(device, swapchain);
-	return result;
-}
-
 VkResult radv_CreateSwapchainKHR(
 	VkDevice                                     _device,
 	const VkSwapchainCreateInfoKHR*              pCreateInfo,
@@ -398,13 +312,6 @@ VkResult radv_CreateSwapchainKHR(
 	for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++)
 		swapchain->fences[i] = VK_NULL_HANDLE;
 
-	if (swapchain->needs_linear_copy) {
-		result = radv_wsi_create_prime_command_buffers(device, alloc,
-							       swapchain);
-		if (result != VK_SUCCESS)
-			return result;
-	}
-
 	*pSwapchain = wsi_swapchain_to_handle(swapchain);
 
 	return VK_SUCCESS;
@@ -432,9 +339,6 @@ void radv_DestroySwapchainKHR(
 			radv_DestroyFence(_device, swapchain->fences[i], pAllocator);
 	}
 
-	if (swapchain->needs_linear_copy)
-		radv_wsi_free_prime_command_buffers(device, swapchain);
-
 	swapchain->destroy(swapchain, alloc);
 }
 
@@ -519,11 +423,7 @@ VkResult radv_QueuePresentKHR(
 					 1, &swapchain->fences[0]);
 		}
 
-		if (swapchain->needs_linear_copy) {
-			int idx = (queue->queue_family_index * swapchain->image_count) + pPresentInfo->pImageIndices[i];
-			cs = radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx])->cs;
-		} else
-			cs = queue->device->empty_cs[queue->queue_family_index];
+		cs = queue->device->empty_cs[queue->queue_family_index];
 		RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]);
 		struct radeon_winsys_fence *base_fence = fence->fence;
 		struct radeon_winsys_ctx *ctx = queue->hw_ctx;
@@ -539,6 +439,9 @@ VkResult radv_QueuePresentKHR(
 			region = &regions->pRegions[i];
 
 		item_result = swapchain->queue_present(swapchain,
+						  _queue,
+						  pPresentInfo->waitSemaphoreCount,
+						  pPresentInfo->pWaitSemaphores,
 						  pPresentInfo->pImageIndices[i],
 						  region);
 		/* TODO: What if one of them returns OUT_OF_DATE? */
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index f898a07..18ef61a 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -40,6 +40,12 @@ anv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
    return anv_lookup_entrypoint(&physical_device->info, pName);
 }
 
+/*
+ * Callback installed as wsi_device.queue_get_family_index by
+ * anv_init_wsi().  Reports family 0 for every queue; the argument is not
+ * inspected.
+ *
+ * NOTE(review): presumably valid because anv exposes a single queue
+ * family -- confirm.
+ */
+static uint32_t
+anv_wsi_queue_get_family_index(VkQueue queue)
+{
+   return 0;
+}
+
 VkResult
 anv_init_wsi(struct anv_physical_device *physical_device)
 {
@@ -49,6 +55,9 @@ anv_init_wsi(struct anv_physical_device *physical_device)
                    anv_physical_device_to_handle(physical_device),
                    anv_wsi_proc_addr);
 
+   physical_device->wsi_device.queue_get_family_index =
+      anv_wsi_queue_get_family_index;
+
 #ifdef VK_USE_PLATFORM_XCB_KHR
    result = wsi_x11_init_wsi(&physical_device->wsi_device, &physical_device->instance->alloc);
    if (result != VK_SUCCESS)
@@ -182,8 +191,6 @@ static VkResult
 anv_wsi_image_create(VkDevice device_h,
                      const VkSwapchainCreateInfoKHR *pCreateInfo,
                      const VkAllocationCallbacks* pAllocator,
-                     bool different_gpu,
-                     bool linear,
                      struct wsi_image *wsi_image)
 {
    struct anv_device *device = anv_device_from_handle(device_h);
@@ -434,6 +441,9 @@ VkResult anv_QueuePresentKHR(
       anv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]);
 
       item_result = swapchain->queue_present(swapchain,
+                                             _queue,
+                                             pPresentInfo->waitSemaphoreCount,
+                                             pPresentInfo->pWaitSemaphores,
                                              pPresentInfo->pImageIndices[i],
                                              region);
       /* TODO: What if one of them returns OUT_OF_DATE? */
diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index bb35237..7cb5d89 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -22,6 +22,7 @@
  */
 
 #include "wsi_common_private.h"
+#include "util/macros.h"
 
 void
 wsi_device_init(struct wsi_device *wsi,
@@ -29,25 +30,363 @@ wsi_device_init(struct wsi_device *wsi,
                 WSI_FN_GetPhysicalDeviceProcAddr proc_addr)
 {
    memset(wsi, 0, sizeof(*wsi));
+
+   /* The two physical-device queries are only called below, so they are
+    * resolved into locals named after the entry point.
+    */
+#define WSI_GET_CB(func) \
+   PFN_vk##func func = (PFN_vk##func)proc_addr(pdevice, "vk" #func)
+   WSI_GET_CB(GetPhysicalDeviceMemoryProperties);
+   WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties);
+#undef WSI_GET_CB
+
+   /* Cache the memory properties and the queue family count.  Passing NULL
+    * for the properties array makes the second call return only the count.
+    */
+   GetPhysicalDeviceMemoryProperties(pdevice, &wsi->memory_props);
+   GetPhysicalDeviceQueueFamilyProperties(pdevice, &wsi->queue_family_count, NULL);
+
+   /* Redefine the helper so that each entry point is stored in the
+    * wsi_device member of the same name.
+    */
+#define WSI_GET_CB(func) \
+   wsi->func = (PFN_vk##func)proc_addr(pdevice, "vk" #func)
+   WSI_GET_CB(AllocateMemory);
+   WSI_GET_CB(AllocateCommandBuffers);
+   WSI_GET_CB(BindBufferMemory);
+   WSI_GET_CB(BindImageMemory);
+   WSI_GET_CB(BeginCommandBuffer);
+   WSI_GET_CB(CmdCopyImageToBuffer);
+   WSI_GET_CB(CreateBuffer);
+   WSI_GET_CB(CreateCommandPool);
+   WSI_GET_CB(CreateImage);
+   WSI_GET_CB(DestroyBuffer);
+   WSI_GET_CB(DestroyCommandPool);
+   WSI_GET_CB(DestroyImage);
+   WSI_GET_CB(EndCommandBuffer);
+   WSI_GET_CB(FreeMemory);
+   WSI_GET_CB(FreeCommandBuffers);
+   WSI_GET_CB(GetBufferMemoryRequirements);
+   WSI_GET_CB(GetImageMemoryRequirements);
+   WSI_GET_CB(GetMemoryFdKHR);
+   WSI_GET_CB(QueueSubmit);
+#undef WSI_GET_CB
 }
 
+/*
+ * Initialize the common part of a swapchain.
+ *
+ * Zeroes *chain, records the wsi device, the VkDevice and a copy of the
+ * allocator, then creates one command pool per queue family reported by
+ * the wsi device.  pCreateInfo is not read here.
+ *
+ * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if the pool array cannot
+ * be allocated, or the error returned by CreateCommandPool.
+ */
 VkResult
 wsi_swapchain_init(const struct wsi_device *wsi,
                    struct wsi_swapchain *chain,
                    VkDevice device,
-                   const VkSwapchainCreateInfoKHR* pCreateInfo,
+                   const VkSwapchainCreateInfoKHR *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator)
 {
+   VkResult result;
+
    memset(chain, 0, sizeof(*chain));
 
    chain->wsi = wsi;
    chain->device = device;
    chain->alloc = *pAllocator;
 
+   /* One VkCommandPool slot per queue family, indexed by family. */
+   chain->cmd_pools =
+      vk_zalloc(pAllocator, sizeof(VkCommandPool) * wsi->queue_family_count, 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!chain->cmd_pools)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   for (uint32_t i = 0; i < wsi->queue_family_count; i++) {
+      const VkCommandPoolCreateInfo cmd_pool_info = {
+         .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+         .pNext = NULL,
+         .flags = 0,
+         .queueFamilyIndex = i,
+      };
+      result = wsi->CreateCommandPool(device, &cmd_pool_info, &chain->alloc,
+                                      &chain->cmd_pools[i]);
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
+
    return VK_SUCCESS;
+
+fail:
+   /* wsi_swapchain_finish() walks every slot of cmd_pools, including the
+    * ones this loop never reached.
+    */
+   wsi_swapchain_finish(chain);
+   return result;
 }
 
 void
 wsi_swapchain_finish(struct wsi_swapchain *chain)
 {
+   for (uint32_t i = 0; i < chain->wsi->queue_family_count; i++) {
+      chain->wsi->DestroyCommandPool(chain->device, chain->cmd_pools[i],
+                                     &chain->alloc);
+   }
+}
+
+/*
+ * Return the index of the first memory type that is permitted by
+ * type_bits (bit i set means type i is acceptable) and whose property
+ * flags include every flag in props.
+ *
+ * Reaching unreachable() means no memory type satisfied both conditions.
+ */
+static uint32_t
+select_memory_type(const struct wsi_device *wsi,
+                   VkMemoryPropertyFlags props,
+                   uint32_t type_bits)
+{
+   for (uint32_t i = 0; i < wsi->memory_props.memoryTypeCount; i++) {
+      const VkMemoryType type = wsi->memory_props.memoryTypes[i];
+      /* Shift an unsigned 1: with a signed int, i == 31 would overflow. */
+      if ((type_bits & (1u << i)) && (type.propertyFlags & props) == props)
+         return i;
+   }
+
+   unreachable("No memory type found");
+}
+
+/*
+ * Bytes per pixel for the formats the prime path handles: 4 for the two
+ * B8G8R8A8 variants.  Any other format hits unreachable().
+ */
+static uint32_t
+vk_format_size(VkFormat format)
+{
+   if (format == VK_FORMAT_B8G8R8A8_UNORM ||
+       format == VK_FORMAT_B8G8R8A8_SRGB)
+      return 4;
+
+   unreachable("Unknown WSI Format");
+}
+
+/*
+ * Round v up to the next multiple of a.  a must be a non-zero power of
+ * two (asserted).
+ */
+static inline uint32_t
+align_u32(uint32_t v, uint32_t a)
+{
+   const uint32_t mask = a - 1;
+
+   assert(a != 0 && (a & mask) == 0);
+   return (v + mask) & ~mask;
+}
+
+/* Byte alignment applied to the row pitch of the linear prime buffer. */
+#define WSI_PRIME_LINEAR_STRIDE_ALIGN 256
+
+/*
+ * Create one swapchain image for the prime path.
+ *
+ * Two objects are created: an optimally tiled VkImage with its own device
+ * local memory, and a linear VkBuffer whose memory is exportable as a
+ * dma-buf.  For every queue family a command buffer is allocated from
+ * chain->cmd_pools and pre-recorded with an image -> buffer copy.
+ *
+ * On success image->fd holds the fd exported from the linear buffer's
+ * memory and image->size, image->row_pitch and image->offset describe that
+ * buffer.  On failure everything created so far is released through
+ * wsi_destroy_prime_image() and the error code is returned.
+ */
+VkResult
+wsi_create_prime_image(const struct wsi_swapchain *chain,
+                       const VkSwapchainCreateInfoKHR *pCreateInfo,
+                       struct wsi_image *image)
+{
+   const struct wsi_device *wsi = chain->wsi;
+   VkResult result;
+
+   /* Start from all-null handles so the fail path can destroy blindly. */
+   memset(image, 0, sizeof(*image));
+
+   const uint32_t cpp = vk_format_size(pCreateInfo->imageFormat);
+   const uint32_t linear_stride = align_u32(pCreateInfo->imageExtent.width * cpp,
+                                            WSI_PRIME_LINEAR_STRIDE_ALIGN);
+
+   uint32_t linear_size = linear_stride * pCreateInfo->imageExtent.height;
+   linear_size = align_u32(linear_size, 4096);
+
+   /* Linear buffer that is exported to the window system. */
+   const VkExternalMemoryBufferCreateInfoKHR prime_buffer_external_info = {
+      .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+      .pNext = NULL,
+      .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+   };
+   const VkBufferCreateInfo prime_buffer_info = {
+      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+      .pNext = &prime_buffer_external_info,
+      .size = linear_size,
+      .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+   };
+   result = wsi->CreateBuffer(chain->device, &prime_buffer_info,
+                              &chain->alloc, &image->prime.buffer);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   VkMemoryRequirements reqs;
+   wsi->GetBufferMemoryRequirements(chain->device, image->prime.buffer, &reqs);
+   assert(reqs.size <= linear_size);
+
+   /* pNext chain: dedicated -> export -> wsi (implicit sync). */
+   const struct wsi_memory_allocate_info memory_wsi_info = {
+      .sType = VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA,
+      .pNext = NULL,
+      .implicit_sync = true,
+   };
+   const VkExportMemoryAllocateInfoKHR prime_memory_export_info = {
+      .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
+      .pNext = &memory_wsi_info,
+      .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+   };
+   const VkMemoryDedicatedAllocateInfoKHR prime_memory_dedicated_info = {
+      .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR,
+      .pNext = &prime_memory_export_info,
+      .image = VK_NULL_HANDLE,
+      .buffer = image->prime.buffer,
+   };
+   const VkMemoryAllocateInfo prime_memory_info = {
+      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+      .pNext = &prime_memory_dedicated_info,
+      .allocationSize = linear_size,
+      .memoryTypeIndex = select_memory_type(wsi, 0, reqs.memoryTypeBits),
+   };
+   result = wsi->AllocateMemory(chain->device, &prime_memory_info,
+                                &chain->alloc, &image->prime.memory);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   result = wsi->BindBufferMemory(chain->device, image->prime.buffer,
+                                  image->prime.memory, 0);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   /* Optimally tiled image; TRANSFER_SRC is added so it can be copied
+    * into the linear buffer.
+    */
+   const VkImageCreateInfo image_info = {
+      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+      .pNext = NULL,
+      .flags = 0,
+      .imageType = VK_IMAGE_TYPE_2D,
+      .format = pCreateInfo->imageFormat,
+      .extent = {
+         .width = pCreateInfo->imageExtent.width,
+         .height = pCreateInfo->imageExtent.height,
+         .depth = 1,
+      },
+      .mipLevels = 1,
+      .arrayLayers = 1,
+      .samples = VK_SAMPLE_COUNT_1_BIT,
+      .tiling = VK_IMAGE_TILING_OPTIMAL,
+      .usage = pCreateInfo->imageUsage | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+      .sharingMode = pCreateInfo->imageSharingMode,
+      .queueFamilyIndexCount = pCreateInfo->queueFamilyIndexCount,
+      .pQueueFamilyIndices = pCreateInfo->pQueueFamilyIndices,
+      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+   };
+   result = wsi->CreateImage(chain->device, &image_info,
+                             &chain->alloc, &image->image);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   wsi->GetImageMemoryRequirements(chain->device, image->image, &reqs);
+
+   const VkMemoryDedicatedAllocateInfoKHR memory_dedicated_info = {
+      .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR,
+      .pNext = NULL,
+      .image = image->image,
+      .buffer = VK_NULL_HANDLE,
+   };
+   const VkMemoryAllocateInfo memory_info = {
+      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+      .pNext = &memory_dedicated_info,
+      .allocationSize = reqs.size,
+      .memoryTypeIndex = select_memory_type(wsi, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+                                            reqs.memoryTypeBits),
+   };
+   result = wsi->AllocateMemory(chain->device, &memory_info,
+                                &chain->alloc, &image->memory);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   result = wsi->BindImageMemory(chain->device, image->image,
+                                 image->memory, 0);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   image->prime.blit_cmd_buffers =
+      vk_zalloc(&chain->alloc,
+                sizeof(VkCommandBuffer) * wsi->queue_family_count, 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!image->prime.blit_cmd_buffers) {
+      /* result still holds VK_SUCCESS from the bind above; report the
+       * allocation failure instead of returning success after cleanup.
+       */
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      goto fail;
+   }
+
+   /* Record one image -> buffer copy per queue family. */
+   for (uint32_t i = 0; i < wsi->queue_family_count; i++) {
+      const VkCommandBufferAllocateInfo cmd_buffer_info = {
+         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+         .pNext = NULL,
+         .commandPool = chain->cmd_pools[i],
+         .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+         .commandBufferCount = 1,
+      };
+      result = wsi->AllocateCommandBuffers(chain->device, &cmd_buffer_info,
+                                           &image->prime.blit_cmd_buffers[i]);
+      if (result != VK_SUCCESS)
+         goto fail;
+
+      const VkCommandBufferBeginInfo begin_info = {
+         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+      };
+      result = wsi->BeginCommandBuffer(image->prime.blit_cmd_buffers[i],
+                                       &begin_info);
+      if (result != VK_SUCCESS)
+         goto fail;
+
+      struct VkBufferImageCopy buffer_image_copy = {
+         .bufferOffset = 0,
+         /* bufferRowLength is expressed in texels, not bytes. */
+         .bufferRowLength = linear_stride / cpp,
+         .bufferImageHeight = 0,
+         .imageSubresource = {
+            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+            .mipLevel = 0,
+            .baseArrayLayer = 0,
+            .layerCount = 1,
+         },
+         .imageOffset = { .x = 0, .y = 0, .z = 0 },
+         .imageExtent = {
+            .width = pCreateInfo->imageExtent.width,
+            .height = pCreateInfo->imageExtent.height,
+            .depth = 1,
+         },
+      };
+      wsi->CmdCopyImageToBuffer(image->prime.blit_cmd_buffers[i],
+                                image->image,
+                                VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+                                image->prime.buffer,
+                                1, &buffer_image_copy);
+
+      result = wsi->EndCommandBuffer(image->prime.blit_cmd_buffers[i]);
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
+
+   /* Export the linear buffer's memory; this is the fd handed out in
+    * image->fd.
+    */
+   const VkMemoryGetFdInfoKHR linear_memory_get_fd_info = {
+      .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
+      .pNext = NULL,
+      .memory = image->prime.memory,
+      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+   };
+   int fd;
+   result = wsi->GetMemoryFdKHR(chain->device, &linear_memory_get_fd_info, &fd);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   image->size = linear_size;
+   image->row_pitch = linear_stride;
+   image->offset = 0;
+   image->fd = fd;
+
+   return VK_SUCCESS;
+
+fail:
+   wsi_destroy_prime_image(chain, image);
+
+   return result;
+}
+
+/*
+ * Release everything wsi_create_prime_image() attaches to an image: the
+ * per-queue-family blit command buffers and the array holding them, the
+ * image and its memory, and the linear buffer and its memory.
+ */
+void
+wsi_destroy_prime_image(const struct wsi_swapchain *chain,
+                        struct wsi_image *image)
+{
+   const struct wsi_device *wsi = chain->wsi;
+   const VkDevice device = chain->device;
+   VkCommandBuffer *blits = image->prime.blit_cmd_buffers;
+
+   if (blits != NULL) {
+      /* Each command buffer goes back to the pool of its own family. */
+      uint32_t family = 0;
+      while (family < wsi->queue_family_count) {
+         wsi->FreeCommandBuffers(device, chain->cmd_pools[family],
+                                 1, &blits[family]);
+         family++;
+      }
+      vk_free(&chain->alloc, blits);
+   }
+
+   wsi->FreeMemory(device, image->memory, &chain->alloc);
+   wsi->DestroyImage(device, image->image, &chain->alloc);
+   wsi->FreeMemory(device, image->prime.memory, &chain->alloc);
+   wsi->DestroyBuffer(device, image->prime.buffer, &chain->alloc);
+}
+
+/*
+ * Submit the pre-recorded image -> linear buffer copy of `image` on
+ * `queue`, waiting on the given semaphores first.
+ *
+ * The command buffer is picked by the queue's family index as reported by
+ * the driver's queue_get_family_index callback.  Returns the QueueSubmit
+ * result, or VK_ERROR_OUT_OF_HOST_MEMORY if the temporary wait-stage array
+ * cannot be allocated.
+ */
+VkResult
+wsi_prime_image_blit_to_linear(const struct wsi_swapchain *chain,
+                               struct wsi_image *image,
+                               VkQueue queue,
+                               uint32_t waitSemaphoreCount,
+                               const VkSemaphore *pWaitSemaphores)
+{
+   uint32_t queue_family = chain->wsi->queue_get_family_index(queue);
+
+   /* VkSubmitInfo::pWaitDstStageMask must provide one entry per wait
+    * semaphore.  A single stack slot covers zero or one semaphore; a
+    * larger count needs a temporary array, otherwise the driver reads past
+    * the end of a lone variable.
+    */
+   VkPipelineStageFlags single_stage = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
+   VkPipelineStageFlags *stage_flags = &single_stage;
+   if (waitSemaphoreCount > 1) {
+      stage_flags = vk_alloc(&chain->alloc,
+                             sizeof(*stage_flags) * waitSemaphoreCount, 8,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+      if (!stage_flags)
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+      for (uint32_t i = 0; i < waitSemaphoreCount; i++)
+         stage_flags[i] = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
+   }
+
+   const VkSubmitInfo submit_info = {
+      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+      .pNext = NULL,
+      .waitSemaphoreCount = waitSemaphoreCount,
+      .pWaitSemaphores = pWaitSemaphores,
+      .pWaitDstStageMask = stage_flags,
+      .commandBufferCount = 1,
+      .pCommandBuffers = &image->prime.blit_cmd_buffers[queue_family],
+      .signalSemaphoreCount = 0,
+      .pSignalSemaphores = NULL,
+   };
+   VkResult result =
+      chain->wsi->QueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
+
+   if (stage_flags != &single_stage)
+      vk_free(&chain->alloc, stage_flags);
+
+   return result;
 }
diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index 243bc6f..d77ae44 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -51,6 +51,13 @@ struct wsi_memory_allocate_info {
 struct wsi_image {
    VkImage image;
    VkDeviceMemory memory;
+
+   struct {
+      VkBuffer buffer;
+      VkDeviceMemory memory;
+      VkCommandBuffer *blit_cmd_buffers;
+   } prime;
+
    uint32_t size;
    uint32_t offset;
    uint32_t row_pitch;
@@ -62,8 +69,6 @@ struct wsi_image_fns {
    VkResult (*create_wsi_image)(VkDevice device_h,
                                 const VkSwapchainCreateInfoKHR *pCreateInfo,
                                 const VkAllocationCallbacks *pAllocator,
-                                bool needs_linear_copy,
-                                bool linear,
                                 struct wsi_image *image_p);
    void (*free_wsi_image)(VkDevice device,
                           const VkAllocationCallbacks *pAllocator,
@@ -77,11 +82,11 @@ struct wsi_swapchain {
    VkAllocationCallbacks alloc;
    const struct wsi_image_fns *image_fns;
    VkFence fences[3];
-   VkCommandBuffer *cmd_buffers;
-   VkCommandPool cmd_pools[3];
    VkPresentModeKHR present_mode;
    uint32_t image_count;
-   bool needs_linear_copy;
+
+   /* Command pools, one per queue family */
+   VkCommandPool *cmd_pools;
 
    VkResult (*destroy)(struct wsi_swapchain *swapchain,
                        const VkAllocationCallbacks *pAllocator);
@@ -91,12 +96,11 @@ struct wsi_swapchain {
                                   uint64_t timeout, VkSemaphore semaphore,
                                   uint32_t *image_index);
    VkResult (*queue_present)(struct wsi_swapchain *swap_chain,
+                             VkQueue queue,
+                             uint32_t waitSemaphoreCount,
+                             const VkSemaphore *pWaitSemaphores,
                              uint32_t image_index,
                              const VkPresentRegionKHR *damage);
-   void (*get_image_and_linear)(struct wsi_swapchain *swapchain,
-                                int imageIndex,
-                                VkImage *image,
-                                VkImage *linear_image);
 };
 
 struct wsi_interface {
@@ -137,6 +141,33 @@ struct wsi_interface {
 #define VK_ICD_WSI_PLATFORM_MAX 5
 
 struct wsi_device {
+   /* Filled in by wsi_device_init() from the physical device. */
+   VkPhysicalDeviceMemoryProperties memory_props;
+   /* Number of queue families, queried in wsi_device_init(). */
+   uint32_t queue_family_count;
+
+   /* Driver hook returning the queue family index of a VkQueue.  It is
+    * assigned by the driver after wsi_device_init() (see radv_init_wsi()
+    * and anv_init_wsi()).
+    */
+   uint32_t (*queue_get_family_index)(VkQueue queue);
+
+   /* Vulkan entry points resolved through proc_addr in wsi_device_init().
+    * Each WSI_CB(X) declares a PFN_vkX member named X.
+    */
+#define WSI_CB(cb) PFN_vk##cb cb
+   WSI_CB(AllocateMemory);
+   WSI_CB(AllocateCommandBuffers);
+   WSI_CB(BindBufferMemory);
+   WSI_CB(BindImageMemory);
+   WSI_CB(BeginCommandBuffer);
+   WSI_CB(CmdCopyImageToBuffer);
+   WSI_CB(CreateBuffer);
+   WSI_CB(CreateCommandPool);
+   WSI_CB(CreateImage);
+   WSI_CB(DestroyBuffer);
+   WSI_CB(DestroyCommandPool);
+   WSI_CB(DestroyImage);
+   WSI_CB(EndCommandBuffer);
+   WSI_CB(FreeMemory);
+   WSI_CB(FreeCommandBuffers);
+   WSI_CB(GetBufferMemoryRequirements);
+   WSI_CB(GetImageMemoryRequirements);
+   WSI_CB(GetMemoryFdKHR);
+   WSI_CB(QueueSubmit);
+#undef WSI_CB
+
     struct wsi_interface *                  wsi[VK_ICD_WSI_PLATFORM_MAX];
 };
 
@@ -149,7 +180,12 @@ wsi_device_init(struct wsi_device *wsi,
 
 #define WSI_CB(cb) PFN_vk##cb cb
 struct wsi_callbacks {
+   /* Hook taking a VkDevice and returning a VkPhysicalDevice. */
+   VkPhysicalDevice (*device_get_physical)(VkDevice);
+
+   /* Each WSI_CB(X) member below is a PFN_vkX function pointer named X. */
+   WSI_CB(GetDeviceProcAddr);
    WSI_CB(GetPhysicalDeviceFormatProperties);
+   WSI_CB(GetPhysicalDeviceMemoryProperties);
+   WSI_CB(GetPhysicalDeviceQueueFamilyProperties);
 };
 #undef WSI_CB
 
diff --git a/src/vulkan/wsi/wsi_common_private.h b/src/vulkan/wsi/wsi_common_private.h
index d178df7..3cef6dd 100644
--- a/src/vulkan/wsi/wsi_common_private.h
+++ b/src/vulkan/wsi/wsi_common_private.h
@@ -34,4 +34,20 @@ wsi_swapchain_init(const struct wsi_device *wsi,
 
 void wsi_swapchain_finish(struct wsi_swapchain *chain);
 
+/*
+ * Create a prime swapchain image: an optimally tiled image plus a linear,
+ * dma-buf exportable buffer and one pre-recorded copy command buffer per
+ * queue family.  Fills *image and returns a VkResult.
+ */
+VkResult
+wsi_create_prime_image(const struct wsi_swapchain *chain,
+                       const VkSwapchainCreateInfoKHR *pCreateInfo,
+                       struct wsi_image *image);
+
+/*
+ * Destroy the objects created by wsi_create_prime_image() for *image.
+ */
+void
+wsi_destroy_prime_image(const struct wsi_swapchain *chain,
+                        struct wsi_image *image);
+
+/*
+ * Submit the image's pre-recorded copy into its linear buffer on `queue`,
+ * waiting on the given semaphores.  Returns the submission's VkResult.
+ */
+VkResult
+wsi_prime_image_blit_to_linear(const struct wsi_swapchain *chain,
+                               struct wsi_image *image,
+                               VkQueue queue,
+                               uint32_t waitSemaphoreCount,
+                               const VkSemaphore *pWaitSemaphores);
+
 #endif /* WSI_COMMON_PRIVATE_H */
diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c
index b75a4d0..c24afcd 100644
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -662,6 +662,9 @@ static const struct wl_callback_listener frame_listener = {
 
 static VkResult
 wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
+                               VkQueue queue,
+                               uint32_t waitSemaphoreCount,
+                               const VkSemaphore *pWaitSemaphores,
                                uint32_t image_index,
                                const VkPresentRegionKHR *damage)
 {
@@ -731,8 +734,6 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain,
    result = chain->base.image_fns->create_wsi_image(vk_device,
                                                     pCreateInfo,
                                                     pAllocator,
-                                                    false,
-                                                    false,
                                                     &image->base);
    if (result != VK_SUCCESS)
       return result;
@@ -843,7 +844,6 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    chain->base.image_fns = image_fns;
    chain->base.present_mode = pCreateInfo->presentMode;
    chain->base.image_count = num_images;
-   chain->base.needs_linear_copy = false;
    chain->extent = pCreateInfo->imageExtent;
    chain->vk_format = pCreateInfo->imageFormat;
    chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, alpha);
diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
index c6d1f02..fd60cfa 100644
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@@ -616,7 +616,6 @@ VkResult wsi_create_xlib_surface(const VkAllocationCallbacks *pAllocator,
 
 struct x11_image {
    struct wsi_image                          base;
-   struct wsi_image                          linear_base;
    xcb_pixmap_t                              pixmap;
    bool                                      busy;
    struct xshmfence *                        shm_fence;
@@ -626,6 +625,8 @@ struct x11_image {
 struct x11_swapchain {
    struct wsi_swapchain                        base;
 
+   bool                                         use_prime_blit;
+
    xcb_connection_t *                           conn;
    xcb_window_t                                 window;
    xcb_gc_t                                     gc;
@@ -673,15 +674,6 @@ x11_get_images(struct wsi_swapchain *anv_chain,
    return result;
 }
 
-static void
-x11_get_image_and_linear(struct wsi_swapchain *drv_chain,
-                         int imageIndex, VkImage *image, VkImage *linear_image)
-{
-   struct x11_swapchain *chain = (struct x11_swapchain *)drv_chain;
-   *image = chain->images[imageIndex].base.image;
-   *linear_image = chain->images[imageIndex].linear_base.image;
-}
-
 static VkResult
 x11_handle_dri3_present_event(struct x11_swapchain *chain,
                               xcb_present_generic_event_t *event)
@@ -889,10 +881,24 @@ x11_acquire_next_image(struct wsi_swapchain *anv_chain,
 
 static VkResult
 x11_queue_present(struct wsi_swapchain *anv_chain,
+                  VkQueue queue,
+                  uint32_t waitSemaphoreCount,
+                  const VkSemaphore *pWaitSemaphores,
                   uint32_t image_index,
                   const VkPresentRegionKHR *damage)
 {
    struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
+   VkResult result;
+
+   if (chain->use_prime_blit) {
+      result = wsi_prime_image_blit_to_linear(&chain->base,
+                                              &chain->images[image_index].base,
+                                              queue,
+                                              waitSemaphoreCount,
+                                              pWaitSemaphores);
+      if (result != VK_SUCCESS)
+         return result;
+   }
 
    if (chain->threaded) {
       wsi_queue_push(&chain->present_queue, image_index);
@@ -960,46 +966,31 @@ x11_image_init(VkDevice device_h, struct x11_swapchain *chain,
    VkResult result;
    uint32_t bpp = 32;
 
-   result = chain->base.image_fns->create_wsi_image(device_h,
-                                                    pCreateInfo,
-                                                    pAllocator,
-                                                    chain->base.needs_linear_copy,
-                                                    false,
-                                                    &image->base);
-   if (result != VK_SUCCESS)
-      return result;
-
-   if (chain->base.needs_linear_copy) {
+   if (chain->use_prime_blit) {
+      result = wsi_create_prime_image(&chain->base, pCreateInfo, &image->base);
+   } else {
       result = chain->base.image_fns->create_wsi_image(device_h,
                                                        pCreateInfo,
                                                        pAllocator,
-                                                       chain->base.needs_linear_copy,
-                                                       true,
-                                                       &image->linear_base);
-
-      if (result != VK_SUCCESS) {
-         chain->base.image_fns->free_wsi_image(device_h, pAllocator,
-                                               &image->base);
-         return result;
-      }
+                                                       &image->base);
    }
+   if (result != VK_SUCCESS)
+      return result;
 
    image->pixmap = xcb_generate_id(chain->conn);
 
-   struct wsi_image *image_ws =
-      chain->base.needs_linear_copy ? &image->linear_base : &image->base;
    cookie =
       xcb_dri3_pixmap_from_buffer_checked(chain->conn,
                                           image->pixmap,
                                           chain->window,
-                                          image_ws->size,
+                                          image->base.size,
                                           pCreateInfo->imageExtent.width,
                                           pCreateInfo->imageExtent.height,
-                                          image_ws->row_pitch,
+                                          image->base.row_pitch,
                                           chain->depth, bpp,
-                                          image_ws->fd);
+                                          image->base.fd);
    xcb_discard_reply(chain->conn, cookie.sequence);
-   image_ws->fd = -1; /* XCB has now taken ownership of the FD */
+   image->base.fd = -1; /* XCB has now taken ownership of the FD */
 
    int fence_fd = xshmfence_alloc_shm();
    if (fence_fd < 0)
@@ -1028,11 +1019,11 @@ fail_pixmap:
    cookie = xcb_free_pixmap(chain->conn, image->pixmap);
    xcb_discard_reply(chain->conn, cookie.sequence);
 
-   if (chain->base.needs_linear_copy) {
-      chain->base.image_fns->free_wsi_image(device_h, pAllocator,
-                                            &image->linear_base);
+   if (chain->use_prime_blit) {
+      wsi_destroy_prime_image(&chain->base, &image->base);
+   } else {
+      chain->base.image_fns->free_wsi_image(device_h, pAllocator, &image->base);
    }
-   chain->base.image_fns->free_wsi_image(device_h, pAllocator, &image->base);
 
    return result;
 }
@@ -1051,12 +1042,12 @@ x11_image_finish(struct x11_swapchain *chain,
    cookie = xcb_free_pixmap(chain->conn, image->pixmap);
    xcb_discard_reply(chain->conn, cookie.sequence);
 
-   if (chain->base.needs_linear_copy) {
-      chain->base.image_fns->free_wsi_image(chain->base.device, pAllocator,
-                                            &image->linear_base);
+   if (chain->use_prime_blit) {
+      wsi_destroy_prime_image(&chain->base, &image->base);
+   } else {
+      chain->base.image_fns->free_wsi_image(chain->base.device,
+                                            pAllocator, &image->base);
    }
-   chain->base.image_fns->free_wsi_image(chain->base.device, pAllocator,
-                                         &image->base);
 }
 
 static VkResult
@@ -1132,7 +1123,6 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
 
    chain->base.destroy = x11_swapchain_destroy;
    chain->base.get_images = x11_get_images;
-   chain->base.get_image_and_linear = x11_get_image_and_linear;
    chain->base.acquire_next_image = x11_acquire_next_image;
    chain->base.queue_present = x11_queue_present;
    chain->base.image_fns = image_fns;
@@ -1148,9 +1138,10 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    chain->status = VK_SUCCESS;
 
 
-   chain->base.needs_linear_copy = false;
-   if (!wsi_x11_check_dri3_compatible(conn, local_fd))
-       chain->base.needs_linear_copy = true;
+   chain->use_prime_blit = false;
+   if (!wsi_x11_check_dri3_compatible(conn, local_fd)) {
+       chain->use_prime_blit = true;
+   }
 
    chain->event_id = xcb_generate_id(chain->conn);
    xcb_present_select_input(chain->conn, chain->event_id, chain->window,
-- 
2.5.0.400.gff86faf



More information about the mesa-dev mailing list