[Mesa-dev] [PATCH] [rfc] radv: add initial prime support.

Wed Nov 23 05:28:58 UTC 2016

From: Dave Airlie <airlied at redhat.com>

This is kind of a gross hack, and Vulkan doesn't specify anything here,
but it would be nice to let people with PRIME systems at least
see some stuff rendering for now.

This creates a linear shadow image in GART that gets blitted to at the
image transition.

Now ideally:
this would use SDMA - but we want to use SDMA for transfer queues;
maybe we don't expose a transfer queue on PRIME cards, who knows.

We wouldn't have to add two pointers to every image, but my other
attempts at this were ugly.

Is the image transition the proper place to hack this in? I'm not
really sure anywhere else is appropriate.

It also relies on DRI_PRIME=1 being set. I should be able
to work this out automatically somehow, I think, probably by getting
a DRI3 fd from the X server, doing drmGetDevice on it, and
comparing where we end up.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/amd/vulkan/radv_cmd_buffer.c |  18 +++++++
 src/amd/vulkan/radv_device.c     |   3 ++
 src/amd/vulkan/radv_meta.h       |   2 +
 src/amd/vulkan/radv_meta_copy.c  |  31 +++++++++++
 src/amd/vulkan/radv_private.h    |   4 ++
 src/amd/vulkan/radv_wsi.c        | 111 ++++++++++++++++++++++++++++++---------
 6 files changed, 144 insertions(+), 25 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index a2d55833..4432afc 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2296,6 +2296,20 @@ static void radv_handle_dcc_image_transition(struct radv_cmd_buffer *cmd_buffer,
 	}
 }
 
+static void radv_handle_prime_image_transition(struct radv_cmd_buffer *cmd_buffer,
+					       struct radv_image *image,
+					       VkImageLayout src_layout, /* not read in this body */
+					       VkImageLayout dst_layout, /* not read in this body */
+					       VkImageSubresourceRange range, /* not read in this body */
+					       VkImageAspectFlags pending_clears) /* not read in this body */
+{ /* Blits image into image->prime_image, requesting a framebuffer cache flush both before and after the blit. */
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER; /* request the flush ahead of the blit */
+	si_emit_cache_flush(cmd_buffer); /* presumably emits the flush bits accumulated above */
+	radv_blit_to_prime_linear(cmd_buffer, image); /* copy into the image's prime_image */
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER; /* same flush bit again, now after the blit */
+	si_emit_cache_flush(cmd_buffer);
+}
+
 static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageLayout src_layout,
@@ -2314,6 +2328,10 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
 	if (image->surface.dcc_size)
 		radv_handle_dcc_image_transition(cmd_buffer, image, src_layout,
 						 dst_layout, range, pending_clears);
+
+	if (image->prime_image && dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR)
+		radv_handle_prime_image_transition(cmd_buffer, image, src_layout,
+						   dst_layout, range, pending_clears);
 }
 
 void radv_CmdPipelineBarrier(
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index c639d53..b21447f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -105,6 +105,9 @@ radv_physical_device_init(struct radv_physical_device *device,
 	}
 	drmFreeVersion(version);
 
+	if (getenv("DRI_PRIME"))
+		device->is_different_gpu = true;
+
 	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
 	device->instance = instance;
 	assert(strlen(path) < ARRAY_SIZE(device->path));
diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index 97d020c..e43a0e7 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -186,6 +186,8 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     uint32_t region_count,
 				     const VkImageResolve *regions);
 
+void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
+			       struct radv_image *image);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c
index 4c01eb7..3fd8d0c 100644
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -397,3 +397,34 @@ void radv_CmdCopyImage(
 
 	radv_meta_restore(&saved_state, cmd_buffer);
 }
+
+void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
+			       struct radv_image *image)
+{ /* Blits from image into image->prime_image using a rect of image->extent width/height. */
+	struct radv_meta_saved_state saved_state;
+	struct radv_meta_saved_pass_state saved_pass_state;
+
+	radv_meta_save_pass(&saved_pass_state, cmd_buffer); /* pass state is saved first and restored last */
+	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+
+	struct radv_meta_blit2d_surf b_src = /* source surface: the image passed in */
+		blit_surf_for_image_level_layer(image,
+						VK_IMAGE_ASPECT_COLOR_BIT,
+						0, /* presumably mip level 0 and array layer 0, going by the helper's name */
+						0);
+
+	struct radv_meta_blit2d_surf b_dst = /* destination surface: image->prime_image, passed without a NULL check here */
+		blit_surf_for_image_level_layer(image->prime_image,
+						VK_IMAGE_ASPECT_COLOR_BIT,
+						0,
+						0);
+	struct radv_meta_blit2d_rect rect = { /* members not named here are zero-initialized */
+		.width = image->extent.width,
+		.height = image->extent.height,
+	};
+
+	radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect); /* presumably a rect count of 1; NOTE(review): meaning of the NULL third argument is not visible here */
+
+	radv_meta_restore(&saved_state, cmd_buffer); /* restores happen in the reverse order of the saves */
+	radv_meta_restore_pass(&saved_pass_state, cmd_buffer);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index e1c24cb..5027431 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -261,6 +261,7 @@ struct radv_physical_device {
 	uint8_t                                     uuid[VK_UUID_SIZE];
 
 	struct wsi_device                       wsi_device;
+	bool is_different_gpu;
 };
 
 struct radv_instance {
@@ -987,6 +988,9 @@ struct radv_image {
 
 	/* Depth buffer compression and fast clear. */
 	struct r600_htile_info htile;
+
+	struct radv_image *prime_image;
+	struct radv_device_memory *prime_memory;
 };
 
 bool radv_layout_has_htile(const struct radv_image *image,
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 1f1ab1c..31aeb77 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -135,23 +135,27 @@ VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR(
 					pPresentModes);
 }
 
+static void
+radv_wsi_image_destroy_single(VkDevice device_h,
+			      const VkAllocationCallbacks* pAllocator,
+			      VkImage image_h,
+			      VkDeviceMemory memory_h)
+{ /* Destroys one image and then frees one memory object, both with the caller's pAllocator. */
+	radv_DestroyImage(device_h, image_h, pAllocator);
+	radv_FreeMemory(device_h, memory_h, pAllocator); /* NOTE(review): the allocation visible in radv_wsi_image_create_single passes NULL as its allocator - confirm the mismatch is intended */
+}
+
 static VkResult
-radv_wsi_image_create(VkDevice device_h,
-		      const VkSwapchainCreateInfoKHR *pCreateInfo,
-		      const VkAllocationCallbacks* pAllocator,
-		      VkImage *image_p,
-		      VkDeviceMemory *memory_p,
-		      uint32_t *size,
-		      uint32_t *offset,
-		      uint32_t *row_pitch, int *fd_p)
+radv_wsi_image_create_single(VkDevice device_h,
+			     const VkSwapchainCreateInfoKHR *pCreateInfo,
+			     const VkAllocationCallbacks* pAllocator,
+			     VkImage *image_p,
+			     VkDeviceMemory *memory_p,
+			     bool tiled)
 {
-	struct radv_device *device = radv_device_from_handle(device_h);
-	VkResult result = VK_SUCCESS;
-	struct radeon_surf *surface;
+	VkResult result;
 	VkImage image_h;
 	struct radv_image *image;
-	bool bret;
-	int fd;
 
 	result = radv_image_create(device_h,
 				   &(struct radv_image_create_info) {
@@ -169,7 +173,7 @@ radv_wsi_image_create(VkDevice device_h,
 						   .arrayLayers = 1,
 						   .samples = 1,
 						   /* FIXME: Need a way to use X tiling to allow scanout */
-						   .tiling = VK_IMAGE_TILING_OPTIMAL,
+						   .tiling = tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
 						   .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
 						   .flags = 0,
 					   },
@@ -180,24 +184,74 @@ radv_wsi_image_create(VkDevice device_h,
 		return result;
 
 	image = radv_image_from_handle(image_h);
-
 	VkDeviceMemory memory_h;
-	struct radv_device_memory *memory;
+
 	result = radv_AllocateMemory(device_h,
 				     &(VkMemoryAllocateInfo) {
 					     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 						     .allocationSize = image->size,
-						     .memoryTypeIndex = 0,
+						     .memoryTypeIndex = tiled ? 0 : 1,
 						     },
 				     NULL /* XXX: pAllocator */,
 				     &memory_h);
 	if (result != VK_SUCCESS)
 		goto fail_create_image;
 
-	memory = radv_device_memory_from_handle(memory_h);
-
 	radv_BindImageMemory(VK_NULL_HANDLE, image_h, memory_h, 0);
 
+	*image_p = image_h;
+	*memory_p = memory_h;
+	return VK_SUCCESS;
+fail_create_image:
+	radv_DestroyImage(device_h, image_h, pAllocator);
+	return result;
+}
+
+static VkResult
+radv_wsi_image_create(VkDevice device_h,
+		      const VkSwapchainCreateInfoKHR *pCreateInfo,
+		      const VkAllocationCallbacks* pAllocator,
+		      VkImage *image_p,
+		      VkDeviceMemory *memory_p,
+		      uint32_t *size,
+		      uint32_t *offset,
+		      uint32_t *row_pitch, int *fd_p)
+{
+	struct radv_device *device = radv_device_from_handle(device_h);
+	VkResult result = VK_SUCCESS;
+	struct radeon_surf *surface;
+	VkImage image_h, image_prime_h;
+	VkDeviceMemory memory_h, memory_prime_h;
+	struct radv_image *image;
+	struct radv_device_memory *memory;
+	bool bret;
+	int fd;
+	bool prime = device->instance->physicalDevice.is_different_gpu;
+
+	result = radv_wsi_image_create_single(device_h, pCreateInfo,
+					      pAllocator, &image_h, &memory_h,
+					      true);
+	if (result != VK_SUCCESS)
+		return result;
+
+	image = radv_image_from_handle(image_h);
+	if (prime) {
+		result = radv_wsi_image_create_single(device_h, pCreateInfo,
+						      pAllocator, &image_prime_h,
+						      &memory_prime_h, false);
+
+		if (result != VK_SUCCESS)
+			goto fail_create_image;
+
+		image->prime_image = radv_image_from_handle(image_prime_h);
+		image->prime_memory = radv_device_memory_from_handle(memory_prime_h);
+
+		memory = image->prime_memory;
+		image = image->prime_image;
+	} else {
+		memory = radv_device_memory_from_handle(memory_h);
+	}
+
 	bret = device->ws->buffer_get_fd(device->ws,
 					 memory->bo, &fd);
 	if (bret == false)
@@ -217,24 +271,31 @@ radv_wsi_image_create(VkDevice device_h,
 	*offset = image->offset;
 	*row_pitch = surface->level[0].pitch_bytes;
 	return VK_SUCCESS;
- fail_alloc_memory:
-	radv_FreeMemory(device_h, memory_h, pAllocator);
+
+fail_alloc_memory:
+	if (prime)
+		radv_wsi_image_destroy_single(device_h, pAllocator, image_prime_h, memory_prime_h);
 
 fail_create_image:
-	radv_DestroyImage(device_h, image_h, pAllocator);
+	radv_wsi_image_destroy_single(device_h, pAllocator, image_h, memory_h);
 
 	return result;
 }
 
 static void
-radv_wsi_image_free(VkDevice device,
+radv_wsi_image_free(VkDevice device_h,
 		    const VkAllocationCallbacks* pAllocator,
 		    VkImage image_h,
 		    VkDeviceMemory memory_h)
 {
-	radv_DestroyImage(device, image_h, pAllocator);
+	RADV_FROM_HANDLE(radv_image, image, image_h);
+
+	if (image->prime_image)
+		radv_wsi_image_destroy_single(device_h, pAllocator,
+					      radv_image_to_handle(image->prime_image),
+					      radv_device_memory_to_handle(image->prime_memory));
 
-	radv_FreeMemory(device, memory_h, pAllocator);
+	radv_wsi_image_destroy_single(device_h, pAllocator, image_h, memory_h);
 }
 
 static const struct wsi_image_fns radv_wsi_image_fns = {
-- 
2.9.3