[PATCH 1/2] radv: use a winsys context per-queue, instead of per device

Wed Jan 4 02:22:28 UTC 2017

Queues are independent execution streams. The Vulkan spec provides no
ordering guarantees between different queues.

By using a single context for all queues, we are forcing all commands
into an unnecessary FIFO ordering.

This change is a preparatory step to allow out-of-order scheduling of
certain work tasks.

As a side effect, vkQueueWaitIdle will be marginally faster. Previously,
due to the shared context, vkQueueWaitIdle was equivalent to
vkDeviceWaitIdle.
---
 src/amd/vulkan/radv_device.c  | 35 ++++++++++++++++++++---------------
 src/amd/vulkan/radv_private.h |  2 +-
 src/amd/vulkan/radv_wsi.c     |  2 +-
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index e57a419..d9f9a2b 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -651,7 +651,7 @@ void radv_GetPhysicalDeviceMemoryProperties(
 	};
 }
 
-static void
+static int
 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
 		int queue_family_index, int idx)
 {
@@ -659,11 +659,19 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue,
 	queue->device = device;
 	queue->queue_family_index = queue_family_index;
 	queue->queue_idx = idx;
+
+	queue->hw_ctx = device->ws->ctx_create(device->ws);
+	if (!queue->hw_ctx)
+		return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+	return VK_SUCCESS;
 }
 
 static void
 radv_queue_finish(struct radv_queue *queue)
 {
+	if (queue->hw_ctx)
+		queue->device->ws->ctx_destroy(queue->hw_ctx);
 }
 
 VkResult radv_CreateDevice(
@@ -718,23 +726,21 @@ VkResult radv_CreateDevice(
 			goto fail;
 		}
 
-		device->queue_count[qfi] = queue_create->queueCount;
+		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
 
-		for (unsigned q = 0; q < queue_create->queueCount; q++)
-			radv_queue_init(device, &device->queues[qfi][q], qfi, q);
-	}
+		device->queue_count[qfi] = queue_create->queueCount;
 
-	device->hw_ctx = device->ws->ctx_create(device->ws);
-	if (!device->hw_ctx) {
-		result = VK_ERROR_OUT_OF_HOST_MEMORY;
-		goto fail;
+		for (unsigned q = 0; q < queue_create->queueCount; q++) {
+			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
+			if (result != VK_SUCCESS)
+				goto fail;
+		}
 	}
 
 	result = radv_device_init_meta(device);
-	if (result != VK_SUCCESS) {
-		device->ws->ctx_destroy(device->hw_ctx);
+	if (result != VK_SUCCESS)
 		goto fail;
-	}
+
 	device->allow_fast_clears = env_var_as_boolean("RADV_FAST_CLEARS", false);
 	device->allow_dcc = !env_var_as_boolean("RADV_DCC_DISABLE", false);
 	device->shader_stats_dump = env_var_as_boolean("RADV_SHADER_STATS", false);
@@ -780,7 +786,6 @@ void radv_DestroyDevice(
 {
 	RADV_FROM_HANDLE(radv_device, device, _device);
 
-	device->ws->ctx_destroy(device->hw_ctx);
 	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
 		for (unsigned q = 0; q < device->queue_count[i]; q++)
 			radv_queue_finish(&device->queues[i][q]);
@@ -878,7 +883,7 @@ VkResult radv_QueueSubmit(
 	RADV_FROM_HANDLE(radv_queue, queue, _queue);
 	RADV_FROM_HANDLE(radv_fence, fence, _fence);
 	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
-	struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
+	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
 	int ret;
 
 	for (uint32_t i = 0; i < submitCount; i++) {
@@ -929,7 +934,7 @@ VkResult radv_QueueWaitIdle(
 {
 	RADV_FROM_HANDLE(radv_queue, queue, _queue);
 
-	queue->device->ws->ctx_wait_idle(queue->device->hw_ctx,
+	queue->device->ws->ctx_wait_idle(queue->hw_ctx,
 	                                 radv_queue_family_to_ring(queue->queue_family_index),
 	                                 queue->queue_idx);
 	return VK_SUCCESS;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index f76d38d..d316f71 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -456,6 +456,7 @@ enum ring_type radv_queue_family_to_ring(int f);
 struct radv_queue {
 	VK_LOADER_DATA                              _loader_data;
 	struct radv_device *                         device;
+	struct radeon_winsys_ctx                    *hw_ctx;
 	int queue_family_index;
 	int queue_idx;
 };
@@ -467,7 +468,6 @@ struct radv_device {
 
 	struct radv_instance *                       instance;
 	struct radeon_winsys *ws;
-	struct radeon_winsys_ctx *hw_ctx;
 
 	struct radv_meta_state                       meta_state;
 
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 2eb8e45..752efe4 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -364,7 +364,7 @@ VkResult radv_QueuePresentKHR(
 
 		RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]);
 		struct radeon_winsys_fence *base_fence = fence->fence;
-		struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
+		struct radeon_winsys_ctx *ctx = queue->hw_ctx;
 		queue->device->ws->cs_submit(ctx, queue->queue_idx,
 					     &queue->device->empty_cs[queue->queue_family_index],
 					     1,
-- 
2.9.3