[Mesa-dev] [PATCH] radv/query: handle multiview queries properly. (v2)

Dave Airlie airlied at gmail.com
Fri Mar 16 06:32:55 UTC 2018


From: Dave Airlie <airlied at redhat.com>

For multiview we need to emit a number of sequential queries,
one for each bit set in the view mask.

This avoids dEQP-VK.multiview.queries.15 waiting forever
on the CPU for query results that will never arrive.

This doesn't make the test pass by itself, but I can now
finish my CTS run.

It turns out we only really want to emit one real query,
and the rest should be blank (amdvlk does the same), so we
emit immediate begin/end pairs for every query except the
first one.
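
In outline, the begin path then looks like this (a sketch using
the helpers this patch factors out; view_mask here stands for
cmd_buffer->state.subpass->view_mask):

    int num_extra_queries = util_bitcount(view_mask) - 1;

    /* The first query accumulates the results for all views. */
    emit_begin_query(cmd_buffer, va, pool->type);

    /* The remaining per-view slots only need to read back as
     * available, so give each one an empty begin/end pair. */
    for (unsigned i = 0; i < num_extra_queries; i++) {
            va += pool->stride;
            avail_va += 4;
            emit_begin_query(cmd_buffer, va, pool->type);
            emit_end_query(cmd_buffer, va, avail_va, pool->type);
    }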

v2: fix the actual tests.
Fixes: dEQP-VK.multiview.queries*

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/amd/vulkan/radv_query.c | 193 ++++++++++++++++++++++++++------------------
 1 file changed, 116 insertions(+), 77 deletions(-)

diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 9fee4d2b491..74cc9981b7c 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1077,33 +1077,12 @@ void radv_CmdResetQueryPool(
 	}
 }
 
-void radv_CmdBeginQuery(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    query,
-    VkQueryControlFlags                         flags)
+static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer,
+			     uint64_t va,
+			     VkQueryType query_type)
 {
-	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
 	struct radeon_winsys_cs *cs = cmd_buffer->cs;
-	uint64_t va = radv_buffer_get_va(pool->bo);
-	va += pool->stride * query;
-
-	radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo, 8);
-
-	if (cmd_buffer->pending_reset_query) {
-		if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
-			/* Only need to flush caches if the query pool size is
-			 * large enough to be resetted using the compute shader
-			 * path. Small pools don't need any cache flushes
-			 * because we use a CP dma clear.
-			 */
-			si_emit_cache_flush(cmd_buffer);
-			cmd_buffer->pending_reset_query = false;
-		}
-	}
-
-	switch (pool->type) {
+	switch (query_type) {
 	case VK_QUERY_TYPE_OCCLUSION:
 		radeon_check_space(cmd_buffer->device->ws, cs, 7);
 
@@ -1127,26 +1106,15 @@ void radv_CmdBeginQuery(
 	default:
 		unreachable("beginning unhandled query type");
 	}
 }
 
 
-void radv_CmdEndQuery(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    query)
+static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
+			   uint64_t va, uint64_t avail_va,
+			   VkQueryType query_type)
 {
-	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
 	struct radeon_winsys_cs *cs = cmd_buffer->cs;
-	uint64_t va = radv_buffer_get_va(pool->bo);
-	uint64_t avail_va = va + pool->availability_offset + 4 * query;
-	va += pool->stride * query;
-
-	/* Do not need to add the pool BO to the list because the query must
-	 * currently be active, which means the BO is already in the list.
-	 */
-
-	switch (pool->type) {
+	switch (query_type) {
 	case VK_QUERY_TYPE_OCCLUSION:
 		radeon_check_space(cmd_buffer->device->ws, cs, 14);
 
@@ -1182,6 +1150,69 @@
 	}
 }
 
+void radv_CmdBeginQuery(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    query,
+    VkQueryControlFlags                         flags)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+	struct radeon_winsys_cs *cs = cmd_buffer->cs;
+	uint64_t va = radv_buffer_get_va(pool->bo);
+	uint64_t avail_va = va + pool->availability_offset + 4 * query;
+
+	radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo, 8);
+
+	if (cmd_buffer->pending_reset_query) {
+		if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
+			/* Only need to flush caches if the query pool size is
+			 * large enough to be reset using the compute shader
+			 * path. Small pools don't need any cache flushes
+			 * because we use a CP DMA clear.
+			 */
+			si_emit_cache_flush(cmd_buffer);
+			cmd_buffer->pending_reset_query = false;
+		}
+	}
+
+	int num_extra_queries = 0;
+	if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask)
+		num_extra_queries = util_bitcount(cmd_buffer->state.subpass->view_mask) - 1;
+
+	va += pool->stride * query;
+
+	emit_begin_query(cmd_buffer, va, pool->type);
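+
+	/* For multiview, the first query gets the accumulated result for
+	 * all views; the remaining per-view slots just need to read back
+	 * as available, so emit an empty begin/end pair for each one. */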
+	for (unsigned i = 0; i < num_extra_queries; i++) {
+		va += pool->stride;
+		avail_va += 4;
+		emit_begin_query(cmd_buffer, va, pool->type);
+		emit_end_query(cmd_buffer, va, avail_va, pool->type);
+	}
+}
+
+
+void radv_CmdEndQuery(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    query)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+	uint64_t va = radv_buffer_get_va(pool->bo);
+	uint64_t avail_va = va + pool->availability_offset + 4 * query;
+	va += pool->stride * query;
+
+	/* Do not need to add the pool BO to the list because the query must
+	 * currently be active, which means the BO is already in the list.
+	 */
+	emit_end_query(cmd_buffer, va, avail_va, pool->type);
+}
+
 void radv_CmdWriteTimestamp(
     VkCommandBuffer                             commandBuffer,
     VkPipelineStageFlagBits                     pipelineStage,
@@ -1198,42 +1229,50 @@
 
 	radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo, 5);
 
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);
-
-	switch(pipelineStage) {
-	case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
-		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-		radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
-		                COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
-		                COPY_DATA_DST_SEL(V_370_MEM_ASYNC));
-		radeon_emit(cs, 0);
-		radeon_emit(cs, 0);
-		radeon_emit(cs, query_va);
-		radeon_emit(cs, query_va >> 32);
-
-		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-		radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
-		                S_370_WR_CONFIRM(1) |
-		                S_370_ENGINE_SEL(V_370_ME));
-		radeon_emit(cs, avail_va);
-		radeon_emit(cs, avail_va >> 32);
-		radeon_emit(cs, 1);
-		break;
-	default:
-		si_cs_emit_write_event_eop(cs,
-					   false,
-					   cmd_buffer->device->physical_device->rad_info.chip_class,
-					   mec,
-					   V_028A90_BOTTOM_OF_PIPE_TS, 0,
-					   3, query_va, 0, 0);
-		si_cs_emit_write_event_eop(cs,
-					   false,
-					   cmd_buffer->device->physical_device->rad_info.chip_class,
-					   mec,
-					   V_028A90_BOTTOM_OF_PIPE_TS, 0,
-					   1, avail_va, 0, 1);
-		break;
-	}
+	int num_queries = 1;
+	if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask)
+		num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask);
+
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28 * num_queries);
 
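+	/* With multiview enabled, emit one timestamp per set view bit. */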
+	for (unsigned i = 0; i < num_queries; i++) {
+		switch(pipelineStage) {
+		case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+			radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
+				    COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
+				    COPY_DATA_DST_SEL(V_370_MEM_ASYNC));
+			radeon_emit(cs, 0);
+			radeon_emit(cs, 0);
+			radeon_emit(cs, query_va);
+			radeon_emit(cs, query_va >> 32);
+
+			radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+			radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+				    S_370_WR_CONFIRM(1) |
+				    S_370_ENGINE_SEL(V_370_ME));
+			radeon_emit(cs, avail_va);
+			radeon_emit(cs, avail_va >> 32);
+			radeon_emit(cs, 1);
+			break;
+		default:
+			si_cs_emit_write_event_eop(cs,
+						   false,
+						   cmd_buffer->device->physical_device->rad_info.chip_class,
+						   mec,
+						   V_028A90_BOTTOM_OF_PIPE_TS, 0,
+						   3, query_va, 0, 0);
+			si_cs_emit_write_event_eop(cs,
+						   false,
+						   cmd_buffer->device->physical_device->rad_info.chip_class,
+						   mec,
+						   V_028A90_BOTTOM_OF_PIPE_TS, 0,
+						   1, avail_va, 0, 1);
+			break;
+		}
+		query_va += pool->stride;
+		avail_va += 4;
+	}
 	assert(cmd_buffer->cs->cdw <= cdw_max);
 }
-- 
2.14.3
