[Mesa-dev] [PATCH 3/4] radv: Implement more efficient !waitAll fence waiting.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Mon Feb 26 23:37:28 UTC 2018


---
 src/amd/vulkan/radv_device.c                  | 36 +++++++++++++++++++++++++++
 src/amd/vulkan/radv_radeon_winsys.h           |  5 ++++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 34 +++++++++++++++++++++++++
 3 files changed, 75 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 3355fd0b4b..fcf333216a 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2925,6 +2925,17 @@ static uint64_t radv_get_absolute_timeout(uint64_t timeout)
 	return current_time + timeout;
 }
 
+/* True iff every fence has neither syncobj nor temp_syncobj set and is already signalled or submitted. */
+static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
+{
+	for (uint32_t idx = 0; idx < fenceCount; idx++) {
+		RADV_FROM_HANDLE(radv_fence, cur, pFences[idx]);
+		if (cur->syncobj || cur->temp_syncobj || !(cur->signalled || cur->submitted))
+			return false;
+	}
+	return true;
+}
+
 VkResult radv_WaitForFences(
 	VkDevice                                    _device,
 	uint32_t                                    fenceCount,
@@ -2936,6 +2947,31 @@ VkResult radv_WaitForFences(
 	timeout = radv_get_absolute_timeout(timeout);
 
 	if (!waitAll && fenceCount > 1) {
+		/* Fast path: if every fence is a plain, submitted winsys fence, wait on all of them with one winsys
+		 * call rather than the status-polling loop below. Not done for waitAll, due to needing to allocate twice. */
+		if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
+			uint32_t wait_count = 0;
+			struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
+			if (!fences)
+				return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+			for (uint32_t i = 0; i < fenceCount; ++i) {
+				RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+				/* An already signalled fence satisfies a wait-any request immediately. */
+				if (fence->signalled) {
+					free(fences);
+					return VK_SUCCESS;
+				}
+				fences[wait_count++] = fence->fence;
+			}
+
+			/* timeout is absolute here; clamp the remaining time to 0 so a passed deadline cannot wrap around. */
+			uint64_t now = radv_get_current_time();
+			bool success = device->ws->fences_wait(device->ws, fences, wait_count,
+							       waitAll, timeout > now ? timeout - now : 0);
+			free(fences);
+			return success ? VK_SUCCESS : VK_TIMEOUT;
+		}
+
 		while(radv_get_current_time() <= timeout) {
 			for (uint32_t i = 0; i < fenceCount; ++i) {
 				if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 4c306692e5..643d76a826 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -270,6 +270,11 @@ struct radeon_winsys {
 			   struct radeon_winsys_fence *fence,
 			   bool absolute,
 			   uint64_t timeout);
+	bool (*fences_wait)(struct radeon_winsys *ws,
+			    struct radeon_winsys_fence *const *fences,
+			    uint32_t fence_count,
+			    bool wait_all,
+			    uint64_t timeout);
 
 	/* old semaphores - non shareable */
 	struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws);
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 5632b1d4ee..d2b33546cc 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -154,6 +154,39 @@ static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
 	return false;
 }
 
+/* Wait on an array of winsys fences with one amdgpu_cs_wait_fences() call,
+ * after copying each fence into a temporary amdgpu_cs_fence array. Returns true
+ * only if the call succeeds and sets `expired` non-zero; otherwise false. */
+static bool radv_amdgpu_fences_wait(struct radeon_winsys *_ws,
+			      struct radeon_winsys_fence *const *_fences,
+			      uint32_t fence_count,
+			      bool wait_all,
+			      uint64_t timeout)
+{
+	struct amdgpu_cs_fence *cs_fences;
+	uint32_t expired = 0, first = 0;
+	int ret;
+
+	cs_fences = malloc(fence_count * sizeof(*cs_fences));
+	if (cs_fences == NULL)
+		return false;
+
+	for (uint32_t idx = 0; idx < fence_count; idx++) {
+		const struct radv_amdgpu_fence *src = (const struct radv_amdgpu_fence *)_fences[idx];
+		cs_fences[idx] = src->fence;
+	}
+
+	ret = amdgpu_cs_wait_fences(cs_fences, fence_count, wait_all,
+	                            timeout, &expired, &first);
+	free(cs_fences);
+	if (ret != 0) {
+		fprintf(stderr, "amdgpu: amdgpu_cs_wait_fences failed.\n");
+		return false;
+	}
+
+	return expired != 0;
+}
+
 static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
 {
 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
@@ -1387,4 +1420,5 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 	ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
 	ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
 	ws->base.fence_wait = radv_amdgpu_fence_wait;
+	ws->base.fences_wait = radv_amdgpu_fences_wait;
 }
-- 
2.16.1



More information about the mesa-dev mailing list