[Mesa-dev] [PATCH 8/8] gallium/radeon: use unflushed fences for deferred flushes

Tue Aug 2 10:27:55 UTC 2016

From: Marek Olšák <marek.olsak at amd.com>

+23% Bioshock Infinite performance.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 45 ++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 5e9d0b6..1a7bd7a 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -47,6 +47,12 @@ struct r600_multi_fence {
 	struct pipe_reference reference;
 	struct pipe_fence_handle *gfx;
 	struct pipe_fence_handle *sdma;
+
+	/* If the context wasn't flushed at fence creation, ctx is non-NULL. */
+	struct {
+		struct r600_common_context *ctx;
+		unsigned ib_index;
+	} gfx_unflushed;
 };
 
 /*
@@ -262,6 +268,7 @@ static void r600_flush_from_st(struct pipe_context *ctx,
 	unsigned rflags = 0;
 	struct pipe_fence_handle *gfx_fence = NULL;
 	struct pipe_fence_handle *sdma_fence = NULL;
+	bool deferred_fence = false;
 
 	if (flags & PIPE_FLUSH_END_OF_FRAME)
 		rflags |= RADEON_FLUSH_END_OF_FRAME;
@@ -271,7 +278,22 @@ static void r600_flush_from_st(struct pipe_context *ctx,
 	if (rctx->dma.cs) {
 		rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
 	}
-	rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+
+	/* Instead of flushing, create a deferred fence. Constraints:
+	 * - The state tracker must allow a deferred flush.
+	 * - The state tracker must request a fence.
+	 * - At most one API context can exist to ensure thread-safety.
+	 *   (that's a weak constraint though, we must be careful about
+	 *    how PIPE_FLUSH_DEFERRED is used)
+	 */
+	if (flags & PIPE_FLUSH_DEFERRED && fence &&
+	    /* internal aux_context + 1 API context */
+	    rctx->screen->num_contexts <= 2) {
+		gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
+		deferred_fence = true;
+	} else {
+		rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+	}
 
 	/* Both engines can signal out of order, so we need to keep both fences. */
 	if (gfx_fence || sdma_fence) {
@@ -284,6 +306,11 @@ static void r600_flush_from_st(struct pipe_context *ctx,
 		multi_fence->gfx = gfx_fence;
 		multi_fence->sdma = sdma_fence;
 
+		if (deferred_fence) {
+			multi_fence->gfx_unflushed.ctx = rctx;
+			multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
+		}
+
 		screen->fence_reference(screen, fence, NULL);
 		*fence = (struct pipe_fence_handle*)multi_fence;
 	}
@@ -962,6 +989,7 @@ static boolean r600_fence_finish(struct pipe_screen *screen,
 {
 	struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
 	struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+	struct r600_common_context *rctx;
 	int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
 
 	if (rfence->sdma) {
@@ -978,6 +1006,21 @@ static boolean r600_fence_finish(struct pipe_screen *screen,
 	if (!rfence->gfx)
 		return true;
 
+	/* Flush the gfx IB if it hasn't been flushed yet. */
+	rctx = rfence->gfx_unflushed.ctx;
+	if (rctx && rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
+		rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
+
+		if (!timeout)
+			return false;
+
+		/* Recompute the timeout after all that. */
+		if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+			int64_t time = os_time_get_nano();
+			timeout = abs_timeout > time ? abs_timeout - time : 0;
+		}
+	}
+
 	return rws->fence_wait(rws, rfence->gfx, timeout);
 }
 
-- 
2.7.4