[Mesa-dev] [PATCH 3/3] gallium/radeon: use unflushed fences for deferred flushes (v2)
Marek Olšák
maraeo at gmail.com
Sat Aug 6 15:32:42 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
+23% Bioshock Infinite performance.
v2: - use the new fence_finish interface
    - allow deferred fences with multiple contexts
    - clear the ctx pointer after a deferred flush
---
src/gallium/drivers/radeon/r600_pipe_common.c | 44 ++++++++++++++++++++++++++-
1 file changed, 43 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 119fdf5..1c56e6e 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -40,20 +40,26 @@
#include <sys/utsname.h>
#ifndef HAVE_LLVM
#define HAVE_LLVM 0
#endif
/* Reference-counted fence object that r600_flush_from_st stores in *fence.
 * It bundles one fence for the gfx ring and one for the SDMA ring; either
 * pointer may be NULL when no fence was obtained for that ring.
 */
struct r600_multi_fence {
struct pipe_reference reference;
struct pipe_fence_handle *gfx;
struct pipe_fence_handle *sdma;
+
+ /* If the context wasn't flushed at fence creation, this is non-NULL.
+ * ctx is the context whose gfx IB was left unflushed, and ib_index is
+ * that context's num_gfx_cs_flushes value at fence creation.
+ * r600_fence_finish flushes only if both still match the waiting
+ * context, and then clears ctx.
+ */
+ struct {
+ struct r600_common_context *ctx;
+ unsigned ib_index;
+ } gfx_unflushed;
};
/*
* shader binary helpers.
*/
/* Reset *b by filling the whole structure with zero bytes. */
void radeon_shader_binary_init(struct radeon_shader_binary *b)
{
memset(b, 0, sizeof(*b));
}
@@ -255,42 +261,59 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
/* Flush the context's command streams and optionally hand back a fence.
 *
 * ctx:   the context to flush.
 * fence: may be NULL when the caller wants no fence. Otherwise, if at least
 *        one ring produced a fence, the old *fence reference is dropped and
 *        *fence is set to a newly allocated r600_multi_fence.
 * flags: PIPE_FLUSH_* bits; END_OF_FRAME and DEFERRED are translated into
 *        RADEON_FLUSH_END_OF_FRAME and RADEON_FLUSH_ASYNC respectively.
 *
 * The DMA ring is flushed first if the context has one. The gfx ring is
 * then either flushed, or (deferred flush with a fence requested) left
 * unflushed while only its next fence is fetched.
 */
static void r600_flush_from_st(struct pipe_context *ctx,
struct pipe_fence_handle **fence,
unsigned flags)
{
struct pipe_screen *screen = ctx->screen;
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned rflags = 0;
struct pipe_fence_handle *gfx_fence = NULL;
struct pipe_fence_handle *sdma_fence = NULL;
+ bool deferred_fence = false;
if (flags & PIPE_FLUSH_END_OF_FRAME)
rflags |= RADEON_FLUSH_END_OF_FRAME;
if (flags & PIPE_FLUSH_DEFERRED)
rflags |= RADEON_FLUSH_ASYNC;
if (rctx->dma.cs) {
rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
}
- rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+
+ /* Instead of flushing, create a deferred fence. Constraints:
+ * - The state tracker must allow a deferred flush.
+ * - The state tracker must request a fence.
+ * Thread safety in fence_finish must be ensured by the state tracker.
+ *
+ * The skipped flush is performed later by r600_fence_finish if the
+ * gfx IB has still not been flushed when the fence is waited on.
+ */
+ if (flags & PIPE_FLUSH_DEFERRED && fence) {
+ gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
+ deferred_fence = true;
+ } else {
+ rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+ }
/* Both engines can signal out of order, so we need to keep both fences. */
if (gfx_fence || sdma_fence) {
struct r600_multi_fence *multi_fence =
CALLOC_STRUCT(r600_multi_fence);
/* NOTE(review): on allocation failure *fence is left untouched and
 * gfx_fence/sdma_fence are not released here -- confirm whether this
 * leaks their references.
 */
if (!multi_fence)
return;
multi_fence->reference.count = 1;
multi_fence->gfx = gfx_fence;
multi_fence->sdma = sdma_fence;
+ if (deferred_fence) {
+ multi_fence->gfx_unflushed.ctx = rctx;
+ multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
+ }
+
screen->fence_reference(screen, fence, NULL);
*fence = (struct pipe_fence_handle*)multi_fence;
}
}
static void r600_flush_dma_ring(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct radeon_winsys_cs *cs = rctx->dma.cs;
@@ -953,36 +976,55 @@ static void r600_fence_reference(struct pipe_screen *screen,
*rdst = rsrc;
}
/* Wait for a multi-fence: first its SDMA fence, then its gfx fence.
 *
 * screen:  screen whose winsys performs the waits.
 * ctx:     context the caller is waiting from; may be NULL. It is only used
 *          to flush a deferred (still unflushed) gfx IB of that same context.
 * fence:   an r600_multi_fence.
 * timeout: relative timeout; 0 means "do not wait", PIPE_TIMEOUT_INFINITE
 *          is passed through unchanged.
 *
 * Returns false if the SDMA wait fails, or if timeout is 0 and the gfx IB
 * had to be flushed here. Returns true if the fence has no gfx part.
 * Otherwise returns the result of the gfx wait.
 */
static boolean r600_fence_finish(struct pipe_screen *screen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+ /* A NULL ctx casts to a NULL rctx, so no explicit NULL test is needed. */
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
if (rfence->sdma) {
if (!rws->fence_wait(rws, rfence->sdma, timeout))
return false;
/* Recompute the timeout after waiting. */
if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
int64_t time = os_time_get_nano();
timeout = abs_timeout > time ? abs_timeout - time : 0;
}
}
if (!rfence->gfx)
return true;
+ /* Flush the gfx IB if it hasn't been flushed yet. This applies only when
+ * the fence was created by this same context and the context's flush
+ * counter has not advanced since fence creation.
+ */
+ if (rctx &&
+ rfence->gfx_unflushed.ctx == rctx &&
+ rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
+ /* With a zero timeout the flush is requested with RADEON_FLUSH_ASYNC. */
+ rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
+ rfence->gfx_unflushed.ctx = NULL;
+
+ /* Zero timeout: report "not signalled" without waiting. */
+ if (!timeout)
+ return false;
+
+ /* Recompute the timeout after all that. timeout is known to be
+ * non-zero here, so only the infinite case has to be excluded.
+ */
+ if (timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t time = os_time_get_nano();
+ timeout = abs_timeout > time ? abs_timeout - time : 0;
+ }
+ }
+
return rws->fence_wait(rws, rfence->gfx, timeout);
}
static void r600_query_memory_info(struct pipe_screen *screen,
struct pipe_memory_info *info)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_winsys *ws = rscreen->ws;
unsigned vram_usage, gtt_usage;
--
2.7.4
More information about the mesa-dev mailing list