[Mesa-dev] [PATCH 3/3] gallium/radeon: use unflushed fences for deferred flushes (v2)
Nicolai Hähnle
nhaehnle at gmail.com
Tue Aug 9 08:54:29 UTC 2016
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
On 06.08.2016 17:32, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> +23% Bioshock Infinite performance.
>
> v2: - use the new fence_finish interface
> - allow deferred fences with multiple contexts
> - clear the ctx pointer after a deferred flush
> ---
> src/gallium/drivers/radeon/r600_pipe_common.c | 44 ++++++++++++++++++++++++++-
> 1 file changed, 43 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
> index 119fdf5..1c56e6e 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -40,20 +40,26 @@
> #include <sys/utsname.h>
>
> #ifndef HAVE_LLVM
> #define HAVE_LLVM 0
> #endif
>
> struct r600_multi_fence {
> struct pipe_reference reference;
> struct pipe_fence_handle *gfx;
> struct pipe_fence_handle *sdma;
> +
> + /* If the context wasn't flushed at fence creation, this is non-NULL. */
> + struct {
> + struct r600_common_context *ctx;
> + unsigned ib_index;
> + } gfx_unflushed;
> };
>
> /*
> * shader binary helpers.
> */
> void radeon_shader_binary_init(struct radeon_shader_binary *b)
> {
> memset(b, 0, sizeof(*b));
> }
>
> @@ -255,42 +261,59 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
>
> static void r600_flush_from_st(struct pipe_context *ctx,
> struct pipe_fence_handle **fence,
> unsigned flags)
> {
> struct pipe_screen *screen = ctx->screen;
> struct r600_common_context *rctx = (struct r600_common_context *)ctx;
> unsigned rflags = 0;
> struct pipe_fence_handle *gfx_fence = NULL;
> struct pipe_fence_handle *sdma_fence = NULL;
> + bool deferred_fence = false;
>
> if (flags & PIPE_FLUSH_END_OF_FRAME)
> rflags |= RADEON_FLUSH_END_OF_FRAME;
> if (flags & PIPE_FLUSH_DEFERRED)
> rflags |= RADEON_FLUSH_ASYNC;
>
> if (rctx->dma.cs) {
> rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
> }
> - rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
> +
> + /* Instead of flushing, create a deferred fence. Constraints:
> + * - The state tracker must allow a deferred flush.
> + * - The state tracker must request a fence.
> + * Thread safety in fence_finish must be ensured by the state tracker.
> + */
> + if (flags & PIPE_FLUSH_DEFERRED && fence) {
> + gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
> + deferred_fence = true;
> + } else {
> + rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
> + }
>
> /* Both engines can signal out of order, so we need to keep both fences. */
> if (gfx_fence || sdma_fence) {
> struct r600_multi_fence *multi_fence =
> CALLOC_STRUCT(r600_multi_fence);
> if (!multi_fence)
> return;
>
> multi_fence->reference.count = 1;
> multi_fence->gfx = gfx_fence;
> multi_fence->sdma = sdma_fence;
>
> + if (deferred_fence) {
> + multi_fence->gfx_unflushed.ctx = rctx;
> + multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
> + }
> +
> screen->fence_reference(screen, fence, NULL);
> *fence = (struct pipe_fence_handle*)multi_fence;
> }
> }
>
> static void r600_flush_dma_ring(void *ctx, unsigned flags,
> struct pipe_fence_handle **fence)
> {
> struct r600_common_context *rctx = (struct r600_common_context *)ctx;
> struct radeon_winsys_cs *cs = rctx->dma.cs;
> @@ -953,36 +976,55 @@ static void r600_fence_reference(struct pipe_screen *screen,
> *rdst = rsrc;
> }
>
> static boolean r600_fence_finish(struct pipe_screen *screen,
> struct pipe_context *ctx,
> struct pipe_fence_handle *fence,
> uint64_t timeout)
> {
> struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
> struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
> + struct r600_common_context *rctx =
> + ctx ? (struct r600_common_context*)ctx : NULL;
> int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
>
> if (rfence->sdma) {
> if (!rws->fence_wait(rws, rfence->sdma, timeout))
> return false;
>
> /* Recompute the timeout after waiting. */
> if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
> int64_t time = os_time_get_nano();
> timeout = abs_timeout > time ? abs_timeout - time : 0;
> }
> }
>
> if (!rfence->gfx)
> return true;
>
> + /* Flush the gfx IB if it hasn't been flushed yet. */
> + if (rctx &&
> + rfence->gfx_unflushed.ctx == rctx &&
> + rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
> + rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
> + rfence->gfx_unflushed.ctx = NULL;
> +
> + if (!timeout)
> + return false;
> +
> + /* Recompute the timeout after all that. */
> + if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
> + int64_t time = os_time_get_nano();
> + timeout = abs_timeout > time ? abs_timeout - time : 0;
> + }
> + }
> +
> return rws->fence_wait(rws, rfence->gfx, timeout);
> }
>
> static void r600_query_memory_info(struct pipe_screen *screen,
> struct pipe_memory_info *info)
> {
> struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
> struct radeon_winsys *ws = rscreen->ws;
> unsigned vram_usage, gtt_usage;
>
>
More information about the mesa-dev mailing list