[Mesa-dev] [PATCH 7/8] radeonsi: avoid syncing the driver thread in si_fence_finish
Nicolai Hähnle
nhaehnle at gmail.com
Mon Nov 13 14:03:35 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
Syncing with the driver thread is really only required when we need to flush for deferred fences.
---
src/gallium/auxiliary/util/u_threaded_context.h | 8 +++
src/gallium/drivers/radeonsi/si_fence.c | 75 +++++++++++++------------
src/gallium/drivers/radeonsi/si_hw_context.c | 3 +
3 files changed, 49 insertions(+), 37 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index ea815ed5e03..e1ba73607db 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -399,20 +399,28 @@ threaded_query(struct pipe_query *q)
{
return (struct threaded_query*)q;
}
static inline struct threaded_transfer *
threaded_transfer(struct pipe_transfer *transfer)
{
return (struct threaded_transfer*)transfer;
}
+static inline struct pipe_context *
+threaded_context_unwrap_unsync(struct pipe_context *pipe)
+{
+ if (!pipe || !pipe->priv)
+ return pipe;
+ return (struct pipe_context*)pipe->priv;
+}
+
static inline void
tc_unflushed_batch_token_reference(struct tc_unflushed_batch_token **dst,
struct tc_unflushed_batch_token *src)
{
if (pipe_reference((struct pipe_reference *)*dst, (struct pipe_reference *)src))
free(*dst);
*dst = src;
}
#endif
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index ff1800ce785..5163d652c83 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -181,26 +181,22 @@ static void si_fine_fence_set(struct si_context *ctx,
}
}
static boolean si_fence_finish(struct pipe_screen *screen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
- struct r600_common_context *rctx;
int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
- ctx = threaded_context_unwrap_sync(ctx);
- rctx = ctx ? (struct r600_common_context*)ctx : NULL;
-
if (!util_queue_fence_is_signalled(&rfence->ready)) {
if (!timeout)
return false;
if (rfence->tc_token) {
/* Ensure that si_flush_from_st will be called for
* this fence, but only if we're in the API thread
* where the context is current.
*
* Note that the batch containing the flush may already
@@ -238,55 +234,60 @@ static boolean si_fence_finish(struct pipe_screen *screen,
return true;
if (rfence->fine.buf &&
si_fine_fence_signaled(rws, &rfence->fine)) {
rws->fence_reference(&rfence->gfx, NULL);
r600_resource_reference(&rfence->fine.buf, NULL);
return true;
}
/* Flush the gfx IB if it hasn't been flushed yet. */
- if (rctx &&
- rfence->gfx_unflushed.ctx == rctx &&
- rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
- /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
- * spec says:
- *
- * "If the sync object being blocked upon will not be
- * signaled in finite time (for example, by an associated
- * fence command issued previously, but not yet flushed to
- * the graphics pipeline), then ClientWaitSync may hang
- * forever. To help prevent this behavior, if
- * ClientWaitSync is called and all of the following are
- * true:
- *
- * * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
- * * sync is unsignaled when ClientWaitSync is called,
- * * and the calls to ClientWaitSync and FenceSync were
- * issued from the same context,
- *
- * then the GL will behave as if the equivalent of Flush
- * were inserted immediately after the creation of sync."
- *
- * This means we need to flush for such fences even when we're
- * not going to wait.
- */
- rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
- rfence->gfx_unflushed.ctx = NULL;
+ if (ctx && rfence->gfx_unflushed.ctx) {
+ struct si_context *sctx;
+
+ sctx = (struct si_context *)threaded_context_unwrap_unsync(ctx);
+ if (rfence->gfx_unflushed.ctx == &sctx->b &&
+ rfence->gfx_unflushed.ib_index == sctx->b.num_gfx_cs_flushes) {
+ /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
+ * spec says:
+ *
+ * "If the sync object being blocked upon will not be
+ * signaled in finite time (for example, by an associated
+ * fence command issued previously, but not yet flushed to
+ * the graphics pipeline), then ClientWaitSync may hang
+ * forever. To help prevent this behavior, if
+ * ClientWaitSync is called and all of the following are
+ * true:
+ *
+ * * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
+ * * sync is unsignaled when ClientWaitSync is called,
+ * * and the calls to ClientWaitSync and FenceSync were
+ * issued from the same context,
+ *
+ * then the GL will behave as if the equivalent of Flush
+ * were inserted immediately after the creation of sync."
+ *
+ * This means we need to flush for such fences even when we're
+ * not going to wait.
+ */
+ threaded_context_unwrap_sync(ctx);
+ sctx->b.gfx.flush(&sctx->b, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
+ rfence->gfx_unflushed.ctx = NULL;
- if (!timeout)
- return false;
+ if (!timeout)
+ return false;
- /* Recompute the timeout after all that. */
- if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
- int64_t time = os_time_get_nano();
- timeout = abs_timeout > time ? abs_timeout - time : 0;
+ /* Recompute the timeout after all that. */
+ if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t time = os_time_get_nano();
+ timeout = abs_timeout > time ? abs_timeout - time : 0;
+ }
}
}
if (rws->fence_wait(rws, rfence->gfx, timeout))
return true;
/* Re-check in case the GPU is slow or hangs, but the commands before
* the fine-grained fence have completed. */
if (rfence->fine.buf &&
si_fine_fence_signaled(rws, &rfence->fine))
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 15234d72725..1903cf8c978 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -122,20 +122,23 @@ void si_context_gfx_flush(void *context, unsigned flags,
/* Save the IB for debug contexts. */
si_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
ctx->current_saved_cs->flushed = true;
ctx->current_saved_cs->time_flush = os_time_get_nano();
}
/* Flush the CS. */
ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
if (fence)
ws->fence_reference(fence, ctx->b.last_gfx_fence);
+
+ /* This must be after cs_flush returns, since the context's API
+ * thread can concurrently read this value in si_fence_finish. */
ctx->b.num_gfx_cs_flushes++;
/* Check VM faults if needed. */
if (ctx->screen->b.debug_flags & DBG(CHECK_VM)) {
/* Use conservative timeout 800ms, after which we won't wait any
* longer and assume the GPU is hung.
*/
ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_gfx_fence, 800*1000*1000);
si_check_vm_faults(&ctx->b, &ctx->current_saved_cs->gfx, RING_GFX);
--
2.11.0
More information about the mesa-dev mailing list