[PATCH] [r600g] Use new kernel interface to wait for fences

Simon Farnsworth simon.farnsworth at onelan.co.uk
Tue Jan 31 05:17:24 PST 2012


Instead of busywaiting for the GPU to finish a fence, use the new kernel
interface to wait for fence completion.

This patch is incomplete - in particular, we should fall back to
busy-waiting (using radeon_winsys_bo_wait_fence_nokernel() in
radeon_drm_bo.c, which this patch adds but does not yet hook up) if the
kernel doesn't support the new interface.

Signed-off-by: Simon Farnsworth <simon.farnsworth at onelan.co.uk>
---
 src/gallium/drivers/r600/r600_hw_context.c    |    2 +-
 src/gallium/drivers/r600/r600_pipe.c          |   12 +++------
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c |   30 +++++++++++++++++++++++++
 src/gallium/winsys/radeon/drm/radeon_winsys.h |   16 +++++++++++++
 4 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 8eb8e6d..35a57a7 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1618,7 +1618,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
 	ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
 	/* DATA_SEL | INT_EN | ADDRESS_HI */
-	ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF);
+	ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (2 << 24) | ((va >> 32UL) & 0xFF);
 	ctx->pm4[ctx->pm4_cdwords++] = value;                   /* DATA_LO */
 	ctx->pm4[ctx->pm4_cdwords++] = 0;                       /* DATA_HI */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index c38fbc5..12c5bf5 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -595,7 +595,6 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen,
 	struct r600_screen *rscreen = (struct r600_screen *)pscreen;
 	struct r600_fence *rfence = (struct r600_fence*)fence;
 	int64_t start_time = 0;
-	unsigned spins = 0;
 
 	if (timeout != PIPE_TIMEOUT_INFINITE) {
 		start_time = os_time_get();
@@ -605,13 +604,10 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen,
 	}
 
 	while (rscreen->fences.data[rfence->index] == 0) {
-		if (++spins % 256)
-			continue;
-#ifdef PIPE_OS_UNIX
-		sched_yield();
-#else
-		os_time_sleep(10);
-#endif
+		rscreen->ws->buffer_wait_fence(rscreen->fences.bo->buf,
+					       rfence->index << 2,
+					       0,
+					       timeout);
 		if (timeout != PIPE_TIMEOUT_INFINITE &&
 		    os_time_get() - start_time >= timeout) {
 			return FALSE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 143dcf9..b552c11 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -879,6 +879,35 @@ static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer)
     return bo->va;
 }
 
+/* No kernel support for doing this faster - yield (or sleep) once and return */
+static void radeon_winsys_bo_wait_fence_nokernel(struct pb_buffer *buf,
+						 unsigned offset,
+						 uint32_t value,
+						 uint64_t timeout)
+{
+#ifdef PIPE_OS_UNIX
+    sched_yield();
+#else
+    os_time_sleep(10);
+#endif
+}
+
+static void radeon_winsys_bo_wait_fence(struct pb_buffer *_buf,
+					unsigned offset,
+					uint32_t value,
+					uint64_t timeout)
+{
+    struct radeon_bo *bo = get_radeon_bo(_buf);
+    struct drm_radeon_gem_wait_user_fence args;
+    memset(&args, 0, sizeof(args));
+    args.handle = bo->handle;
+    args.offset = offset;
+    args.value = value;
+    args.timeout_usec = timeout;
+    while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_USER_FENCE,
+                               &args, sizeof(args)) == -EBUSY);
+}
+
 void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
 {
     ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
@@ -892,4 +921,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
     ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
+    ws->base.buffer_wait_fence = radeon_winsys_bo_wait_fence;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index e462e86..869961f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -264,6 +264,22 @@ struct radeon_winsys {
      */
     uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf);
 
+    /**
+     * Wait until a fence (EVENT_WRITE_EOP typically) has had a chance to
+     * write to a buffer. NB: there is no guarantee that the GPU has written
+     * to the buffer when this call returns, merely that it has had an
+     * opportunity to do so.
+     *
+     * \param buf       A winsys buffer object
+     * \param offset    Offset in bytes within the buffer that you expect to see changed - must be uint32_t aligned
+     * \param value     The current value stored at offset
+     * \param timeout   The maximum wait time, in microseconds
+     */
+    void (*buffer_wait_fence)(struct pb_buffer *buf,
+                              unsigned offset,
+                              uint32_t value,
+                              uint64_t timeout);
+
     /**************************************************************************
      * Command submission.
      *
-- 
1.7.6.4



More information about the dri-devel mailing list