[PATCH] [r600g] Use new kernel interface to wait for fences
Simon Farnsworth
simon.farnsworth at onelan.co.uk
Tue Jan 31 05:17:24 PST 2012
Instead of busy-waiting for the GPU to signal a fence, use the new kernel
interface (DRM_RADEON_GEM_WAIT_USER_FENCE) to wait for fence completion.
This patch is incomplete - in particular, we should fall back to
busy-waiting (using radeon_winsys_bo_wait_fence_nokernel(), which this patch
adds to radeon_drm_bo.c but does not yet hook up) if the kernel doesn't
support the new interface.
Signed-off-by: Simon Farnsworth <simon.farnsworth at onelan.co.uk>
---
src/gallium/drivers/r600/r600_hw_context.c | 2 +-
src/gallium/drivers/r600/r600_pipe.c | 12 +++------
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 30 +++++++++++++++++++++++++
src/gallium/winsys/radeon/drm/radeon_winsys.h | 16 +++++++++++++
4 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 8eb8e6d..35a57a7 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1618,7 +1618,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */
/* DATA_SEL | INT_EN | ADDRESS_HI */
- ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF);
+ ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (2 << 24) | ((va >> 32UL) & 0xFF);
ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index c38fbc5..12c5bf5 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -595,7 +595,6 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen,
struct r600_screen *rscreen = (struct r600_screen *)pscreen;
struct r600_fence *rfence = (struct r600_fence*)fence;
int64_t start_time = 0;
- unsigned spins = 0;
if (timeout != PIPE_TIMEOUT_INFINITE) {
start_time = os_time_get();
@@ -605,13 +604,10 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen,
}
while (rscreen->fences.data[rfence->index] == 0) {
- if (++spins % 256)
- continue;
-#ifdef PIPE_OS_UNIX
- sched_yield();
-#else
- os_time_sleep(10);
-#endif
+ rscreen->ws->buffer_wait_fence(rscreen->fences.bo->buf,
+ rfence->index << 2,
+ 0,
+ timeout);
if (timeout != PIPE_TIMEOUT_INFINITE &&
os_time_get() - start_time >= timeout) {
return FALSE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 143dcf9..b552c11 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -879,6 +879,35 @@ static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer)
return bo->va;
}
+/* No kernel support for doing this faster - just spin */
+static void radeon_winsys_bo_wait_fence_nokernel(struct pb_buffer *buf,
+ unsigned offset,
+ uint32_t value,
+ uint64_t timeout)
+{
+#ifdef PIPE_OS_UNIX
+ sched_yield();
+#else
+ os_time_sleep(10);
+#endif
+}
+
+static void radeon_winsys_bo_wait_fence(struct pb_buffer *_buf,
+ unsigned offset,
+ uint32_t value,
+ uint64_t timeout)
+{
+ struct radeon_bo *bo = get_radeon_bo(_buf);
+ struct drm_radeon_gem_wait_user_fence args;
+ memset(&args, 0, sizeof(args));
+ args.handle = bo->handle;
+ args.offset = offset;
+ args.value = value;
+ args.timeout_usec = timeout;
+ while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_USER_FENCE,
+ &args, sizeof(args)) == -EBUSY);
+}
+
void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
{
ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
@@ -892,4 +921,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
+ ws->base.buffer_wait_fence = radeon_winsys_bo_wait_fence;
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index e462e86..869961f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -264,6 +264,22 @@ struct radeon_winsys {
*/
uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf);
+ /**
+ * Wait until a fence (EVENT_WRITE_EOP typically) has had a chance to
+ * write to a buffer. NB: there is no guarantee that the GPU has written
+ * to the buffer when this call returns, merely that it has had an
+ * opportunity to do so.
+ *
+ * \param buf A winsys buffer object
+ * \param offset Offset in bytes within the buffer that you expect to see changed - must be uint32_t aligned
+ * \param value The current value stored at offset
+ * \param timeout The maximum wait time, in microseconds
+ */
+ void (*buffer_wait_fence)(struct pb_buffer *buf,
+ unsigned offset,
+ uint32_t value,
+ uint64_t timeout);
+
/**************************************************************************
* Command submission.
*
--
1.7.6.4
More information about the dri-devel
mailing list