[Intel-gfx] [PATCH] [RFC] drm/i915: read-read semaphore optimization
Ben Widawsky
ben at bwidawsk.net
Tue Dec 13 04:52:08 CET 2011
Since we don't differentiate between the different GPU read domains, it
should be safe to allow back-to-back reads to occur without issuing a
wait (or a flush, in the non-semaphore case).
This has the unfortunate side effect that we need to keep track of all
the outstanding buffer reads so that we can synchronize on a write, to
another ring (since we don't know which read finishes first). In other
words, the code is quite simple for two rings, but gets more tricky for
> 2 rings.
Here is a picture of the solution to the above problem:

  Ring 0          Ring 1          Ring 2
  batch 0         batch 1         batch 2
  read buffer A   read buffer A   wait batch 0
                                  wait batch 1
                                  write buffer A
This code is really untested. I'm hoping for some feedback if this is
worth cleaning up, and testing more thoroughly.
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_gem.c | 1 +
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 32 ++++++++++++++++++++++-----
3 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4a9c1b9..d59bf20 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -852,6 +852,7 @@ struct drm_i915_gem_object {
/** Breadcrumb of last rendering to the buffer. */
uint32_t last_rendering_seqno;
+ uint32_t lrs_ro[I915_NUM_RINGS];
struct intel_ring_buffer *ring;
/** Breadcrumb of last fenced GPU access to the buffer. */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed0b68f..64cc804 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1561,6 +1561,7 @@ i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
{
list_del_init(&obj->ring_list);
obj->last_rendering_seqno = 0;
+ memset(obj->lrs_ro, 0, sizeof(obj->lrs_ro));
}
static void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 42a6529..78090a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -752,21 +752,20 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
{
struct intel_ring_buffer *from = obj->ring;
u32 seqno;
- int ret, idx;
+ int ret, idx, i;
if (from == NULL || to == from)
return 0;
- /* If the object isn't being written to, and the object isn't moving
- * into a GPU write domain, it doesn't need to sync.
- */
if (obj->pending_gpu_write == 0 &&
- obj->base.pending_write_domain == 0)
+ obj->base.pending_write_domain == 0) {
+ obj->lrs_ro[ffs(to->id) - 1] = obj->last_rendering_seqno;
return 0;
+ }
/* XXX gpu semaphores are implicated in various hard hangs on SNB */
if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores)
- return i915_gem_object_wait_rendering(obj);
+ ret = i915_gem_object_wait_rendering(obj);
idx = intel_ring_sync_index(from, to);
@@ -792,6 +791,27 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
from->sync_seqno[idx] = seqno;
+ /* We must be doing a write to an object which may be in queue to be
+ * read by some other command. If we skipped synchronizing because of a
+ * read-only request, we must check and add a sync point now. The reason
+ * we must sync on the write is because we have more than 2 rings. If two
+ * rings have outstanding reads (without sync), we can't let the write
+ * occur until both of the others have completed.
+ */
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ uint32_t ro_seqno = obj->lrs_ro[i];
+ if (ro_seqno != 0) {
+ obj->lrs_ro[i] = 0;
+
+ if (i915_seqno_passed(from->get_seqno(from), ro_seqno))
+ continue;
+
+ ret = to->sync_to(to, from, ro_seqno - 1);
+ if (ret)
+ return ret;
+ }
+ }
+
return to->sync_to(to, from, seqno - 1);
}
--
1.7.8
More information about the Intel-gfx
mailing list