[PATCH 6/7] drm/radeon: allow asynchronous waiting on foreign fences
Maarten Lankhorst
maarten.lankhorst at canonical.com
Thu Sep 4 04:42:41 PDT 2014
Use the semaphore mechanism to make this happen, this uses signaling
from the cpu instead of signaling by the gpu.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst at canonical.com>
---
drivers/gpu/drm/radeon/radeon.h | 17 ++-
drivers/gpu/drm/radeon/radeon_cs.c | 30 ++---
drivers/gpu/drm/radeon/radeon_fence.c | 13 ++-
drivers/gpu/drm/radeon/radeon_semaphore.c | 184 ++++++++++++++++++++++++++++++
4 files changed, 221 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index dddb2b7dd752..cd18fa7f801c 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -359,6 +359,11 @@ struct radeon_fence_driver {
struct delayed_work lockup_work;
};
+struct radeon_fence_cb {
+ struct fence_cb base;
+ struct fence *fence;
+};
+
struct radeon_fence {
struct fence base;
@@ -368,6 +373,10 @@ struct radeon_fence {
unsigned ring;
wait_queue_t fence_wake;
+
+ atomic_t num_cpu_cbs;
+ struct radeon_fence_cb *cpu_cbs;
+ uint32_t *cpu_sema;
};
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -574,9 +583,11 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
*/
struct radeon_semaphore {
struct radeon_sa_bo *sa_bo;
- signed waiters;
+ signed waiters, cpu_waiters, cpu_waiters_max;
uint64_t gpu_addr;
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
+ uint32_t *cpu_sema;
+ struct radeon_fence_cb *cpu_cbs;
};
int radeon_semaphore_create(struct radeon_device *rdev,
@@ -587,6 +598,10 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
struct radeon_semaphore *semaphore);
void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
struct radeon_fence *fence);
+int radeon_semaphore_sync_obj(struct radeon_device *rdev,
+ struct radeon_semaphore *semaphore,
+ struct reservation_object *resv);
+
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
int waiting_ring);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 8ad4e2cfae15..b141f5bd029d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -250,32 +250,16 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
- int i;
-
- for (i = 0; i < p->nrelocs; i++) {
- struct reservation_object *resv;
- struct fence *fence;
- struct radeon_fence *rfence;
- int r;
+ int i, ret = 0;
+ for (i = 0; !ret && i < p->nrelocs; i++) {
if (!p->relocs[i].robj)
continue;
- resv = p->relocs[i].robj->tbo.resv;
- fence = reservation_object_get_excl(resv);
- if (!fence)
- continue;
- rfence = to_radeon_fence(fence);
- if (!rfence || rfence->rdev != p->rdev) {
- r = fence_wait(fence, true);
- if (r)
- return r;
- continue;
- }
-
- radeon_semaphore_sync_to(p->ib.semaphore, rfence);
+ ret = radeon_semaphore_sync_obj(p->rdev, p->ib.semaphore,
+ p->relocs[i].robj->tbo.resv);
}
- return 0;
+ return ret;
}
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
@@ -442,6 +426,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
*/
list_sort(NULL, &parser->validated, cmp_size_smaller_first);
+ /* must be called with all reservation_objects still held */
+ radeon_semaphore_free(parser->rdev, &parser->ib.semaphore,
+ parser->ib.fence);
+
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
&parser->ib.fence->base);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 0262fe2580d2..7687a7f8f41b 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -142,6 +142,8 @@ int radeon_fence_emit(struct radeon_device *rdev,
(*fence)->ring = ring;
fence_init(&(*fence)->base, &radeon_fence_ops,
&rdev->fence_queue.lock, rdev->fence_context + ring, seq);
+ (*fence)->cpu_cbs = NULL;
+ (*fence)->cpu_sema = NULL;
radeon_fence_ring_emit(rdev, ring, *fence);
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
radeon_fence_schedule_check(rdev, ring);
@@ -1057,11 +1059,20 @@ static signed long radeon_fence_default_wait(struct fence *f, bool intr,
return t;
}
+static void __radeon_fence_destroy(struct fence *f)
+{
+ struct radeon_fence *fence = to_radeon_fence(f);
+
+ WARN_ON(fence->cpu_cbs);
+ kfree(fence->cpu_cbs);
+ fence_free(f);
+}
+
const struct fence_ops radeon_fence_ops = {
.get_driver_name = radeon_fence_get_driver_name,
.get_timeline_name = radeon_fence_get_timeline_name,
.enable_signaling = radeon_fence_enable_signaling,
.signaled = radeon_fence_is_signaled,
.wait = radeon_fence_default_wait,
- .release = NULL,
+ .release = __radeon_fence_destroy,
};
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 56d9fd66d8ae..2e71463d11c5 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -30,6 +30,7 @@
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_trace.h"
+#include <trace/events/fence.h>
int radeon_semaphore_create(struct radeon_device *rdev,
struct radeon_semaphore **semaphore)
@@ -49,7 +50,11 @@ int radeon_semaphore_create(struct radeon_device *rdev,
return r;
}
(*semaphore)->waiters = 0;
+ (*semaphore)->cpu_waiters = 0;
+ (*semaphore)->cpu_waiters_max = 0;
(*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
+ (*semaphore)->cpu_sema = NULL;
+ (*semaphore)->cpu_cbs = NULL;
cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo);
for (i = 0; i < RADEON_NUM_SYNCS; ++i)
@@ -115,6 +120,101 @@ void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
}
+int radeon_semaphore_reserve_cpu_waiters(struct radeon_semaphore *semaphore, int add)
+{
+ int max = 4;
+ struct radeon_fence_cb *cpu_cbs;
+
+ if (semaphore->cpu_waiters + add <= semaphore->cpu_waiters_max)
+ return 0;
+
+ if (semaphore->cpu_waiters_max)
+ max = semaphore->cpu_waiters_max * 2;
+
+ cpu_cbs = krealloc(semaphore->cpu_cbs, max * sizeof(*cpu_cbs), GFP_KERNEL);
+ if (!cpu_cbs)
+ return -ENOMEM;
+ semaphore->cpu_cbs = cpu_cbs;
+ semaphore->cpu_waiters_max = max;
+ return 0;
+}
+
+static void radeon_semaphore_add_cpu_cb(struct radeon_semaphore *semaphore,
+ struct fence *fence)
+{
+ unsigned i;
+ struct radeon_fence_cb *empty = NULL;
+
+ for (i = 0; i < semaphore->cpu_waiters; ++i) {
+ struct fence *other = semaphore->cpu_cbs[i].fence;
+
+ if (!other)
+ empty = &semaphore->cpu_cbs[i];
+ else if (other->context == fence->context) {
+ semaphore->cpu_cbs[i].fence = fence_later(other, fence);
+ return;
+ }
+ }
+
+ if (!empty)
+ empty = &semaphore->cpu_cbs[semaphore->cpu_waiters++];
+
+ empty->fence = fence;
+ return;
+}
+
+/**
+ * radeon_semaphore_sync_obj - use the semaphore to sync to a bo
+ *
+ * @semaphore: semaphore object to add fence to
+ * @resv: the reservation_object to sync to
+ *
+ * Sync the reservation_object using this semaphore.
+ *
+ * radeon_semaphore_free must be called with all reservation_object locks
+ * still held!!!
+ */
+int radeon_semaphore_sync_obj(struct radeon_device *rdev,
+ struct radeon_semaphore *semaphore,
+ struct reservation_object *resv)
+{
+ struct fence *fence;
+ struct radeon_fence *rfence;
+ struct reservation_object_list *fobj;
+ int ret, i;
+
+ fobj = reservation_object_get_list(resv);
+ if (fobj && fobj->shared_count) {
+ ret = radeon_semaphore_reserve_cpu_waiters(semaphore, fobj->shared_count);
+ if (ret)
+ return ret;
+ for (i = 0; i < fobj->shared_count; ++i) {
+ fence = rcu_dereference_protected(fobj->shared[i],
+ reservation_object_held(resv));
+
+ radeon_semaphore_add_cpu_cb(semaphore, fence);
+ }
+ return 0;
+ }
+
+ fence = reservation_object_get_excl(resv);
+ if (!fence)
+ return 0;
+
+ rfence = to_radeon_fence(fence);
+ if (rfence && rfence->rdev == rdev) {
+ struct radeon_fence *other = semaphore->sync_to[rfence->ring];
+
+ semaphore->sync_to[rfence->ring] =
+ radeon_fence_later(rfence, other);
+ return 0;
+ }
+ ret = radeon_semaphore_reserve_cpu_waiters(semaphore, 1);
+ if (!ret)
+ radeon_semaphore_add_cpu_cb(semaphore, fence);
+ return ret;
+}
+
/**
* radeon_semaphore_sync_rings - sync ring to all registered fences
*
@@ -124,6 +224,8 @@ void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
*
* Ensure that all registered fences are signaled before letting
* the ring continue. The caller must hold the ring lock.
+ *
+ * This function may only be called once on a semaphore.
*/
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
@@ -132,6 +234,16 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
unsigned count = 0;
int i, r;
+ if (semaphore->cpu_waiters) {
+ /* allocate enough space for sync command */
+ if (radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
+ semaphore->cpu_sema = radeon_sa_bo_cpu_addr(semaphore->sa_bo);
+ semaphore->gpu_addr += 8;
+ ++count;
+ } else
+ semaphore->cpu_waiters = -1;
+ }
+
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_fence *fence = semaphore->sync_to[i];
@@ -188,6 +300,68 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
return 0;
}
+static void radeon_semaphore_cpu_trigger(struct fence *other_fence,
+ struct fence_cb *fence_cb)
+{
+ struct radeon_fence_cb *cb = (struct radeon_fence_cb*)fence_cb;
+ struct radeon_fence *fence = (struct radeon_fence *)cb->fence;
+
+#ifdef CONFIG_FENCE_TRACE
+ int ret = atomic_dec_return(&fence->num_cpu_cbs);
+
+ if (ret)
+ FENCE_TRACE(&fence->base, "triggered from %u#%u, %i remaining\n",
+ ret, other_fence->context, other_fence->seqno);
+ else
+#else
+ if (atomic_dec_and_test(&fence->num_cpu_cbs))
+#endif
+ {
+ FENCE_TRACE(&fence->base, "triggered from %u#%u, starting work\n",
+ other_fence->context, other_fence->seqno);
+
+ *fence->cpu_sema = ~0;
+
+ kfree(fence->cpu_cbs);
+ fence->cpu_cbs = NULL;
+ }
+}
+
+static void radeon_semaphore_arm_cpu_cbs(struct radeon_semaphore *semaphore,
+ struct radeon_fence *fence)
+{
+ unsigned i, skipped = 0;
+
+ fence->cpu_cbs = semaphore->cpu_cbs;
+ fence->cpu_sema = semaphore->cpu_sema;
+ atomic_set(&fence->num_cpu_cbs, semaphore->cpu_waiters);
+
+ for (i = 0; i < semaphore->cpu_waiters; ++i) {
+ struct fence *other = fence->cpu_cbs[i].fence;
+
+ if (other) {
+ fence->cpu_cbs[i].fence = &fence->base;
+ trace_fence_annotate_wait_on(&fence->base, other);
+
+ FENCE_TRACE(&fence->base, "queued wait on %u#%u\n",
+ other->context, other->seqno);
+
+ if (!fence_add_callback(other, &fence->cpu_cbs[i].base,
+ radeon_semaphore_cpu_trigger))
+ continue;
+ }
+ skipped++;
+ }
+
+ if (skipped && atomic_sub_and_test(skipped, &fence->num_cpu_cbs)) {
+ FENCE_TRACE(&fence->base, "No triggers, starting..\n");
+
+ *fence->cpu_sema = ~0;
+ kfree(fence->cpu_cbs);
+ fence->cpu_cbs = NULL;
+ }
+}
+
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore **semaphore,
struct radeon_fence *fence)
@@ -195,6 +369,16 @@ void radeon_semaphore_free(struct radeon_device *rdev,
if (semaphore == NULL || *semaphore == NULL) {
return;
}
+ if ((*semaphore)->cpu_cbs) {
+ (*semaphore)->waiters--;
+
+ if (!fence) {
+ *(*semaphore)->cpu_sema = ~0U;
+ kfree((*semaphore)->cpu_cbs);
+ } else
+ radeon_semaphore_arm_cpu_cbs(*semaphore, fence);
+ }
+
if ((*semaphore)->waiters > 0) {
dev_err(rdev->dev, "semaphore %p has more waiters than signalers,"
" hardware lockup imminent!\n", *semaphore);
--
2.0.4
More information about the dri-devel
mailing list