[Intel-gfx] [PATCH 17/22] drm/i915: Protect request retirement with timeline->mutex
Chris Wilson
chris at chris-wilson.co.uk
Tue Aug 6 09:05:30 UTC 2019
Forgo the struct_mutex requirement for request retirement as we have
been transitioning over to only using the timeline->mutex for
controlling the lifetime of a request on that timeline.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 183 ++++++++++--------
drivers/gpu/drm/i915/gt/intel_context.h | 18 +-
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 -
drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 -
drivers/gpu/drm/i915/gt/intel_gt.c | 2 -
drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 -
drivers/gpu/drm/i915/gt/intel_lrc.c | 1 +
drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 19 +-
drivers/gpu/drm/i915/gt/mock_engine.c | 1 -
drivers/gpu/drm/i915/gt/selftest_context.c | 9 +-
drivers/gpu/drm/i915/i915_request.c | 151 +++++++--------
drivers/gpu/drm/i915/i915_request.h | 3 -
12 files changed, 208 insertions(+), 185 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cee37322b953..db9978227ef2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -735,63 +735,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
return 0;
}
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
- struct i915_request *rq;
-
- /*
- * Completely unscientific finger-in-the-air estimates for suitable
- * maximum user request size (to avoid blocking) and then backoff.
- */
- if (intel_ring_update_space(ring) >= PAGE_SIZE)
- return NULL;
-
- /*
- * Find a request that after waiting upon, there will be at least half
- * the ring available. The hysteresis allows us to compete for the
- * shared ring and should mean that we sleep less often prior to
- * claiming our resources, but not so long that the ring completely
- * drains before we can submit our next request.
- */
- list_for_each_entry(rq, &ring->request_list, ring_link) {
- if (__intel_ring_space(rq->postfix,
- ring->emit, ring->size) > ring->size / 2)
- break;
- }
- if (&rq->ring_link == &ring->request_list)
- return NULL; /* weird, we will check again later for real */
-
- return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
- struct i915_request *rq;
- int ret = 0;
-
- /*
- * Apply a light amount of backpressure to prevent excessive hogs
- * from blocking waiting for space whilst holding struct_mutex and
- * keeping all of their resources pinned.
- */
-
- rq = __eb_wait_for_ring(eb->context->ring);
- if (rq) {
- mutex_unlock(&eb->i915->drm.struct_mutex);
-
- if (i915_request_wait(rq,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT) < 0)
- ret = -EINTR;
-
- i915_request_put(rq);
-
- mutex_lock(&eb->i915->drm.struct_mutex);
- }
-
- return ret;
-}
-
static int eb_lookup_vmas(struct i915_execbuffer *eb)
{
struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
@@ -2128,10 +2071,75 @@ static const enum intel_engine_id user_ring_map[] = {
[I915_EXEC_VEBOX] = VECS0
};
-static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_throttle(struct intel_context *ce)
+{
+ struct intel_ring *ring = ce->ring;
+ struct intel_timeline *tl = ce->timeline;
+ struct i915_request *rq;
+
+ /*
+ * Completely unscientific finger-in-the-air estimates for suitable
+ * maximum user request size (to avoid blocking) and then backoff.
+ */
+ if (intel_ring_update_space(ring) >= PAGE_SIZE)
+ return NULL;
+
+ /*
+ * Find a request that after waiting upon, there will be at least half
+ * the ring available. The hysteresis allows us to compete for the
+ * shared ring and should mean that we sleep less often prior to
+ * claiming our resources, but not so long that the ring completely
+ * drains before we can submit our next request.
+ */
+ list_for_each_entry(rq, &tl->requests, link) {
+ if (rq->ring != ring)
+ continue;
+
+ if (__intel_ring_space(rq->postfix,
+ ring->emit, ring->size) > ring->size / 2)
+ break;
+ }
+ if (&rq->link == &tl->requests)
+ return NULL; /* weird, we will check again later for real */
+
+ return i915_request_get(rq);
+}
+
+static int
+__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
{
int err;
+ if (likely(atomic_inc_not_zero(&ce->pin_count)))
+ return 0;
+
+ err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+ if (err)
+ return err;
+
+ err = __intel_context_do_pin(ce);
+ mutex_unlock(&eb->i915->drm.struct_mutex);
+
+ return err;
+}
+
+static void
+__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+ if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+ return;
+
+ mutex_lock(&eb->i915->drm.struct_mutex);
+ intel_context_unpin(ce);
+ mutex_unlock(&eb->i915->drm.struct_mutex);
+}
+
+static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+ struct intel_timeline *tl;
+ struct i915_request *rq;
+ int err;
+
/*
* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged.
@@ -2145,7 +2153,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
* GGTT space, so do this first before we reserve a seqno for
* ourselves.
*/
- err = intel_context_pin(ce);
+ err = __eb_pin_context(eb, ce);
if (err)
return err;
@@ -2157,23 +2165,43 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
* until the timeline is idle, which in turn releases the wakeref
* taken on the engine, and the parent device.
*/
- err = intel_context_timeline_lock(ce);
- if (err)
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
goto err_unpin;
+ }
intel_context_enter(ce);
- intel_context_timeline_unlock(ce);
+ rq = eb_throttle(ce);
+
+ intel_context_timeline_unlock(tl);
+
+ if (rq) {
+ if (i915_request_wait(rq,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0) {
+ i915_request_put(rq);
+ err = -EINTR;
+ goto err_exit;
+ }
+
+ i915_request_put(rq);
+ }
eb->engine = ce->engine;
eb->context = ce;
return 0;
+err_exit:
+ mutex_lock(&tl->mutex);
+ intel_context_exit(ce);
+ intel_context_timeline_unlock(tl);
err_unpin:
- intel_context_unpin(ce);
+ __eb_unpin_context(eb, ce);
return err;
}
-static void eb_unpin_context(struct i915_execbuffer *eb)
+static void eb_unpin_engine(struct i915_execbuffer *eb)
{
struct intel_context *ce = eb->context;
struct intel_timeline *tl = ce->timeline;
@@ -2182,7 +2210,7 @@ static void eb_unpin_context(struct i915_execbuffer *eb)
intel_context_exit(ce);
mutex_unlock(&tl->mutex);
- intel_context_unpin(ce);
+ __eb_unpin_context(eb, ce);
}
static unsigned int
@@ -2227,9 +2255,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
}
static int
-eb_select_engine(struct i915_execbuffer *eb,
- struct drm_file *file,
- struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb,
+ struct drm_file *file,
+ struct drm_i915_gem_execbuffer2 *args)
{
struct intel_context *ce;
unsigned int idx;
@@ -2244,7 +2272,7 @@ eb_select_engine(struct i915_execbuffer *eb,
if (IS_ERR(ce))
return PTR_ERR(ce);
- err = eb_pin_context(eb, ce);
+ err = __eb_pin_engine(eb, ce);
intel_context_put(ce);
return err;
@@ -2462,16 +2490,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;
- err = i915_mutex_lock_interruptible(dev);
- if (err)
- goto err_context;
-
- err = eb_select_engine(&eb, file, args);
+ err = eb_pin_engine(&eb, file, args);
if (unlikely(err))
- goto err_unlock;
+ goto err_context;
- err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
- if (unlikely(err))
+ err = i915_mutex_lock_interruptible(dev);
+ if (err)
goto err_engine;
err = eb_relocate(&eb);
@@ -2629,10 +2653,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err_vma:
if (eb.exec)
eb_release_vmas(&eb);
-err_engine:
- eb_unpin_context(&eb);
-err_unlock:
mutex_unlock(&dev->struct_mutex);
+err_engine:
+ eb_unpin_engine(&eb);
err_context:
i915_gem_context_put(eb.gem_context);
err_destroy:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 9fa8b588f18e..053a1307ecb4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
#include "i915_active.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
+#include "intel_timeline_types.h"
void intel_context_init(struct intel_context *ce,
struct i915_gem_context *ctx,
@@ -118,17 +119,24 @@ static inline void intel_context_put(struct intel_context *ce)
kref_put(&ce->ref, ce->ops->destroy);
}
-static inline int __must_check
+static inline struct intel_timeline *__must_check
intel_context_timeline_lock(struct intel_context *ce)
__acquires(&ce->timeline->mutex)
{
- return mutex_lock_interruptible(&ce->timeline->mutex);
+ struct intel_timeline *tl = ce->timeline;
+ int err;
+
+ err = mutex_lock_interruptible(&tl->mutex);
+ if (err)
+ return ERR_PTR(err);
+
+ return tl;
}
-static inline void intel_context_timeline_unlock(struct intel_context *ce)
- __releases(&ce->timeline->mutex)
+static inline void intel_context_timeline_unlock(struct intel_timeline *tl)
+ __releases(&tl->mutex)
{
- mutex_unlock(&ce->timeline->mutex);
+ mutex_unlock(&tl->mutex);
}
int intel_context_prepare_remote_request(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 0f841e1e50ca..47f555dfa288 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -699,7 +699,6 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
engine->status_page.vma))
goto out_frame;
- INIT_LIST_HEAD(&frame->ring.request_list);
frame->ring.vaddr = frame->cs;
frame->ring.size = sizeof(frame->cs);
frame->ring.effective_size = frame->ring.size;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index eba3111038a9..ff5e3b8cef7c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -69,9 +69,6 @@ struct intel_ring {
struct i915_vma *vma;
void *vaddr;
- struct list_head request_list;
- struct list_head active_link;
-
/*
* As we have two types of rings, one global to the engine used
* by ringbuffer submission and those that are exclusive to a
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 75d8c5ee6ecb..469115c0a196 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -14,9 +14,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
gt->i915 = i915;
gt->uncore = &i915->uncore;
- INIT_LIST_HEAD(>->active_rings);
INIT_LIST_HEAD(>->closed_vma);
-
spin_lock_init(>->closed_lock);
intel_gt_init_hangcheck(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index ec7baaed391d..7ece62c4666c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -48,8 +48,6 @@ struct intel_gt {
struct list_head hwsp_free_list;
} timelines;
- struct list_head active_rings;
-
struct intel_wakeref wakeref;
atomic_t user_wakeref;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 93682de110c8..31b03fc4390b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1581,6 +1581,7 @@ static void execlists_context_unpin(struct intel_context *ce)
{
i915_gem_context_unpin_hw_id(ce->gem_context);
i915_gem_object_unpin_map(ce->state->obj);
+ intel_ring_reset(ce->ring, ce->ring->tail);
}
static void
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index e4fc40c3c4d9..6acf447d7b77 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1248,7 +1248,7 @@ void intel_ring_unpin(struct intel_ring *ring)
return;
/* Discard any unused bytes beyond that submitted to hw. */
- intel_ring_reset(ring, ring->tail);
+ intel_ring_reset(ring, ring->emit);
i915_vma_unset_ggtt_write(vma);
if (i915_vma_is_map_and_fenceable(vma))
@@ -1309,7 +1309,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
return ERR_PTR(-ENOMEM);
kref_init(&ring->ref);
- INIT_LIST_HEAD(&ring->request_list);
ring->size = size;
/* Workaround an erratum on the i830 which causes a hang if
@@ -1862,7 +1861,10 @@ static int ring_request_alloc(struct i915_request *request)
return 0;
}
-static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
+static noinline int
+wait_for_space(struct intel_ring *ring,
+ struct intel_timeline *tl,
+ unsigned int bytes)
{
struct i915_request *target;
long timeout;
@@ -1870,15 +1872,18 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
if (intel_ring_update_space(ring) >= bytes)
return 0;
- GEM_BUG_ON(list_empty(&ring->request_list));
- list_for_each_entry(target, &ring->request_list, ring_link) {
+ GEM_BUG_ON(list_empty(&tl->requests));
+ list_for_each_entry(target, &tl->requests, link) {
+ if (target->ring != ring)
+ continue;
+
/* Would completion of this request free enough space? */
if (bytes <= __intel_ring_space(target->postfix,
ring->emit, ring->size))
break;
}
- if (WARN_ON(&target->ring_link == &ring->request_list))
+ if (GEM_WARN_ON(&target->link == &tl->requests))
return -ENOSPC;
timeout = i915_request_wait(target,
@@ -1945,7 +1950,7 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
*/
GEM_BUG_ON(!rq->reserved_space);
- ret = wait_for_space(ring, total_bytes);
+ ret = wait_for_space(ring, rq->timeline, total_bytes);
if (unlikely(ret))
return ERR_PTR(ret);
}
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 44081ecfbb9b..e65af9f30523 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -58,7 +58,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
ring->vaddr = (void *)(ring + 1);
atomic_set(&ring->pin_count, 1);
- INIT_LIST_HEAD(&ring->request_list);
intel_ring_update_space(ring);
return ring;
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 9d3cdcc67b4d..dab0c030e9e4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -20,10 +20,13 @@ static int request_sync(struct i915_request *rq)
i915_request_add(rq);
timeout = i915_request_wait(rq, 0, HZ / 10);
- if (timeout < 0)
+ if (timeout < 0) {
err = timeout;
- else
+ } else {
+ mutex_lock(&rq->timeline->mutex);
i915_request_retire_upto(rq);
+ mutex_unlock(&rq->timeline->mutex);
+ }
i915_request_put(rq);
@@ -35,6 +38,7 @@ static int context_sync(struct intel_context *ce)
struct intel_timeline *tl = ce->timeline;
int err = 0;
+ mutex_lock(&tl->mutex);
do {
struct i915_request *rq;
long timeout;
@@ -55,6 +59,7 @@ static int context_sync(struct intel_context *ce)
i915_request_put(rq);
} while (!err);
+ mutex_unlock(&tl->mutex);
return err;
}
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 30ae2d630022..7e2d2d22784e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -180,40 +180,6 @@ i915_request_remove_from_client(struct i915_request *request)
spin_unlock(&file_priv->mm.lock);
}
-static void advance_ring(struct i915_request *request)
-{
- struct intel_ring *ring = request->ring;
- unsigned int tail;
-
- /*
- * We know the GPU must have read the request to have
- * sent us the seqno + interrupt, so use the position
- * of tail of the request to update the last known position
- * of the GPU head.
- *
- * Note this requires that we are always called in request
- * completion order.
- */
- GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
- if (list_is_last(&request->ring_link, &ring->request_list)) {
- /*
- * We may race here with execlists resubmitting this request
- * as we retire it. The resubmission will move the ring->tail
- * forwards (to request->wa_tail). We either read the
- * current value that was written to hw, or the value that
- * is just about to be. Either works, if we miss the last two
- * noops - they are safe to be replayed on a reset.
- */
- tail = READ_ONCE(request->tail);
- list_del(&ring->active_link);
- } else {
- tail = request->postfix;
- }
- list_del_init(&request->ring_link);
-
- ring->head = tail;
-}
-
static void free_capture_list(struct i915_request *request)
{
struct i915_capture_list *capture;
@@ -231,7 +197,7 @@ static bool i915_request_retire(struct i915_request *rq)
{
struct i915_active_request *active, *next;
- lockdep_assert_held(&rq->i915->drm.struct_mutex);
+ lockdep_assert_held(&rq->timeline->mutex);
if (!i915_request_completed(rq))
return false;
@@ -243,7 +209,17 @@ static bool i915_request_retire(struct i915_request *rq)
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
trace_i915_request_retire(rq);
- advance_ring(rq);
+ /*
+ * We know the GPU must have read the request to have
+ * sent us the seqno + interrupt, so use the position
+ * of tail of the request to update the last known position
+ * of the GPU head.
+ *
+ * Note this requires that we are always called in request
+ * completion order.
+ */
+ GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+ rq->ring->head = rq->postfix;
/*
* Walk through the active list, calling retire on each. This allows
@@ -320,7 +296,7 @@ static bool i915_request_retire(struct i915_request *rq)
void i915_request_retire_upto(struct i915_request *rq)
{
- struct intel_ring *ring = rq->ring;
+ struct intel_timeline * const tl = rq->timeline;
struct i915_request *tmp;
GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -328,15 +304,11 @@ void i915_request_retire_upto(struct i915_request *rq)
rq->fence.context, rq->fence.seqno,
hwsp_seqno(rq));
- lockdep_assert_held(&rq->i915->drm.struct_mutex);
+ lockdep_assert_held(&tl->mutex);
GEM_BUG_ON(!i915_request_completed(rq));
- if (list_empty(&rq->ring_link))
- return;
-
do {
- tmp = list_first_entry(&ring->request_list,
- typeof(*tmp), ring_link);
+ tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
} while (i915_request_retire(tmp) && tmp != rq);
}
@@ -563,29 +535,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
return NOTIFY_DONE;
}
-static void ring_retire_requests(struct intel_ring *ring)
+static void retire_requests(struct intel_timeline *tl)
{
struct i915_request *rq, *rn;
- list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
if (!i915_request_retire(rq))
break;
}
static noinline struct i915_request *
-request_alloc_slow(struct intel_context *ce, gfp_t gfp)
+request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
{
- struct intel_ring *ring = ce->ring;
struct i915_request *rq;
- if (list_empty(&ring->request_list))
+ if (list_empty(&tl->requests))
goto out;
if (!gfpflags_allow_blocking(gfp))
goto out;
/* Move our oldest request to the slab-cache (if not in use!) */
- rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+ rq = list_first_entry(&tl->requests, typeof(*rq), link);
i915_request_retire(rq);
rq = kmem_cache_alloc(global.slab_requests,
@@ -594,11 +565,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
return rq;
/* Ratelimit ourselves to prevent oom from malicious clients */
- rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
+ rq = list_last_entry(&tl->requests, typeof(*rq), link);
cond_synchronize_rcu(rq->rcustate);
/* Retire our old requests in the hope that we free some */
- ring_retire_requests(ring);
+ retire_requests(tl);
out:
return kmem_cache_alloc(global.slab_requests, gfp);
@@ -649,7 +620,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq = kmem_cache_alloc(global.slab_requests,
gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (unlikely(!rq)) {
- rq = request_alloc_slow(ce, gfp);
+ rq = request_alloc_slow(tl, gfp);
if (!rq) {
ret = -ENOMEM;
goto err_unreserve;
@@ -741,15 +712,15 @@ struct i915_request *
i915_request_create(struct intel_context *ce)
{
struct i915_request *rq;
- int err;
+ struct intel_timeline *tl;
- err = intel_context_timeline_lock(ce);
- if (err)
- return ERR_PTR(err);
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl))
+ return ERR_CAST(tl);
/* Move our oldest request to the slab-cache (if not in use!) */
- rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
- if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
+ rq = list_first_entry(&tl->requests, typeof(*rq), link);
+ if (!list_is_last(&rq->link, &tl->requests))
i915_request_retire(rq);
intel_context_enter(ce);
@@ -759,22 +730,22 @@ i915_request_create(struct intel_context *ce)
goto err_unlock;
/* Check that we do not interrupt ourselves with a new request */
- rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
+ rq->cookie = lockdep_pin_lock(&tl->mutex);
return rq;
err_unlock:
- intel_context_timeline_unlock(ce);
+ intel_context_timeline_unlock(tl);
return rq;
}
static int
i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
{
- if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+ if (list_is_first(&signal->link, &signal->timeline->requests))
return 0;
- signal = list_prev_entry(signal, ring_link);
+ signal = list_prev_entry(signal, link);
if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
return 0;
@@ -1167,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
*/
GEM_BUG_ON(rq->reserved_space > ring->space);
rq->reserved_space = 0;
+ rq->emitted_jiffies = jiffies;
/*
* Record the position of the start of the breadcrumb so that
@@ -1180,11 +1152,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
prev = __i915_request_add_to_timeline(rq);
- list_add_tail(&rq->ring_link, &ring->request_list);
- if (list_is_first(&rq->ring_link, &ring->request_list))
- list_add(&ring->active_link, &rq->i915->gt.active_rings);
- rq->emitted_jiffies = jiffies;
-
/*
* Let the backend know a new request has arrived that may need
* to adjust the existing execution schedule due to a high priority
@@ -1235,10 +1202,11 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
void i915_request_add(struct i915_request *rq)
{
+ struct intel_timeline * const tl = rq->timeline;
struct i915_request *prev;
- lockdep_assert_held(&rq->timeline->mutex);
- lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+ lockdep_assert_held(&tl->mutex);
+ lockdep_unpin_lock(&tl->mutex, rq->cookie);
trace_i915_request_add(rq);
@@ -1261,10 +1229,10 @@ void i915_request_add(struct i915_request *rq)
* work on behalf of others -- but instead we should benefit from
* improved resource management. (Well, that's the theory at least.)
*/
- if (prev && i915_request_completed(prev))
+ if (prev && i915_request_completed(prev) && prev->timeline == tl)
i915_request_retire_upto(prev);
- mutex_unlock(&rq->timeline->mutex);
+ mutex_unlock(&tl->mutex);
}
static unsigned long local_clock_us(unsigned int *cpu)
@@ -1484,18 +1452,43 @@ long i915_request_wait(struct i915_request *rq,
bool i915_retire_requests(struct drm_i915_private *i915)
{
- struct intel_ring *ring, *tmp;
+ struct intel_gt_timelines *timelines = &i915->gt.timelines;
+ struct intel_timeline *tl, *tn;
+ LIST_HEAD(free);
+
+ spin_lock(&timelines->lock);
+ list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+ if (!mutex_trylock(&tl->mutex))
+ continue;
+
+ intel_timeline_get(tl);
+ GEM_BUG_ON(!tl->active_count);
+ tl->active_count++; /* pin the list element */
+ spin_unlock(&timelines->lock);
- lockdep_assert_held(&i915->drm.struct_mutex);
+ retire_requests(tl);
- list_for_each_entry_safe(ring, tmp,
- &i915->gt.active_rings, active_link) {
- intel_ring_get(ring); /* last rq holds reference! */
- ring_retire_requests(ring);
- intel_ring_put(ring);
+ spin_lock(&timelines->lock);
+
+ /* Restart iteration after dropping lock */
+ list_safe_reset_next(tl, tn, link);
+ if (!--tl->active_count)
+ list_del(&tl->link);
+
+ mutex_unlock(&tl->mutex);
+
+ /* Defer the final release to after the spinlock */
+ if (refcount_dec_and_test(&tl->kref.refcount)) {
+ GEM_BUG_ON(tl->active_count);
+ list_add(&tl->link, &free);
+ }
}
+ spin_unlock(&timelines->lock);
+
+ list_for_each_entry_safe(tl, tn, &free, link)
+ __intel_timeline_free(&tl->kref);
- return !list_empty(&i915->gt.active_rings);
+ return !list_empty(&timelines->active_list);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 313df3c37158..22e506e960e0 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -223,9 +223,6 @@ struct i915_request {
/** timeline->request entry for this request */
struct list_head link;
- /** ring->request_list entry for this request */
- struct list_head ring_link;
-
struct drm_i915_file_private *file_priv;
/** file_priv list entry for this request */
struct list_head client_link;
--
2.23.0.rc1
More information about the Intel-gfx
mailing list