[PATCH 26/35] drm/i915/gem: Don't drop the timeline lock during execbuf
Chris Wilson
chris at chris-wilson.co.uk
Fri Jun 19 19:17:39 UTC 2020
Our timeline lock is our defence against a concurrent execbuf
interrupting our request construction. We need to hold it throughout or,
for example, a second thread may interject a relocation request in
between our own relocation request and execution in the ring.
A second, major benefit is that it allows us to preserve a large chunk
of the ringbuffer for our exclusive use; which should virtually
eliminate the threat of hitting a wait_for_space during request
construction -- although we should have already dropped other
contentious locks at that point.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 165 ++++++++++++------
.../i915/gem/selftests/i915_gem_execbuffer.c | 23 ++-
2 files changed, 136 insertions(+), 52 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 37b40380a98c..156e883a5723 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -643,6 +643,35 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
return 0;
}
+static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
+{
+ struct i915_request *rq, *rn;
+
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
+ if (rq == end || !i915_request_retire(rq))
+ break;
+}
+
+static int wait_for_timeline(struct intel_timeline *tl)
+{
+ do {
+ struct dma_fence *fence;
+ int err;
+
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
+ return 0;
+
+ err = dma_fence_wait(fence, true);
+ dma_fence_put(fence);
+ if (err)
+ return err;
+
+ /* Retiring may trigger a barrier, requiring an extra pass */
+ retire_requests(tl, NULL);
+ } while (1);
+}
+
static int eb_reserve(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
@@ -650,7 +679,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
struct list_head last;
struct eb_vma *ev;
unsigned int i, pass;
- int err = 0;
/*
* Attempt to pin all of the buffers into the GTT.
@@ -666,18 +694,22 @@ static int eb_reserve(struct i915_execbuffer *eb)
* room for the earlier objects *unless* we need to defragment.
*/
- if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
- return -EINTR;
-
pass = 0;
do {
+ int err = 0;
+
+ if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
+ return -EINTR;
+
list_for_each_entry(ev, &eb->unbound, bind_link) {
err = eb_reserve_vma(eb, ev, pin_flags);
if (err)
break;
}
- if (!(err == -ENOSPC || err == -EAGAIN))
- break;
+ if (!(err == -ENOSPC || err == -EAGAIN)) {
+ mutex_unlock(&eb->i915->drm.struct_mutex);
+ return err;
+ }
/* Resort *all* the objects into priority order */
INIT_LIST_HEAD(&eb->unbound);
@@ -706,11 +738,10 @@ static int eb_reserve(struct i915_execbuffer *eb)
list_add_tail(&ev->bind_link, &last);
}
list_splice_tail(&last, &eb->unbound);
+ mutex_unlock(&eb->i915->drm.struct_mutex);
if (err == -EAGAIN) {
- mutex_unlock(&eb->i915->drm.struct_mutex);
flush_workqueue(eb->i915->mm.userptr_wq);
- mutex_lock(&eb->i915->drm.struct_mutex);
continue;
}
@@ -719,25 +750,22 @@ static int eb_reserve(struct i915_execbuffer *eb)
break;
case 1:
- /* Too fragmented, unbind everything and retry */
- mutex_lock(&eb->context->vm->mutex);
- err = i915_gem_evict_vm(eb->context->vm);
- mutex_unlock(&eb->context->vm->mutex);
+ /*
+ * Too fragmented, retire everything on the timeline
+ * and so make it available to evict.
+ */
+ err = wait_for_timeline(eb->context->timeline);
if (err)
- goto unlock;
+ return err;
+
break;
default:
- err = -ENOSPC;
- goto unlock;
+ return -ENOSPC;
}
pin_flags = PIN_USER;
} while (1);
-
-unlock:
- mutex_unlock(&eb->i915->drm.struct_mutex);
- return err;
}
static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
@@ -1023,8 +1051,21 @@ static int reloc_gpu_emit(struct reloc_cache *cache)
return err;
}
-static void reloc_gpu_flush(struct reloc_cache *cache)
+static void __i915_request_add(struct i915_request *rq,
+ struct i915_sched_attr *attr)
+{
+ struct intel_timeline * const tl = i915_request_timeline(rq);
+
+ lockdep_assert_held(&tl->mutex);
+ lockdep_unpin_lock(&tl->mutex, rq->cookie);
+
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, attr);
+}
+
+static void reloc_gpu_flush(struct i915_execbuffer *eb)
{
+ struct reloc_cache *cache = &eb->reloc_cache;
struct i915_request *rq = cache->rq;
if (cache->rq_vma) {
@@ -1040,7 +1081,32 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
intel_gt_chipset_flush(rq->engine->gt);
i915_request_get(rq);
- i915_request_add(rq);
+ __i915_request_add(rq, &eb->gem_context->sched);
+ if (i915_request_timeline(rq) != eb->context->timeline)
+ mutex_unlock(&i915_request_timeline(rq)->mutex);
+}
+
+static struct i915_request *
+nested_request_create(struct intel_context *ce)
+{
+ struct i915_request *rq;
+ int err;
+
+ err = intel_context_pin(ce);
+ if (unlikely(err))
+ return ERR_PTR(err);
+
+ mutex_lock_nested(&ce->timeline->mutex, SINGLE_DEPTH_NESTING);
+ intel_context_enter(ce);
+
+ rq = __i915_request_create(ce, GFP_KERNEL);
+
+ intel_context_exit(ce);
+ if (IS_ERR(rq))
+ mutex_unlock(&ce->timeline->mutex);
+
+ intel_context_unpin(ce);
+ return rq;
}
static int
@@ -1077,7 +1143,7 @@ __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
goto err_unmap;
if (engine == eb->context->engine) {
- rq = i915_request_create(eb->context);
+ rq = __i915_request_create(eb->context, GFP_KERNEL);
} else {
struct intel_context *ce;
@@ -1087,16 +1153,19 @@ __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
goto err_unpin;
}
+ /* Reuse eb->context->timeline with scheduler! */
+
i915_vm_put(ce->vm);
ce->vm = i915_vm_get(eb->context->vm);
- rq = intel_context_create_request(ce);
+ rq = nested_request_create(ce);
intel_context_put(ce);
}
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
}
+ rq->cookie = lockdep_pin_lock(&i915_request_timeline(rq)->mutex);
err = intel_gt_buffer_pool_mark_active(pool, rq);
if (err)
@@ -1122,7 +1191,9 @@ __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
goto out_pool;
err_request:
- i915_request_add(rq);
+ __i915_request_add(rq, &eb->gem_context->sched);
+ if (i915_request_timeline(rq) != eb->context->timeline)
+ mutex_unlock(&i915_request_timeline(rq)->mutex);
err_unpin:
i915_vma_unpin(batch);
err_unmap:
@@ -1569,7 +1640,7 @@ static int reloc_gpu(struct i915_execbuffer *eb)
}
out:
- reloc_gpu_flush(&eb->reloc_cache);
+ reloc_gpu_flush(eb);
return err;
}
@@ -1835,21 +1906,17 @@ parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl)
{
int err;
- mutex_lock(&tl->mutex);
-
err = __parser_mark_active(pw->shadow, tl, &pw->base.dma);
if (err)
- goto unlock;
+ return err;
if (pw->trampoline) {
err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma);
if (err)
- goto unlock;
+ return err;
}
-unlock:
- mutex_unlock(&tl->mutex);
- return err;
+ return 0;
}
static int eb_parse_pipeline(struct i915_execbuffer *eb,
@@ -2199,9 +2266,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
intel_context_enter(ce);
rq = eb_throttle(ce);
- intel_context_timeline_unlock(tl);
-
- if (rq) {
+ while (rq) {
bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
long timeout;
@@ -2209,6 +2274,8 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
if (nonblock)
timeout = 0;
+ intel_context_timeline_unlock(tl);
+
timeout = i915_request_wait(rq,
I915_WAIT_INTERRUPTIBLE,
timeout);
@@ -2218,6 +2285,15 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
err = nonblock ? -EWOULDBLOCK : timeout;
goto err_exit;
}
+
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ goto err_exit;
+ }
+
+ retire_requests(tl, NULL);
+ rq = eb_throttle(ce);
}
eb->engine = ce->engine;
@@ -2236,11 +2312,9 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
static void eb_unpin_engine(struct i915_execbuffer *eb)
{
struct intel_context *ce = eb->context;
- struct intel_timeline *tl = ce->timeline;
- mutex_lock(&tl->mutex);
intel_context_exit(ce);
- mutex_unlock(&tl->mutex);
+ intel_context_timeline_unlock(ce->timeline);
intel_context_unpin(ce);
}
@@ -2442,15 +2516,6 @@ signal_fence_array(struct i915_execbuffer *eb,
}
}
-static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
-{
- struct i915_request *rq, *rn;
-
- list_for_each_entry_safe(rq, rn, &tl->requests, link)
- if (rq == end || !i915_request_retire(rq))
- break;
-}
-
static void eb_request_add(struct i915_execbuffer *eb)
{
struct i915_request *rq = eb->request;
@@ -2481,8 +2546,6 @@ static void eb_request_add(struct i915_execbuffer *eb)
/* Try to clean up the client's timeline after submitting the request */
if (prev)
retire_requests(tl, prev);
-
- mutex_unlock(&tl->mutex);
}
static int
@@ -2569,6 +2632,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err = eb_pin_engine(&eb, file, args);
if (unlikely(err))
goto err_context;
+ lockdep_assert_held(&eb.context->timeline->mutex);
err = eb_relocate(&eb);
if (err) {
@@ -2633,11 +2697,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
/* Allocate a request for this batch buffer nice and early. */
- eb.request = i915_request_create(eb.context);
+ eb.request = __i915_request_create(eb.context, GFP_KERNEL);
if (IS_ERR(eb.request)) {
err = PTR_ERR(eb.request);
goto err_batch_unpin;
}
+ eb.request->cookie = lockdep_pin_lock(&eb.context->timeline->mutex);
if (in_fence) {
if (args->flags & I915_EXEC_FENCE_SUBMIT)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 62bba179b455..d06fc70842c2 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -7,6 +7,9 @@
#include "gt/intel_engine_pm.h"
#include "selftests/igt_flush_test.h"
+#include "selftests/mock_drm.h"
+
+#include "mock_context.h"
static u64 read_reloc(const u32 *map, int x, const u64 mask)
{
@@ -73,7 +76,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
if (err)
goto unpin_vma;
- reloc_gpu_flush(&eb->reloc_cache);
+ reloc_gpu_flush(eb);
err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
if (err) {
@@ -111,14 +114,22 @@ static int igt_gpu_reloc(void *arg)
{
struct i915_execbuffer eb;
struct drm_i915_gem_object *scratch;
+ struct file *file;
int err = 0;
u32 *map;
+ file = mock_file(arg);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
eb.i915 = arg;
+ eb.gem_context = live_context(arg, file);
+ if (IS_ERR(eb.gem_context))
+ goto err_file;
scratch = i915_gem_object_create_internal(eb.i915, 4096);
if (IS_ERR(scratch))
- return PTR_ERR(scratch);
+ goto err_file;
map = i915_gem_object_pin_map(scratch, I915_MAP_WC);
if (IS_ERR(map)) {
@@ -142,8 +153,14 @@ static int igt_gpu_reloc(void *arg)
if (err)
goto err_put;
+ mutex_lock(&eb.context->timeline->mutex);
+ intel_context_enter(eb.context);
+
err = __igt_gpu_reloc(&eb, scratch);
+ intel_context_exit(eb.context);
+ mutex_unlock(&eb.context->timeline->mutex);
+
intel_context_unpin(eb.context);
err_put:
intel_context_put(eb.context);
@@ -158,6 +175,8 @@ static int igt_gpu_reloc(void *arg)
err_scratch:
i915_gem_object_put(scratch);
+err_file:
+ fput(file);
return err;
}
--
2.20.1
More information about the Intel-gfx-trybot
mailing list