[PATCH 51/60] drm/i915: Lift i915_vma_move_to_active locking to caller

Chris Wilson chris at chris-wilson.co.uk
Sun Jul 1 10:21:20 UTC 2018


Time to start treating ww_mutex with respect, and use it to acquire
every vma->resv->lock concurrently, with a view to removing the
struct_mutex serialisation in the near future. We have to acquire the
lock on every vma concurrently in order to serialise the await with
the current fence, preventing the current request from leaking out to
other users and causing a cycle (as we may then import our own fence
as a dependency via one of the objects).

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
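A note for reviewers on the locking scheme: eb_move_to_gpu() now
performs the documented ww_mutex acquire/backoff dance across every
eb->vma[i]->resv->lock. Below is a minimal illustrative sketch of that
pattern (lock_all_resv() and the locks[] array are hypothetical
stand-ins for the vma reservation locks, not code from this patch; the
real loop instead rotates eb->vma[] so the contended lock sits at
index 0 and is re-acquired first):

static int lock_all_resv(struct ww_mutex **locks, unsigned int count,
			 struct ww_acquire_ctx *ctx)
{
	unsigned int i, contended = count; /* count means "none" */
	int err;

	ww_acquire_init(ctx, &reservation_ww_class);
retry:
	for (i = 0; i < count; i++) {
		/* Skip the lock already taken via the slowpath below. */
		if (i == contended) {
			contended = count;
			continue;
		}

		err = ww_mutex_lock_interruptible(locks[i], ctx);
		if (err)
			goto unwind;
	}
	ww_acquire_done(ctx);
	return 0;

unwind:
	/* Drop a slowpath lock that this pass has not reached yet. */
	if (contended != count)
		ww_mutex_unlock(locks[contended]);
	contended = i; /* remember the lock we could not take */
	while (i--)
		ww_mutex_unlock(locks[i]);

	if (err == -EDEADLK) {
		/* Sleep on the contended lock, then retry the others. */
		err = ww_mutex_lock_slow_interruptible(locks[contended],
						       ctx);
		if (!err)
			goto retry;
	}

	ww_acquire_fini(ctx);
	return err;
}
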
 drivers/gpu/drm/i915/gvt/scheduler.c          |  2 +
 drivers/gpu/drm/i915/i915_gem.c               | 11 ++-
 drivers/gpu/drm/i915/i915_gem_clflush.c       |  8 +-
 drivers/gpu/drm/i915/i915_gem_clflush.h       |  5 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 81 ++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_object.h        |  3 +
 drivers/gpu/drm/i915/i915_gem_render_state.c  |  2 +
 drivers/gpu/drm/i915/i915_vma.c               |  3 +-
 drivers/gpu/drm/i915/i915_vma.h               | 12 +++
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  4 +
 .../drm/i915/selftests/i915_gem_coherency.c   |  3 +
 .../gpu/drm/i915/selftests/i915_gem_context.c |  4 +
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  2 +
 drivers/gpu/drm/i915/selftests/i915_request.c |  4 +
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  4 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |  4 +
 .../drm/i915/selftests/intel_workarounds.c    |  2 +
 17 files changed, 124 insertions(+), 30 deletions(-)
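
For reference, the calling convention after this patch: every caller
of i915_vma_move_to_active() must already hold the vma's reservation
lock; the function itself merely asserts it. A minimal hypothetical
wrapper (move_one_vma() is not part of this patch) showing the
single-vma pattern that the gvt, render-state and selftest hunks below
all follow:

static int move_one_vma(struct i915_vma *vma,
			struct i915_request *rq,
			unsigned int flags)
{
	int err;

	/* i915_vma_lock() wraps reservation_object_lock(vma->resv, NULL) */
	i915_vma_lock(vma);
	err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}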

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 2da2e5baafaa..63c207807d81 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -476,9 +476,11 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			i915_gem_obj_finish_shmem_access(bb->obj);
 			bb->accessing = false;
 
+			i915_vma_lock(bb->vma);
 			ret = i915_vma_move_to_active(bb->vma,
 						      workload->req,
 						      0);
+			i915_vma_unlock(bb->vma);
 			if (ret)
 				goto err;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6a28812956aa..66d6f439d000 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3420,13 +3420,18 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 
 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 {
+	i915_gem_object_assert_held(obj);
+
 	/*
 	 * We manually flush the CPU domain so that we can override and
 	 * force the flush for the display, and perform it asynchronously.
 	 */
 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 	if (obj->cache_dirty)
-		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+		i915_gem_clflush_object(obj,
+					I915_CLFLUSH_FORCE |
+					I915_CLFLUSH_LOCKED);
+
 	obj->write_domain = 0;
 }
 
@@ -3435,9 +3440,9 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 	if (!READ_ONCE(obj->pin_global))
 		return;
 
-	mutex_lock(&obj->base.dev->struct_mutex);
+	i915_gem_object_lock(obj);
 	__i915_gem_object_flush_for_display(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index f5c570d35b2a..b5ddb610099c 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -169,9 +169,13 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 						true, I915_FENCE_TIMEOUT,
 						I915_FENCE_GFP);
 
-		reservation_object_lock(obj->resv, NULL);
+		if (!(flags & I915_CLFLUSH_LOCKED))
+			reservation_object_lock(obj->resv, NULL);
+
 		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
-		reservation_object_unlock(obj->resv);
+
+		if (!(flags & I915_CLFLUSH_LOCKED))
+			reservation_object_unlock(obj->resv);
 
 		i915_sw_fence_commit(&clflush->wait);
 	} else if (obj->mm.pages) {
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
index f390247561b3..9e0a3ff86dfc 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.h
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.h
@@ -30,7 +30,8 @@ struct drm_i915_gem_object;
 
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 			     unsigned int flags);
-#define I915_CLFLUSH_FORCE BIT(0)
-#define I915_CLFLUSH_SYNC BIT(1)
+#define I915_CLFLUSH_FORCE	BIT(0)
+#define I915_CLFLUSH_SYNC	BIT(1)
+#define I915_CLFLUSH_LOCKED	BIT(2)
 
 #endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 32cbdfd4f16d..f8025ddc928a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1185,11 +1185,15 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_request;
 
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -1800,24 +1804,40 @@ static int eb_relocate(struct i915_execbuffer *eb)
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
+	struct ww_acquire_ctx acquire;
 	unsigned int i;
-	int err;
+	int err = 0;
 
-	for (i = 0; i < count; i++) {
+	ww_acquire_init(&acquire, &reservation_ww_class);
+
+	for (i = 0; i < count && err == 0; i++) {
 		unsigned int flags = eb->flags[i];
 		struct i915_vma *vma = eb->vma[i];
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		if (flags & EXEC_OBJECT_CAPTURE) {
-			struct i915_capture_list *capture;
-
-			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
-			if (unlikely(!capture))
-				return -ENOMEM;
+		err = ww_mutex_lock_interruptible(&vma->resv->lock,
+						  &acquire);
+		if (unlikely(err)) {
+			/* No duplicate execobjects/vma */
+			GEM_BUG_ON(err == -EALREADY);
+
+			if (err == -EDEADLK) {
+				GEM_BUG_ON(i == 0);
+				do {
+					ww_mutex_unlock(&eb->vma[i - 1]->resv->lock);
+					swap(eb->flags[i], eb->flags[i - 1]);
+					swap(eb->vma[i], eb->vma[i - 1]);
+					eb->vma[i]->exec_flags = &eb->flags[i];
+				} while (--i);
+				GEM_BUG_ON(vma != eb->vma[0]);
+				vma->exec_flags = &eb->flags[0];
+
+				err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+								       &acquire);
+			}
 
-			capture->next = eb->request->capture_list;
-			capture->vma = eb->vma[i];
-			eb->request->capture_list = capture;
+			if (err)
+				break; /* eb->vma[i] is left unlocked */
 		}
 
 		/*
@@ -1833,7 +1853,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		 * two jumps instead of one. Maybe one day...
 		 */
 		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
-			if (i915_gem_clflush_object(obj, 0))
+			if (i915_gem_clflush_object(obj, I915_CLFLUSH_LOCKED))
 				flags &= ~EXEC_OBJECT_ASYNC;
 		}
 
@@ -1848,24 +1868,43 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 
 		err = i915_request_await_object
 			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
-		if (err)
-			return err;
 	}
+	ww_acquire_done(&acquire);
 
-	for (i = 0; i < count; i++) {
-		err = i915_vma_move_to_active(eb->vma[i],
-					      eb->request,
-					      eb->flags[i]);
-		if (unlikely(err)) {
-			i915_request_skip(eb->request, err);
-			return err;
+	while (i--) {
+		i915_vma_assert_held(eb->vma[i]);
+
+		if (eb->flags[i] & EXEC_OBJECT_CAPTURE) {
+			struct i915_capture_list *capture;
+
+			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+			if (likely(capture)) {
+				capture->next = eb->request->capture_list;
+				capture->vma = eb->vma[i];
+				eb->request->capture_list = capture;
+			} else
+				err = -ENOMEM;
 		}
+
+		if (err == 0)
+			err = i915_vma_move_to_active(eb->vma[i],
+						      eb->request,
+						      eb->flags[i]);
+		i915_vma_unlock(eb->vma[i]);
 	}
+	ww_acquire_fini(&acquire);
+
+	if (unlikely(err))
+		goto err_skip;
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
 	i915_gem_chipset_flush(eb->i915);
 
 	return 0;
+
+err_skip:
+	i915_request_skip(eb->request, err);
+	return err;
 }
 
 static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 75c79907598f..dfdd365b6163 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -365,6 +365,9 @@ static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
 	reservation_object_unlock(obj->resv);
 }
 
+#define i915_gem_object_assert_held(obj) \
+	reservation_object_assert_held((obj)->resv)
+
 static inline void
 i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 904581fdbde7..67cac97fcfd4 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -227,7 +227,9 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 			goto err_unpin;
 	}
 
+	i915_vma_lock(so.vma);
 	err = i915_vma_move_to_active(so.vma, rq, 0);
+	i915_vma_unlock(so.vma);
 err_unpin:
 	mutex_lock(&ggtt->vm.mutex);
 	__i915_vma_unpin(so.vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 0bcc91a06733..1750d664eaa9 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -938,12 +938,10 @@ static void export_fence(struct i915_vma *vma,
 	 * handle an error right now. Worst case should be missed
 	 * synchronisation leading to rendering corruption.
 	 */
-	reservation_object_lock(resv, NULL);
 	if (flags & EXEC_OBJECT_WRITE)
 		reservation_object_add_excl_fence(resv, &rq->fence);
 	else if (reservation_object_reserve_shared(resv) == 0)
 		reservation_object_add_shared_fence(resv, &rq->fence);
-	reservation_object_unlock(resv);
 }
 
 static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
@@ -1007,6 +1005,7 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct i915_gem_active *active;
 
+	i915_vma_assert_held(vma);
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 91dab9586b6a..aef9877652d9 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -146,6 +146,18 @@ static inline bool i915_vma_is_active(struct i915_vma *vma)
 	return vma->active_count;
 }
 
+static inline void i915_vma_lock(struct i915_vma *vma)
+{
+	reservation_object_lock(vma->resv, NULL);
+}
+
+static inline void i915_vma_unlock(struct i915_vma *vma)
+{
+	reservation_object_unlock(vma->resv);
+}
+
+#define i915_vma_assert_held(vma) reservation_object_assert_held((vma)->resv)
+
 int __must_check i915_vma_move_to_active(struct i915_vma *vma,
 					 struct i915_request *rq,
 					 unsigned int flags);
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 78ff95b52630..28b47ed677c3 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -985,7 +985,9 @@ static int gpu_write(struct i915_vma *vma,
 		goto err_request;
 	}
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto err_request;
 
@@ -999,7 +1001,9 @@ static int gpu_write(struct i915_vma *vma,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		i915_request_skip(rq, err);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index 0aadf811d34c..e4ea5af6cefe 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -224,7 +224,10 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	}
 	intel_ring_advance(rq, cs);
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+
 	i915_vma_unpin(vma);
 
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 1269b4a3dacb..236d6d123c84 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -167,11 +167,15 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index cea26309cfc4..bb49d022db14 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -456,7 +456,9 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 		return PTR_ERR(rq);
 	}
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 
 	i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 3ecfdb1bc9ea..0008a038b50f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -683,7 +683,9 @@ static int live_all_engines(void *arg)
 			i915_gem_object_set_active_reference(batch->obj);
 		}
 
+		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
+		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
 		i915_request_get(request[id]);
@@ -797,7 +799,9 @@ static int live_sequential_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
+		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
+		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
 		i915_gem_object_set_active_reference(batch->obj);
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 408458583b4b..f19f6f6d9ef9 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -130,7 +130,9 @@ static int emit_recurse_batch(struct hang *h,
 	if (err)
 		goto unpin_vma;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, 0);
+	i915_vma_unlock(vma);
 	if (err)
 		goto unpin_hws;
 
@@ -139,7 +141,9 @@ static int emit_recurse_batch(struct hang *h,
 		i915_gem_object_set_active_reference(vma->obj);
 	}
 
+	i915_vma_lock(hws);
 	err = i915_vma_move_to_active(hws, rq, 0);
+	i915_vma_unlock(hws);
 	if (err)
 		goto unpin_hws;
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index a602bb697177..831e4d2638ef 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -106,7 +106,9 @@ static int emit_recurse_batch(struct spinner *spin,
 	if (err)
 		goto unpin_vma;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, 0);
+	i915_vma_unlock(vma);
 	if (err)
 		goto unpin_hws;
 
@@ -115,7 +117,9 @@ static int emit_recurse_batch(struct spinner *spin,
 		i915_gem_object_set_active_reference(vma->obj);
 	}
 
+	i915_vma_lock(hws);
 	err = i915_vma_move_to_active(hws, rq, 0);
+	i915_vma_unlock(hws);
 	if (err)
 		goto unpin_hws;
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 0fa0cdb6356b..6c48dbf00cd1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -50,7 +50,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 		goto err_pin;
 	}
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto err_req;
 
-- 
2.18.0


