[PATCH 32/68] drm/i915/gt: Acquire backing storage for the context

Chris Wilson chris at chris-wilson.co.uk
Mon Jul 13 15:07:24 UTC 2020


Pull the individual acquisition of the context objects (state, ring,
timeline) under a common i915_acquire_ctx in preparation to allow the
context to evict memory (or rather the i915_acquire_ctx on its behalf).

The context objects maintain their semi-permanent status; that is they
are assumed to be accessible by the HW at all times until we receive a
signal from the HW that they are no longer in use. Currently, we
generate such a signal ourselves from the context switch following the
final use of the objects. This means that they will remain on the HW for
an indefinite amount of time, and we retain the use of pinning to keep
them in the same place. As they are pinned, they can be processed
outside of the working set for the requests within the context. This is
useful, as the context share some global state causing it to incur a
global lock via its objects. By only requiring that lock as the context
is activated, it is both reduced in frequency and reduced in duration
(as compared to execbuf).

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 108 ++++++++++++++--
 drivers/gpu/drm/i915/gt/intel_ring.c          |  17 ++-
 drivers/gpu/drm/i915/gt/intel_ring.h          |   5 +-
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 117 ++++++++++++------
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  14 ++-
 drivers/gpu/drm/i915/gt/intel_timeline.h      |  10 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   2 +
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  30 ++++-
 8 files changed, 237 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 52db2bde44a3..2f1606365f63 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -6,6 +6,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
+#include "mm/i915_acquire_ctx.h"
 
 #include "i915_drv.h"
 #include "i915_globals.h"
@@ -93,6 +94,27 @@ static void intel_context_active_release(struct intel_context *ce)
 	i915_active_release(&ce->active);
 }
 
+static int __intel_context_sync(struct intel_context *ce)
+{
+	int err;
+
+	err = i915_vma_wait_for_bind(ce->ring->vma);
+	if (err)
+		return err;
+
+	err = i915_vma_wait_for_bind(ce->timeline->hwsp_ggtt);
+	if (err)
+		return err;
+
+	if (ce->state) {
+		err = i915_vma_wait_for_bind(ce->state);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 int __intel_context_do_pin(struct intel_context *ce)
 {
 	int err;
@@ -118,6 +140,10 @@ int __intel_context_do_pin(struct intel_context *ce)
 	}
 
 	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
+		err = __intel_context_sync(ce);
+		if (unlikely(err))
+			goto out_unlock;
+
 		err = intel_context_active_acquire(ce);
 		if (unlikely(err))
 			goto out_unlock;
@@ -166,12 +192,12 @@ void intel_context_unpin(struct intel_context *ce)
 	intel_context_put(ce);
 }
 
-static int __context_pin_state(struct i915_vma *vma)
+static int __context_active_locked(struct i915_vma *vma)
 {
 	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
 	int err;
 
-	err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
+	err = i915_ggtt_pin_locked(vma, 0, bias | PIN_HIGH);
 	if (err)
 		return err;
 
@@ -200,11 +226,11 @@ static void __context_unpin_state(struct i915_vma *vma)
 	__i915_vma_unpin(vma);
 }
 
-static int __ring_active(struct intel_ring *ring)
+static int __ring_active_locked(struct intel_ring *ring)
 {
 	int err;
 
-	err = intel_ring_pin(ring);
+	err = intel_ring_pin_locked(ring);
 	if (err)
 		return err;
 
@@ -244,27 +270,53 @@ static void __intel_context_retire(struct i915_active *active)
 	intel_context_put(ce);
 }
 
-static int __intel_context_active(struct i915_active *active)
+static int
+__intel_context_acquire_lock(struct intel_context *ce,
+			     struct i915_acquire_ctx *ctx)
+{
+	return i915_acquire_ctx_lock(ctx, ce->state->obj);
+}
+
+static int
+intel_context_acquire_lock(struct intel_context *ce,
+			   struct i915_acquire_ctx *ctx)
 {
-	struct intel_context *ce = container_of(active, typeof(*ce), active);
 	int err;
 
-	CE_TRACE(ce, "active\n");
+	err = intel_ring_acquire_lock(ce->ring, ctx);
+	if (err)
+		return err;
 
-	intel_context_get(ce);
+	if (ce->state) {
+		err = __intel_context_acquire_lock(ce, ctx);
+		if (err)
+			return err;
+	}
 
-	err = __ring_active(ce->ring);
+	/* Note that the timeline will migrate as the seqno wrap around */
+	err = intel_timeline_acquire_lock(ce->timeline, ctx);
 	if (err)
-		goto err_put;
+		return err;
+
+	return 0;
+}
 
-	err = intel_timeline_pin(ce->timeline);
+static int intel_context_active_locked(struct intel_context *ce)
+{
+	int err;
+
+	err = __ring_active_locked(ce->ring);
+	if (err)
+		return err;
+
+	err = intel_timeline_pin_locked(ce->timeline);
 	if (err)
 		goto err_ring;
 
 	if (!ce->state)
 		return 0;
 
-	err = __context_pin_state(ce->state);
+	err = __context_active_locked(ce->state);
 	if (err)
 		goto err_timeline;
 
@@ -274,7 +326,37 @@ static int __intel_context_active(struct i915_active *active)
 	intel_timeline_unpin(ce->timeline);
 err_ring:
 	__ring_retire(ce->ring);
-err_put:
+	return err;
+}
+
+static int __intel_context_active(struct i915_active *active)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+	struct i915_acquire_ctx acquire;
+	int err;
+
+	CE_TRACE(ce, "active\n");
+
+	intel_context_get(ce);
+	i915_acquire_ctx_init(&acquire);
+
+	err = intel_context_acquire_lock(ce, &acquire);
+	if (err)
+		goto err;
+
+	err = i915_acquire_mm(&acquire);
+	if (err)
+		goto err;
+
+	err = intel_context_active_locked(ce);
+	if (err)
+		goto err;
+
+	i915_acquire_ctx_fini(&acquire);
+	return 0;
+
+err:
+	i915_acquire_ctx_fini(&acquire);
 	intel_context_put(ce);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index bdb324167ef3..1c21f5725731 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -5,6 +5,8 @@
  */
 
 #include "gem/i915_gem_object.h"
+#include "mm/i915_acquire_ctx.h"
+
 #include "i915_drv.h"
 #include "i915_vma.h"
 #include "intel_engine.h"
@@ -21,9 +23,16 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
 	return space;
 }
 
-int intel_ring_pin(struct intel_ring *ring)
+int intel_ring_acquire_lock(struct intel_ring *ring,
+			    struct i915_acquire_ctx *ctx)
+{
+	return i915_acquire_ctx_lock(ctx, ring->vma->obj);
+}
+
+int intel_ring_pin_locked(struct intel_ring *ring)
 {
 	struct i915_vma *vma = ring->vma;
+	enum i915_map_type type;
 	unsigned int flags;
 	void *addr;
 	int ret;
@@ -39,15 +48,15 @@ int intel_ring_pin(struct intel_ring *ring)
 	else
 		flags |= PIN_HIGH;
 
-	ret = i915_ggtt_pin(vma, 0, flags);
+	ret = i915_ggtt_pin_locked(vma, 0, flags);
 	if (unlikely(ret))
 		goto err_unpin;
 
+	type = i915_coherent_map_type(vma->vm->i915);
 	if (i915_vma_is_map_and_fenceable(vma))
 		addr = (void __force *)i915_vma_pin_iomap(vma);
 	else
-		addr = i915_gem_object_pin_map(vma->obj,
-					       i915_coherent_map_type(vma->vm->i915));
+		addr = __i915_gem_object_pin_map_locked(vma->obj, type);
 	if (IS_ERR(addr)) {
 		ret = PTR_ERR(addr);
 		goto err_ring;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index cc0ebca65167..34134a0b80b3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -11,6 +11,7 @@
 #include "i915_request.h"
 #include "intel_ring_types.h"
 
+struct i915_acquire_ctx;
 struct intel_engine_cs;
 
 struct intel_ring *
@@ -21,7 +22,9 @@ int intel_ring_cacheline_align(struct i915_request *rq);
 
 unsigned int intel_ring_update_space(struct intel_ring *ring);
 
-int intel_ring_pin(struct intel_ring *ring);
+int intel_ring_acquire_lock(struct intel_ring *ring,
+			    struct i915_acquire_ctx *ctx);
+int intel_ring_pin_locked(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_reset(struct intel_ring *ring, u32 tail);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 9a126ad517c1..ec54ff029699 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -27,6 +27,8 @@
  *
  */
 
+#include "mm/i915_acquire_ctx.h"
+
 #include "gen2_engine_cs.h"
 #include "gen6_engine_cs.h"
 #include "gen6_ppgtt.h"
@@ -1009,6 +1011,15 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = gen6_bsd_submit_request;
 }
 
+static void ring_release_global_timeline(struct intel_engine_cs *engine)
+{
+	intel_ring_unpin(engine->legacy.ring);
+	intel_ring_put(engine->legacy.ring);
+
+	intel_timeline_unpin(engine->legacy.timeline);
+	intel_timeline_put(engine->legacy.timeline);
+}
+
 static void ring_release(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1023,11 +1034,7 @@ static void ring_release(struct intel_engine_cs *engine)
 		i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
 	}
 
-	intel_ring_unpin(engine->legacy.ring);
-	intel_ring_put(engine->legacy.ring);
-
-	intel_timeline_unpin(engine->legacy.timeline);
-	intel_timeline_put(engine->legacy.timeline);
+	ring_release_global_timeline(engine);
 }
 
 static void setup_irq(struct intel_engine_cs *engine)
@@ -1226,12 +1233,69 @@ static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine)
 	return err;
 }
 
-int intel_ring_submission_setup(struct intel_engine_cs *engine)
+static int ring_setup_global_timeline(struct intel_engine_cs *engine)
 {
+	struct i915_acquire_ctx acquire;
 	struct intel_timeline *timeline;
 	struct intel_ring *ring;
 	int err;
 
+	timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
+	if (IS_ERR(timeline))
+		return PTR_ERR(timeline);
+	GEM_BUG_ON(timeline->has_initial_breadcrumb);
+
+	ring = intel_engine_create_ring(engine, SZ_16K);
+	if (IS_ERR(ring)) {
+		err = PTR_ERR(ring);
+		goto err_timeline;
+	}
+
+	i915_acquire_ctx_init(&acquire);
+
+	err = intel_ring_acquire_lock(ring, &acquire);
+	if (err)
+		goto err_acquire;
+
+	err = intel_timeline_acquire_lock(timeline, &acquire);
+	if (err)
+		goto err_acquire;
+
+	err = i915_acquire_mm(&acquire);
+	if (err)
+		goto err_acquire;
+
+	err = intel_timeline_pin_locked(timeline);
+	if (err)
+		goto err_acquire;
+
+	err = intel_ring_pin_locked(ring);
+	if (err)
+		goto err_timeline_unpin;
+
+	i915_acquire_ctx_fini(&acquire);
+
+	GEM_BUG_ON(engine->legacy.ring);
+	engine->legacy.ring = ring;
+	engine->legacy.timeline = timeline;
+
+	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
+	return 0;
+
+err_timeline_unpin:
+	intel_timeline_unpin(timeline);
+err_acquire:
+	i915_acquire_ctx_fini(&acquire);
+	intel_ring_put(ring);
+err_timeline:
+	intel_timeline_put(timeline);
+	return err;
+}
+
+int intel_ring_submission_setup(struct intel_engine_cs *engine)
+{
+	int err;
+
 	setup_common(engine);
 
 	switch (engine->class) {
@@ -1252,37 +1316,14 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
 		return -ENODEV;
 	}
 
-	timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
-	if (IS_ERR(timeline)) {
-		err = PTR_ERR(timeline);
-		goto err;
-	}
-	GEM_BUG_ON(timeline->has_initial_breadcrumb);
-
-	err = intel_timeline_pin(timeline);
-	if (err)
-		goto err_timeline;
-
-	ring = intel_engine_create_ring(engine, SZ_16K);
-	if (IS_ERR(ring)) {
-		err = PTR_ERR(ring);
-		goto err_timeline_unpin;
-	}
-
-	err = intel_ring_pin(ring);
+	err = ring_setup_global_timeline(engine);
 	if (err)
-		goto err_ring;
-
-	GEM_BUG_ON(engine->legacy.ring);
-	engine->legacy.ring = ring;
-	engine->legacy.timeline = timeline;
-
-	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
+		goto err_common;
 
 	if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) {
 		err = gen7_ctx_switch_bb_init(engine);
 		if (err)
-			goto err_ring_unpin;
+			goto err_global;
 	}
 
 	/* Finally, take ownership and responsibility for cleanup! */
@@ -1290,15 +1331,9 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
 
 	return 0;
 
-err_ring_unpin:
-	intel_ring_unpin(ring);
-err_ring:
-	intel_ring_put(ring);
-err_timeline_unpin:
-	intel_timeline_unpin(timeline);
-err_timeline:
-	intel_timeline_put(timeline);
-err:
+err_global:
+	ring_release_global_timeline(engine);
+err_common:
 	intel_engine_cleanup_common(engine);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index e4a5326633b8..0e449c4b3e61 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -4,9 +4,10 @@
  * Copyright © 2016-2018 Intel Corporation
  */
 
-#include "i915_drv.h"
+#include "mm/i915_acquire_ctx.h"
 
 #include "i915_active.h"
+#include "i915_drv.h"
 #include "i915_syncmap.h"
 #include "intel_gt.h"
 #include "intel_ring.h"
@@ -313,14 +314,21 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
 	return timeline;
 }
 
-int intel_timeline_pin(struct intel_timeline *tl)
+int
+intel_timeline_acquire_lock(struct intel_timeline *tl,
+			    struct i915_acquire_ctx *ctx)
+{
+	return i915_acquire_ctx_lock(ctx, tl->hwsp_ggtt->obj);
+}
+
+int intel_timeline_pin_locked(struct intel_timeline *tl)
 {
 	int err;
 
 	if (atomic_add_unless(&tl->pin_count, 1, 0))
 		return 0;
 
-	err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
+	err = i915_ggtt_pin_locked(tl->hwsp_ggtt, 0, PIN_HIGH);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index 4298b9ac7327..073c94cd8160 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -29,7 +29,9 @@
 
 #include "i915_active.h"
 #include "i915_syncmap.h"
-#include "gt/intel_timeline_types.h"
+#include "intel_timeline_types.h"
+
+struct i915_acquire_ctx;
 
 struct intel_timeline *
 intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
@@ -71,7 +73,11 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
 	return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
 }
 
-int intel_timeline_pin(struct intel_timeline *tl);
+int
+intel_timeline_acquire_lock(struct intel_timeline *tl,
+			    struct i915_acquire_ctx *ctx);
+int intel_timeline_pin_locked(struct intel_timeline *tl);
+
 void intel_timeline_enter(struct intel_timeline *tl);
 int intel_timeline_get_seqno(struct intel_timeline *tl,
 			     struct i915_request *rq,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index b8dd3cbc8696..283cfa2912b3 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -67,6 +67,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma));
 	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags);
 	ring->vma->node.size = sz;
+	ring->vma->obj = i915_gem_object_create_internal(engine->i915, 4096);
 
 	intel_ring_update_space(ring);
 
@@ -75,6 +76,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 
 static void mock_ring_free(struct intel_ring *ring)
 {
+	i915_gem_object_put(ring->vma->obj);
 	i915_active_fini(&ring->vma->active);
 	i915_vma_free(ring->vma);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index fcdee951579b..db7966106569 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -6,6 +6,8 @@
 
 #include <linux/prime_numbers.h>
 
+#include "mm/i915_acquire_ctx.h"
+
 #include "intel_context.h"
 #include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
@@ -449,13 +451,37 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
 	return 0;
 }
 
+static int tl_pin(struct intel_timeline *tl)
+{
+	struct i915_acquire_ctx acquire;
+	int err;
+
+	i915_acquire_ctx_init(&acquire);
+
+	err = intel_timeline_acquire_lock(tl, &acquire);
+	if (err)
+		goto out;
+
+	err = i915_acquire_mm(&acquire);
+	if (err)
+		goto out;
+
+	err = intel_timeline_pin_locked(tl);
+	if (err == 0)
+		err = i915_vma_wait_for_bind(tl->hwsp_ggtt);
+
+out:
+	i915_acquire_ctx_fini(&acquire);
+	return err;
+}
+
 static struct i915_request *
 tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 {
 	struct i915_request *rq;
 	int err;
 
-	err = intel_timeline_pin(tl);
+	err = tl_pin(tl);
 	if (err) {
 		rq = ERR_PTR(err);
 		goto out;
@@ -665,7 +691,7 @@ static int live_hwsp_wrap(void *arg)
 	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 		goto out_free;
 
-	err = intel_timeline_pin(tl);
+	err = tl_pin(tl);
 	if (err)
 		goto out_free;
 
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list