[Intel-gfx] [PATCH 19/39] drm/i915: Provide an i915_active.acquire callback

Fri Jun 14 07:10:03 UTC 2019

If we introduce a callback for i915_active that is only called the first
time we use the i915_active and is symmetrically paired with the
i915_active.retire callback, we can replace the open-coded and
non-atomic implementations -- which will be very fragile (i.e. broken)
upon removing the struct_mutex serialisation.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   8 +-
 drivers/gpu/drm/i915/gt/intel_context.c      |  82 ++++---
 drivers/gpu/drm/i915/gt/intel_context.h      |  14 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c          |   6 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c   |   2 +-
 drivers/gpu/drm/i915/gt/mock_engine.c        |   2 +-
 drivers/gpu/drm/i915/i915_active.c           | 217 ++++++++++---------
 drivers/gpu/drm/i915/i915_active.h           |  25 ++-
 drivers/gpu/drm/i915/i915_active_types.h     |  10 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c          |   2 +-
 drivers/gpu/drm/i915/i915_timeline.c         |  16 +-
 drivers/gpu/drm/i915/i915_vma.c              |  22 +-
 drivers/gpu/drm/i915/selftests/i915_active.c |  12 +-
 13 files changed, 225 insertions(+), 193 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 1ce122f4ed25..9262a1d4f763 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -923,8 +923,12 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	if (!cb)
 		return -ENOMEM;
 
-	i915_active_init(i915, &cb->base, cb_retire);
-	i915_active_acquire(&cb->base);
+	i915_active_init(i915, &cb->base, NULL, cb_retire);
+	err = i915_active_acquire(&cb->base);
+	if (err) {
+		kfree(cb);
+		return err;
+	}
 
 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		struct i915_request *rq;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 3d660c8ce414..2ea05dcdb99f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -95,11 +95,14 @@ void intel_context_unpin(struct intel_context *ce)
 	intel_context_put(ce);
 }
 
-static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
+static int __context_pin_state(struct i915_vma *vma)
 {
+	u64 flags;
 	int err;
 
-	err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
+	flags = PIN_HIGH | PIN_GLOBAL;
+	flags |= i915_vm_to_ggtt(vma->vm)->pin_bias | PIN_OFFSET_BIAS;
+	err = i915_vma_pin(vma, 0, 0, flags);
 	if (err)
 		return err;
 
@@ -119,7 +122,7 @@ static void __context_unpin_state(struct i915_vma *vma)
 	__i915_vma_unpin(vma);
 }
 
-static void intel_context_retire(struct i915_active *active)
+static void __intel_context_retire(struct i915_active *active)
 {
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
 
@@ -129,65 +132,58 @@ static void intel_context_retire(struct i915_active *active)
 	intel_context_put(ce);
 }
 
-void
-intel_context_init(struct intel_context *ce,
-		   struct i915_gem_context *ctx,
-		   struct intel_engine_cs *engine)
-{
-	GEM_BUG_ON(!engine->cops);
-
-	kref_init(&ce->ref);
-
-	ce->gem_context = ctx;
-	ce->engine = engine;
-	ce->ops = engine->cops;
-	ce->sseu = engine->sseu;
-
-	INIT_LIST_HEAD(&ce->signal_link);
-	INIT_LIST_HEAD(&ce->signals);
-
-	mutex_init(&ce->pin_mutex);
-
-	i915_active_init(ctx->i915, &ce->active, intel_context_retire);
-}
-
-int intel_context_active_acquire(struct intel_context *ce, unsigned long flags)
+static int __intel_context_active(struct i915_active *active)
 {
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
 	int err;
 
-	if (!i915_active_acquire(&ce->active))
-		return 0;
-
 	intel_context_get(ce);
 
 	if (!ce->state)
 		return 0;
 
-	err = __context_pin_state(ce->state, flags);
-	if (err) {
-		i915_active_cancel(&ce->active);
-		intel_context_put(ce);
-		return err;
-	}
+	err = __context_pin_state(ce->state);
+	if (err)
+		goto err_put;
 
 	/* Preallocate tracking nodes */
 	if (!i915_gem_context_is_kernel(ce->gem_context)) {
 		err = i915_active_acquire_preallocate_barrier(&ce->active,
 							      ce->engine);
-		if (err) {
-			i915_active_release(&ce->active);
-			return err;
-		}
+		if (err)
+			goto err_unpin;
 	}
 
 	return 0;
+
+err_unpin:
+	__context_unpin_state(ce->state);
+err_put:
+	intel_context_put(ce);
+	return err;
 }
 
-void intel_context_active_release(struct intel_context *ce)
+void
+intel_context_init(struct intel_context *ce,
+		   struct i915_gem_context *ctx,
+		   struct intel_engine_cs *engine)
 {
-	/* Nodes preallocated in intel_context_active() */
-	i915_active_acquire_barrier(&ce->active);
-	i915_active_release(&ce->active);
+	GEM_BUG_ON(!engine->cops);
+
+	kref_init(&ce->ref);
+
+	ce->gem_context = ctx;
+	ce->engine = engine;
+	ce->ops = engine->cops;
+	ce->sseu = engine->sseu;
+
+	INIT_LIST_HEAD(&ce->signal_link);
+	INIT_LIST_HEAD(&ce->signals);
+
+	mutex_init(&ce->pin_mutex);
+
+	i915_active_init(ctx->i915, &ce->active,
+			 __intel_context_active, __intel_context_retire);
 }
 
 static void i915_global_context_shrink(void)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index a47275bc4f01..40cd8320fcc3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -9,6 +9,7 @@
 
 #include <linux/lockdep.h>
 
+#include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 
@@ -102,8 +103,17 @@ static inline void intel_context_exit(struct intel_context *ce)
 		ce->ops->exit(ce);
 }
 
-int intel_context_active_acquire(struct intel_context *ce, unsigned long flags);
-void intel_context_active_release(struct intel_context *ce);
+static inline int intel_context_active_acquire(struct intel_context *ce)
+{
+	return i915_active_acquire(&ce->active);
+}
+
+static inline void intel_context_active_release(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active() */
+	i915_active_acquire_barrier(&ce->active);
+	i915_active_release(&ce->active);
+}
 
 static inline struct intel_context *intel_context_get(struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index cd4cf4d0b30c..5dbc43c70496 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1565,12 +1565,10 @@ __execlists_context_pin(struct intel_context *ce,
 		goto err;
 	GEM_BUG_ON(!ce->state);
 
-	ret = intel_context_active_acquire(ce,
-					   engine->i915->ggtt.pin_bias |
-					   PIN_OFFSET_BIAS |
-					   PIN_HIGH);
+	ret = intel_context_active_acquire(ce);
 	if (ret)
 		goto err;
+	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
 	vaddr = i915_gem_object_pin_map(ce->state->obj,
 					i915_coherent_map_type(engine->i915) |
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 5ccf61806d9e..bead5afaeb13 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1437,7 +1437,7 @@ static int ring_context_pin(struct intel_context *ce)
 		ce->state = vma;
 	}
 
-	err = intel_context_active_acquire(ce, PIN_HIGH);
+	err = intel_context_active_acquire(ce);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 086801b51441..b9c2764beca3 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -154,7 +154,7 @@ static int mock_context_pin(struct intel_context *ce)
 			return -ENOMEM;
 	}
 
-	ret = intel_context_active_acquire(ce, PIN_HIGH);
+	ret = intel_context_active_acquire(ce);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 2d019ac6db20..6a9f8d37f415 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -30,48 +30,50 @@ struct active_node {
 };
 
 static void
-__active_park(struct i915_active *ref)
+active_retire(struct i915_active *ref)
 {
 	struct active_node *it, *n;
+	struct rb_root root;
+	bool retire = false;
 
-	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		GEM_BUG_ON(i915_active_request_isset(&it->base));
-		kmem_cache_free(global.slab_cache, it);
+	GEM_BUG_ON(!atomic_read(&ref->count));
+	if (atomic_add_unless(&ref->count, -1, 1))
+		return;
+
+	/* One active may be flushed from inside the acquire of another */
+	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
+
+	/* return the unused nodes to our slabcache -- flushing the allocator */
+	if (atomic_dec_and_test(&ref->count)) {
+		root = ref->tree;
+		ref->tree = RB_ROOT;
+		ref->cache = NULL;
+		retire = true;
 	}
-	ref->tree = RB_ROOT;
-}
 
-static void
-__active_retire(struct i915_active *ref)
-{
-	GEM_BUG_ON(!ref->count);
-	if (--ref->count)
+	mutex_unlock(&ref->mutex);
+	if (!retire)
 		return;
 
-	/* return the unused nodes to our slabcache */
-	__active_park(ref);
-
 	ref->retire(ref);
-}
 
-static void
-node_retire(struct i915_active_request *base, struct i915_request *rq)
-{
-	__active_retire(container_of(base, struct active_node, base)->ref);
+	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
+		GEM_BUG_ON(i915_active_request_isset(&it->base));
+		kmem_cache_free(global.slab_cache, it);
+	}
 }
 
 static void
-last_retire(struct i915_active_request *base, struct i915_request *rq)
+node_retire(struct i915_active_request *base, struct i915_request *rq)
 {
-	__active_retire(container_of(base, struct i915_active, last));
+	active_retire(container_of(base, struct active_node, base)->ref);
 }
 
 static struct i915_active_request *
 active_instance(struct i915_active *ref, u64 idx)
 {
-	struct active_node *node;
+	struct active_node *node, *prealloc;
 	struct rb_node **p, *parent;
-	struct i915_request *old;
 
 	/*
 	 * We track the most recently used timeline to skip a rbtree search
@@ -79,20 +81,18 @@ active_instance(struct i915_active *ref, u64 idx)
 	 * at all. We can reuse the last slot if it is empty, that is
 	 * after the previous activity has been retired, or if it matches the
 	 * current timeline.
-	 *
-	 * Note that we allow the timeline to be active simultaneously in
-	 * the rbtree and the last cache. We do this to avoid having
-	 * to search and replace the rbtree element for a new timeline, with
-	 * the cost being that we must be aware that the ref may be retired
-	 * twice for the same timeline (as the older rbtree element will be
-	 * retired before the new request added to last).
 	 */
-	old = i915_active_request_raw(&ref->last, BKL(ref));
-	if (!old || old->fence.context == idx)
-		goto out;
+	node = READ_ONCE(ref->cache);
+	if (node && node->timeline == idx)
+		return &node->base;
 
-	/* Move the currently active fence into the rbtree */
-	idx = old->fence.context;
+	/* Preallocate a replacement, just in case */
+	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
+	if (!prealloc)
+		return NULL;
+
+	mutex_lock(&ref->mutex);
+	GEM_BUG_ON(i915_active_is_idle(ref));
 
 	parent = NULL;
 	p = &ref->tree.rb_node;
@@ -100,8 +100,10 @@ active_instance(struct i915_active *ref, u64 idx)
 		parent = *p;
 
 		node = rb_entry(parent, struct active_node, node);
-		if (node->timeline == idx)
-			goto replace;
+		if (node->timeline == idx) {
+			kmem_cache_free(global.slab_cache, prealloc);
+			goto out;
+		}
 
 		if (node->timeline < idx)
 			p = &parent->rb_right;
@@ -109,17 +111,7 @@ active_instance(struct i915_active *ref, u64 idx)
 			p = &parent->rb_left;
 	}
 
-	node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
-
-	/* kmalloc may retire the ref->last (thanks shrinker)! */
-	if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) {
-		kmem_cache_free(global.slab_cache, node);
-		goto out;
-	}
-
-	if (unlikely(!node))
-		return ERR_PTR(-ENOMEM);
-
+	node = prealloc;
 	i915_active_request_init(&node->base, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
@@ -127,38 +119,27 @@ active_instance(struct i915_active *ref, u64 idx)
 	rb_link_node(&node->node, parent, p);
 	rb_insert_color(&node->node, &ref->tree);
 
-replace:
-	/*
-	 * Overwrite the previous active slot in the rbtree with last,
-	 * leaving last zeroed. If the previous slot is still active,
-	 * we must be careful as we now only expect to receive one retire
-	 * callback not two, and so much undo the active counting for the
-	 * overwritten slot.
-	 */
-	if (i915_active_request_isset(&node->base)) {
-		/* Retire ourselves from the old rq->active_list */
-		__list_del_entry(&node->base.link);
-		ref->count--;
-		GEM_BUG_ON(!ref->count);
-	}
-	GEM_BUG_ON(list_empty(&ref->last.link));
-	list_replace_init(&ref->last.link, &node->base.link);
-	node->base.request = fetch_and_zero(&ref->last.request);
-
 out:
-	return &ref->last;
+	ref->cache = node;
+	mutex_unlock(&ref->mutex);
+
+	return &node->base;
 }
 
-void i915_active_init(struct drm_i915_private *i915,
-		      struct i915_active *ref,
-		      void (*retire)(struct i915_active *ref))
+void __i915_active_init(struct drm_i915_private *i915,
+			struct i915_active *ref,
+			int (*active)(struct i915_active *ref),
+			void (*retire)(struct i915_active *ref),
+			struct lock_class_key *key)
 {
 	ref->i915 = i915;
+	ref->active = active;
 	ref->retire = retire;
 	ref->tree = RB_ROOT;
-	i915_active_request_init(&ref->last, NULL, last_retire);
+	ref->cache = NULL;
 	init_llist_head(&ref->barriers);
-	ref->count = 0;
+	atomic_set(&ref->count, 0);
+	__mutex_init(&ref->mutex, "i915_active", key);
 }
 
 int i915_active_ref(struct i915_active *ref,
@@ -166,60 +147,80 @@ int i915_active_ref(struct i915_active *ref,
 		    struct i915_request *rq)
 {
 	struct i915_active_request *active;
-	int err = 0;
+	int err;
 
 	/* Prevent reaping in case we malloc/wait while building the tree */
-	i915_active_acquire(ref);
+	err = i915_active_acquire(ref);
+	if (err)
+		return err;
 
 	active = active_instance(ref, timeline);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
+	if (!active) {
+		err = -ENOMEM;
 		goto out;
 	}
 
 	if (!i915_active_request_isset(active))
-		ref->count++;
+		atomic_inc(&ref->count);
 	__i915_active_request_set(active, rq);
 
-	GEM_BUG_ON(!ref->count);
 out:
 	i915_active_release(ref);
 	return err;
 }
 
-bool i915_active_acquire(struct i915_active *ref)
+int i915_active_acquire(struct i915_active *ref)
 {
-	lockdep_assert_held(BKL(ref));
-	return !ref->count++;
+	int err;
+
+	if (atomic_add_unless(&ref->count, 1, 0))
+		return 0;
+
+	err = mutex_lock_interruptible(&ref->mutex);
+	if (err)
+		return err;
+
+	if (!atomic_read(&ref->count) && ref->active)
+		err = ref->active(ref);
+	if (!err)
+		atomic_inc(&ref->count);
+
+	mutex_unlock(&ref->mutex);
+
+	return err;
 }
 
 void i915_active_release(struct i915_active *ref)
 {
-	lockdep_assert_held(BKL(ref));
-	__active_retire(ref);
+	active_retire(ref);
 }
 
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
-	int ret = 0;
+	int err;
 
-	if (i915_active_acquire(ref))
-		goto out_release;
+	might_sleep();
+	if (RB_EMPTY_ROOT(&ref->tree))
+		return 0;
 
-	ret = i915_active_request_retire(&ref->last, BKL(ref));
-	if (ret)
-		goto out_release;
+	err = mutex_lock_interruptible(&ref->mutex);
+	if (err)
+		return err;
+
+	if (!atomic_add_unless(&ref->count, 1, 0))
+		goto unlock;
 
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		ret = i915_active_request_retire(&it->base, BKL(ref));
-		if (ret)
+		err = i915_active_request_retire(&it->base, BKL(ref));
+		if (err)
 			break;
 	}
 
-out_release:
-	i915_active_release(ref);
-	return ret;
+	active_retire(ref);
+unlock:
+	mutex_unlock(&ref->mutex);
+	return err;
 }
 
 int i915_request_await_active_request(struct i915_request *rq,
@@ -234,23 +235,24 @@ int i915_request_await_active_request(struct i915_request *rq,
 int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 {
 	struct active_node *it, *n;
-	int err = 0;
+	int err;
 
-	/* await allocates and so we need to avoid hitting the shrinker */
-	if (i915_active_acquire(ref))
-		goto out; /* was idle */
+	if (RB_EMPTY_ROOT(&ref->tree))
+		return 0;
 
-	err = i915_request_await_active_request(rq, &ref->last);
+	/* await allocates and so we need to avoid hitting the shrinker */
+	err = i915_active_acquire(ref);
 	if (err)
-		goto out;
+		return err;
 
+	mutex_lock(&ref->mutex);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
 		err = i915_request_await_active_request(rq, &it->base);
 		if (err)
-			goto out;
+			break;
 	}
+	mutex_unlock(&ref->mutex);
 
-out:
 	i915_active_release(ref);
 	return err;
 }
@@ -258,9 +260,9 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref)
 {
-	GEM_BUG_ON(i915_active_request_isset(&ref->last));
 	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
-	GEM_BUG_ON(ref->count);
+	GEM_BUG_ON(atomic_read(&ref->count));
+	mutex_destroy(&ref->mutex);
 }
 #endif
 
@@ -286,7 +288,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 					 (void *)engine, node_retire);
 		node->timeline = kctx->ring->timeline->fence_context;
 		node->ref = ref;
-		ref->count++;
+		atomic_inc(&ref->count);
 
 		llist_add((struct llist_node *)&node->base.link,
 			  &ref->barriers);
@@ -299,8 +301,9 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 {
 	struct llist_node *pos, *next;
 
-	i915_active_acquire(ref);
+	GEM_BUG_ON(i915_active_is_idle(ref));
 
+	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
 	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
 		struct intel_engine_cs *engine;
 		struct active_node *node;
@@ -329,7 +332,7 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 		llist_add((struct llist_node *)&node->base.link,
 			  &engine->barrier_tasks);
 	}
-	i915_active_release(ref);
+	mutex_unlock(&ref->mutex);
 }
 
 void i915_request_add_barriers(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index d55d37673944..6fa9263ca407 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -369,9 +369,16 @@ i915_active_request_retire(struct i915_active_request *active,
  * synchronisation.
  */
 
-void i915_active_init(struct drm_i915_private *i915,
-		      struct i915_active *ref,
-		      void (*retire)(struct i915_active *ref));
+void __i915_active_init(struct drm_i915_private *i915,
+			struct i915_active *ref,
+			int (*active)(struct i915_active *ref),
+			void (*retire)(struct i915_active *ref),
+			struct lock_class_key *key);
+#define i915_active_init(i915, ref, active, retire) do {		\
+	static struct lock_class_key __key;				\
+									\
+	__i915_active_init(i915, ref, active, retire, &__key);		\
+} while (0)
 
 int i915_active_ref(struct i915_active *ref,
 		    u64 timeline,
@@ -384,20 +391,14 @@ int i915_request_await_active(struct i915_request *rq,
 int i915_request_await_active_request(struct i915_request *rq,
 				      struct i915_active_request *active);
 
-bool i915_active_acquire(struct i915_active *ref);
-
-static inline void i915_active_cancel(struct i915_active *ref)
-{
-	GEM_BUG_ON(ref->count != 1);
-	ref->count = 0;
-}
-
+int i915_active_acquire(struct i915_active *ref);
 void i915_active_release(struct i915_active *ref);
+void __i915_active_release_nested(struct i915_active *ref, int subclass);
 
 static inline bool
 i915_active_is_idle(const struct i915_active *ref)
 {
-	return !ref->count;
+	return !atomic_read(&ref->count);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index c025991b9233..5b0a3024ce24 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -7,7 +7,9 @@
 #ifndef _I915_ACTIVE_TYPES_H_
 #define _I915_ACTIVE_TYPES_H_
 
+#include <linux/atomic.h>
 #include <linux/llist.h>
+#include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/rcupdate.h>
 
@@ -24,13 +26,17 @@ struct i915_active_request {
 	i915_active_retire_fn retire;
 };
 
+struct active_node;
+
 struct i915_active {
 	struct drm_i915_private *i915;
 
+	struct active_node *cache;
 	struct rb_root tree;
-	struct i915_active_request last;
-	unsigned int count;
+	struct mutex mutex;
+	atomic_t count;
 
+	int (*active)(struct i915_active *ref);
 	void (*retire)(struct i915_active *ref);
 
 	struct llist_head barriers;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7be72388b052..611ced79fd09 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2061,7 +2061,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	i915_active_init(i915, &vma->active, NULL);
+	i915_active_init(i915, &vma->active, NULL, NULL);
 	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	vma->vm = &ggtt->vm;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index c311ce9c6f9d..e2f9336bab83 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -148,6 +148,15 @@ static void __cacheline_retire(struct i915_active *active)
 		__idle_cacheline_free(cl);
 }
 
+static int __cacheline_active(struct i915_active *active)
+{
+	struct i915_timeline_cacheline *cl =
+		container_of(active, typeof(*cl), active);
+
+	__i915_vma_pin(cl->hwsp->vma);
+	return 0;
+}
+
 static struct i915_timeline_cacheline *
 cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline)
 {
@@ -170,15 +179,16 @@ cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline)
 	cl->hwsp = hwsp;
 	cl->vaddr = page_pack_bits(vaddr, cacheline);
 
-	i915_active_init(hwsp_to_i915(hwsp), &cl->active, __cacheline_retire);
+	i915_active_init(hwsp_to_i915(hwsp), &cl->active,
+			 __cacheline_active, __cacheline_retire);
 
 	return cl;
 }
 
 static void cacheline_acquire(struct i915_timeline_cacheline *cl)
 {
-	if (cl && i915_active_acquire(&cl->active))
-		__i915_vma_pin(cl->hwsp->vma);
+	if (cl)
+		i915_active_acquire(&cl->active);
 }
 
 static void cacheline_release(struct i915_timeline_cacheline *cl)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f782919ff19b..f02128658055 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -77,11 +77,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 
 #endif
 
-static void __i915_vma_retire(struct i915_active *ref)
+static inline struct i915_vma *active_to_vma(struct i915_active *ref)
 {
-	struct i915_vma *vma = container_of(ref, typeof(*vma), active);
+	return container_of(ref, typeof(struct i915_vma), active);
+}
 
-	i915_vma_put(vma);
+static int __i915_vma_active(struct i915_active *ref)
+{
+	i915_vma_get(active_to_vma(ref));
+	return 0;
+}
+
+static void __i915_vma_retire(struct i915_active *ref)
+{
+	i915_vma_put(active_to_vma(ref));
 }
 
 static struct i915_vma *
@@ -106,7 +115,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
-	i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
+	i915_active_init(vm->i915, &vma->active,
+			 __i915_vma_active, __i915_vma_retire);
 	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	INIT_LIST_HEAD(&vma->closed_link);
@@ -903,11 +913,7 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (i915_active_acquire(&vma->active))
-		i915_vma_get(vma);
-
 	err = i915_active_ref(&vma->active, rq->fence.context, rq);
-	i915_active_release(&vma->active);
 	if (unlikely(err))
 		return err;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index cc1ca4be1a00..3b3ca5658122 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -36,14 +36,12 @@ static int __live_active_setup(struct drm_i915_private *i915,
 	if (!submit)
 		return -ENOMEM;
 
-	i915_active_init(i915, &active->base, __live_active_retire);
+	i915_active_init(i915, &active->base, NULL, __live_active_retire);
 	active->retired = false;
 
-	if (!i915_active_acquire(&active->base)) {
-		pr_err("First i915_active_acquire should report being idle\n");
-		err = -EINVAL;
+	err = i915_active_acquire(&active->base);
+	if (err)
 		goto out;
-	}
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
@@ -74,9 +72,9 @@ static int __live_active_setup(struct drm_i915_private *i915,
 		pr_err("i915_active retired before submission!\n");
 		err = -EINVAL;
 	}
-	if (active->base.count != count) {
+	if (atomic_read(&active->base.count) != count) {
 		pr_err("i915_active not tracking all requests, found %d, expected %d\n",
-		       active->base.count, count);
+		       atomic_read(&active->base.count), count);
 		err = -EINVAL;
 	}
 
-- 
2.20.1