[PATCH 27/27] drm/i915: Generalise GPU read locks

Chris Wilson <chris@chris-wilson.co.uk>
Fri Jan 25 11:56:14 UTC 2019
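
Pull the per-timeline active-request tracking currently embedded in
i915_vma (the last_active cache backed by an rbtree of i915_gem_active
slots) out into a standalone i915_read_lock, so that the same mechanism
can be reused by other GPU read-side trackers. i915_vma is converted to
be the first user, supplying its retirement callback to the new helper.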


Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile         |   1 +
 drivers/gpu/drm/i915/i915_gem_gtt.c   |   2 -
 drivers/gpu/drm/i915/i915_read_lock.c | 198 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_read_lock.h |  46 ++++++
 drivers/gpu/drm/i915/i915_vma.c       | 150 ++-----------------
 drivers/gpu/drm/i915/i915_vma.h       |   9 +-
 6 files changed, 262 insertions(+), 144 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_read_lock.c
 create mode 100644 drivers/gpu/drm/i915/i915_read_lock.h
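
A minimal sketch of the intended usage, modelled on the i915_vma
conversion below. "struct foo" and its helpers are illustrative names
only, not part of this series (assumes i915_drv.h and i915_read_lock.h
are included):

struct foo {
	struct i915_read_lock read_lock;
	/* ... whatever state the read lock guards ... */
};

/* Called once the last request tracked by the lock has been retired. */
static void foo_retire(struct i915_read_lock *lock, struct i915_request *rq)
{
	struct foo *foo = container_of(lock, struct foo, read_lock);

	/* e.g. drop the reference taken when foo first became active */
}

static void foo_init(struct foo *foo, struct drm_i915_private *i915)
{
	i915_read_lock_init(i915, &foo->read_lock, foo_retire);
}

/* Track rq on its timeline; may return -ENOMEM under memory pressure. */
static int foo_move_to_active(struct foo *foo, struct i915_request *rq)
{
	return i915_read_lock(&foo->read_lock, rq->fence.context, rq);
}

/* Wait upon (and retire) every request still tracked by the lock. */
static int foo_wait_idle(struct foo *foo)
{
	return i915_read_lock_wait(&foo->read_lock);
}

static void foo_fini(struct foo *foo)
{
	GEM_BUG_ON(!i915_read_lock_is_idle(&foo->read_lock));
	i915_read_lock_fini(&foo->read_lock);
}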

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 8300efe60fe1..21880c26cb2e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -74,6 +74,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_tiling.o \
 	  i915_gem_userptr.o \
 	  i915_gemfs.o \
 	  i915_query.o \
+	  i915_read_lock.o \
 	  i915_request.o \
 	  i915_scheduler.o \
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 49b00996a15e..89d96d4a7628 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1923,8 +1923,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	vma->ops = &pd_vma_ops;
 	vma->private = ppgtt;
 
-	vma->active = RB_ROOT;
-
 	vma->size = size;
 	vma->fence_size = size;
 	vma->flags = I915_VMA_GGTT;
diff --git a/drivers/gpu/drm/i915/i915_read_lock.c b/drivers/gpu/drm/i915/i915_read_lock.c
new file mode 100644
index 000000000000..226ef7ed63f4
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_read_lock.c
@@ -0,0 +1,198 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_read_lock.h"
+
+struct read_lock_active {
+	struct i915_gem_active base;
+	struct i915_read_lock *lock;
+	struct rb_node node;
+	u64 timeline;
+};
+
+static void
+__read_lock_retire(struct i915_read_lock *lock, struct i915_request *rq)
+{
+	if (--lock->count)
+		return;
+
+	lock->retire(lock, rq);
+}
+
+static void
+read_lock_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__read_lock_retire(container_of(base,
+					struct read_lock_active,
+					base)->lock,
+			   rq);
+}
+
+static void
+read_lock_last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__read_lock_retire(container_of(base,
+					struct i915_read_lock,
+					last_active),
+			   rq);
+}
+
+static struct i915_gem_active *
+active_instance(struct i915_read_lock *lock, u64 idx)
+{
+	struct read_lock_active *active;
+	struct rb_node **p, *parent;
+	struct i915_request *old;
+
+	/*
+	 * We track the most recently used timeline to skip an rbtree search
+	 * for the common case; under typical loads we never need the rbtree
+	 * at all. We can reuse the last_active slot if it is empty, that is
+	 * after the previous activity has been retired, or if the active
+	 * matches the current timeline.
+	 *
+	 * Note that we allow the timeline to be active simultaneously in
+	 * the rbtree and the last_active cache. We do this to avoid having
+	 * to search and replace the rbtree element for a new timeline, with
+	 * the cost being that we must be aware that the lock may be retired
+	 * twice for the same timeline (as the older rbtree element will be
+	 * retired before the new request is added to last_active).
+	 */
+	old = i915_gem_active_raw(&lock->last_active,
+				  &lock->i915->drm.struct_mutex);
+	if (!old || old->fence.context == idx)
+		goto out;
+
+	/* Move the currently active fence into the rbtree */
+	idx = old->fence.context;
+
+	parent = NULL;
+	p = &lock->active.rb_node;
+	while (*p) {
+		parent = *p;
+
+		active = rb_entry(parent, struct read_lock_active, node);
+		if (active->timeline == idx)
+			goto replace;
+
+		if (active->timeline < idx)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+
+	active = kmalloc(sizeof(*active), GFP_KERNEL);
+
+	/* kmalloc may retire the lock->last_active request (thanks, shrinker)! */
+	if (unlikely(!i915_gem_active_raw(&lock->last_active,
+					  &lock->i915->drm.struct_mutex))) {
+		kfree(active);
+		goto out;
+	}
+
+	if (unlikely(!active))
+		return ERR_PTR(-ENOMEM);
+
+	init_request_active(&active->base, read_lock_retire);
+	active->lock = lock;
+	active->timeline = idx;
+
+	rb_link_node(&active->node, parent, p);
+	rb_insert_color(&active->node, &lock->active);
+
+replace:
+	/*
+	 * Overwrite the previous active slot in the rbtree with last_active,
+	 * leaving last_active zeroed. If the previous slot is still active,
+	 * we must be careful as we now only expect to receive one retire
+	 * callback, not two, and so must undo the active counting for the
+	 * overwritten slot.
+	 */
+	if (i915_gem_active_isset(&active->base)) {
+		/* Retire ourselves from the old rq->active_list */
+		__list_del_entry(&active->base.link);
+		lock->count--;
+		GEM_BUG_ON(!lock->count);
+	}
+	GEM_BUG_ON(list_empty(&lock->last_active.link));
+	list_replace_init(&lock->last_active.link, &active->base.link);
+	active->base.request = fetch_and_zero(&lock->last_active.request);
+
+out:
+	return &lock->last_active;
+}
+
+void i915_read_lock_init(struct drm_i915_private *i915,
+			 struct i915_read_lock *lock,
+			 void (*retire)(struct i915_read_lock *lock,
+					struct i915_request *rq))
+{
+	lock->i915 = i915;
+	lock->retire = retire;
+	lock->active = RB_ROOT;
+	init_request_active(&lock->last_active, read_lock_last_retire);
+}
+
+int i915_read_lock(struct i915_read_lock *lock,
+		   u64 timeline, struct i915_request *rq)
+{
+	struct i915_gem_active *active;
+
+	active = active_instance(lock, timeline);
+	if (IS_ERR(active))
+		return PTR_ERR(active);
+
+	if (!i915_gem_active_isset(active))
+		lock->count++;
+	i915_gem_active_set(active, rq);
+
+	return 0;
+}
+
+void i915_read_lock_acquire(struct i915_read_lock *lock)
+{
+	lockdep_assert_held(&lock->i915->drm.struct_mutex);
+	lock->count++;
+}
+
+void i915_read_lock_release(struct i915_read_lock *lock)
+{
+	lockdep_assert_held(&lock->i915->drm.struct_mutex);
+	__read_lock_retire(lock, NULL);
+}
+
+int i915_read_lock_wait(struct i915_read_lock *lock)
+{
+	struct read_lock_active *iter, *n;
+	int ret;
+
+	ret = i915_gem_active_retire(&lock->last_active,
+				     &lock->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	rbtree_postorder_for_each_entry_safe(iter, n, &lock->active, node) {
+		ret = i915_gem_active_retire(&iter->base,
+					     &lock->i915->drm.struct_mutex);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+void i915_read_lock_fini(struct i915_read_lock *lock)
+{
+	struct read_lock_active *iter, *n;
+
+	GEM_BUG_ON(i915_gem_active_isset(&lock->last_active));
+
+	rbtree_postorder_for_each_entry_safe(iter, n, &lock->active, node) {
+		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
+		kfree(iter);
+	}
+}
diff --git a/drivers/gpu/drm/i915/i915_read_lock.h b/drivers/gpu/drm/i915/i915_read_lock.h
new file mode 100644
index 000000000000..010310f49c1a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_read_lock.h
@@ -0,0 +1,46 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_READ_LOCK_H_
+#define _I915_READ_LOCK_H_
+
+#include <linux/rbtree.h>
+
+#include "i915_request.h"
+
+struct drm_i915_private;
+
+struct i915_read_lock {
+	struct drm_i915_private *i915;
+
+	struct rb_root active;
+	struct i915_gem_active last_active;
+	unsigned int count;
+
+	void (*retire)(struct i915_read_lock *lock, struct i915_request *rq);
+};
+
+void i915_read_lock_init(struct drm_i915_private *i915,
+			 struct i915_read_lock *lock,
+			 void (*retire)(struct i915_read_lock *lock,
+					struct i915_request *rq));
+
+int i915_read_lock(struct i915_read_lock *lock,
+		   u64 timeline,
+		   struct i915_request *rq);
+int i915_read_lock_wait(struct i915_read_lock *lock);
+
+void i915_read_lock_acquire(struct i915_read_lock *lock);
+void i915_read_lock_release(struct i915_read_lock *lock);
+
+static inline bool i915_read_lock_is_idle(const struct i915_read_lock *lock)
+{
+	return !lock->count;
+}
+
+void i915_read_lock_fini(struct i915_read_lock *lock);
+
+#endif /* _I915_READ_LOCK_H_ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d83b8ad5f859..ae74e2373458 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -63,22 +63,12 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 
 #endif
 
-struct i915_vma_active {
-	struct i915_gem_active base;
-	struct i915_vma *vma;
-	struct rb_node node;
-	u64 timeline;
-};
-
 static void
-__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
+__i915_vma_retire(struct i915_read_lock *lock, struct i915_request *rq)
 {
+	struct i915_vma *vma = container_of(lock, typeof(*vma), read_lock);
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	GEM_BUG_ON(!i915_vma_is_active(vma));
-	if (--vma->active_count)
-		return;
-
 	GEM_BUG_ON(!i915_gem_object_is_active(obj));
 	if (--obj->active_count)
 		return;
@@ -107,21 +97,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 	}
 }
 
-static void
-i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
-	struct i915_vma_active *active =
-		container_of(base, typeof(*active), base);
-
-	__i915_vma_retire(active->vma, rq);
-}
-
-static void
-i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
-	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
-}
-
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -137,9 +112,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	vma->active = RB_ROOT;
+	i915_read_lock_init(vm->i915, &vma->read_lock, __i915_vma_retire);
 
-	init_request_active(&vma->last_active, i915_vma_last_retire);
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -823,7 +797,6 @@ void i915_vma_reopen(struct i915_vma *vma)
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	struct i915_vma_active *iter, *n;
 
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
@@ -843,10 +816,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 		spin_unlock(&obj->vma.lock);
 	}
 
-	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
-		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
-		kfree(iter);
-	}
+	i915_read_lock_fini(&vma->read_lock);
 
 	kmem_cache_free(i915->vmas, vma);
 }
@@ -931,104 +901,15 @@ static void export_fence(struct i915_vma *vma,
 	reservation_object_unlock(resv);
 }
 
-static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
-{
-	struct i915_vma_active *active;
-	struct rb_node **p, *parent;
-	struct i915_request *old;
-
-	/*
-	 * We track the most recently used timeline to skip a rbtree search
-	 * for the common case, under typical loads we never need the rbtree
-	 * at all. We can reuse the last_active slot if it is empty, that is
-	 * after the previous activity has been retired, or if the active
-	 * matches the current timeline.
-	 *
-	 * Note that we allow the timeline to be active simultaneously in
-	 * the rbtree and the last_active cache. We do this to avoid having
-	 * to search and replace the rbtree element for a new timeline, with
-	 * the cost being that we must be aware that the vma may be retired
-	 * twice for the same timeline (as the older rbtree element will be
-	 * retired before the new request added to last_active).
-	 */
-	old = i915_gem_active_raw(&vma->last_active,
-				  &vma->vm->i915->drm.struct_mutex);
-	if (!old || old->fence.context == idx)
-		goto out;
-
-	/* Move the currently active fence into the rbtree */
-	idx = old->fence.context;
-
-	parent = NULL;
-	p = &vma->active.rb_node;
-	while (*p) {
-		parent = *p;
-
-		active = rb_entry(parent, struct i915_vma_active, node);
-		if (active->timeline == idx)
-			goto replace;
-
-		if (active->timeline < idx)
-			p = &parent->rb_right;
-		else
-			p = &parent->rb_left;
-	}
-
-	active = kmalloc(sizeof(*active), GFP_KERNEL);
-
-	/* kmalloc may retire the vma->last_active request (thanks shrinker)! */
-	if (unlikely(!i915_gem_active_raw(&vma->last_active,
-					  &vma->vm->i915->drm.struct_mutex))) {
-		kfree(active);
-		goto out;
-	}
-
-	if (unlikely(!active))
-		return ERR_PTR(-ENOMEM);
-
-	init_request_active(&active->base, i915_vma_retire);
-	active->vma = vma;
-	active->timeline = idx;
-
-	rb_link_node(&active->node, parent, p);
-	rb_insert_color(&active->node, &vma->active);
-
-replace:
-	/*
-	 * Overwrite the previous active slot in the rbtree with last_active,
-	 * leaving last_active zeroed. If the previous slot is still active,
-	 * we must be careful as we now only expect to receive one retire
-	 * callback not two, and so much undo the active counting for the
-	 * overwritten slot.
-	 */
-	if (i915_gem_active_isset(&active->base)) {
-		/* Retire ourselves from the old rq->active_list */
-		__list_del_entry(&active->base.link);
-		vma->active_count--;
-		GEM_BUG_ON(!vma->active_count);
-	}
-	GEM_BUG_ON(list_empty(&vma->last_active.link));
-	list_replace_init(&vma->last_active.link, &active->base.link);
-	active->base.request = fetch_and_zero(&vma->last_active.request);
-
-out:
-	return &vma->last_active;
-}
-
 int i915_vma_move_to_active(struct i915_vma *vma,
 			    struct i915_request *rq,
 			    unsigned int flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	struct i915_gem_active *active;
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	active = active_instance(vma, rq->fence.context);
-	if (IS_ERR(active))
-		return PTR_ERR(active);
-
 	/*
 	 * Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
@@ -1037,9 +918,15 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_gem_active_isset(active) && !vma->active_count++)
+	if (!vma->read_lock.count)
 		obj->active_count++;
-	i915_gem_active_set(active, rq);
+
+	if (unlikely(i915_read_lock(&vma->read_lock, rq->fence.context, rq))) {
+		if (!vma->read_lock.count)
+			obj->active_count--;
+		return -ENOMEM;
+	}
+
 	GEM_BUG_ON(!i915_vma_is_active(vma));
 	GEM_BUG_ON(!obj->active_count);
 
@@ -1073,8 +960,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 	 */
 	might_sleep();
 	if (i915_vma_is_active(vma)) {
-		struct i915_vma_active *active, *n;
-
 		/*
 		 * When a closed VMA is retired, it is unbound - eek.
 		 * In order to prevent it from being recursively closed,
@@ -1090,19 +975,10 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
-		ret = i915_gem_active_retire(&vma->last_active,
-					     &vma->vm->i915->drm.struct_mutex);
+		ret = i915_read_lock_wait(&vma->read_lock);
 		if (ret)
 			goto unpin;
 
-		rbtree_postorder_for_each_entry_safe(active, n,
-						     &vma->active, node) {
-			ret = i915_gem_active_retire(&active->base,
-						     &vma->vm->i915->drm.struct_mutex);
-			if (ret)
-				goto unpin;
-		}
-
 		ret = i915_gem_active_retire(&vma->last_fence,
 					     &vma->vm->i915->drm.struct_mutex);
 unpin:
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index b0f6b1d904a5..a4b06557ce2e 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -34,6 +34,7 @@
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_object.h"
 
+#include "i915_read_lock.h"
 #include "i915_request.h"
 
 enum i915_cache_level;
@@ -108,9 +109,7 @@ struct i915_vma {
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
 #define I915_VMA_GGTT_WRITE	BIT(15)
 
-	unsigned int active_count;
-	struct rb_root active;
-	struct i915_gem_active last_active;
+	struct i915_read_lock read_lock;
 	struct i915_gem_active last_fence;
 
 	/**
@@ -154,9 +153,9 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
 #define I915_VMA_RELEASE_MAP BIT(0)
 
-static inline bool i915_vma_is_active(struct i915_vma *vma)
+static inline bool i915_vma_is_active(const struct i915_vma *vma)
 {
-	return vma->active_count;
+	return !i915_read_lock_is_idle(&vma->read_lock);
 }
 
 int __must_check i915_vma_move_to_active(struct i915_vma *vma,
-- 
2.20.1


