[PATCH 37/72] rt-active
Chris Wilson
chris@chris-wilson.co.uk
Tue Feb 6 20:57:37 UTC 2018
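
In outline (the trybot posting carries no commit message, so this summary is taken from the diff itself): track vma activity per timeline (fence context) rather than per engine. The fixed last_read[I915_NUM_ENGINES] array on each vma limits us to a single timeline per engine, which is what forced the inter-timeline ordering constraint removed below from eb_move_to_gpu(). Instead, keep a radix tree of small i915_vma_active trackers keyed by rq->fence.context: allocate a tracker the first time a timeline touches the vma, and free it again from i915_vma_retire() once its request is retired. With the tracking now private to i915_vma, i915_vma_move_to_active() moves from i915_gem_execbuffer.c into i915_vma.c, and the per-engine rseqno[] array disappears from the error state.

A minimal sketch of the lookup-or-create path, mirroring the new i915_vma_move_to_active() below; active_instance() is a name invented for this sketch (the patch open-codes the same logic inline), and error handling is trimmed to the essentials:

    #include <linux/err.h>
    #include <linux/radix-tree.h>
    #include <linux/slab.h>

    /* One tracker per timeline that has ever used this vma. */
    struct i915_vma_active {
            struct i915_gem_active base;
            struct i915_vma *vma;
    };

    static struct i915_vma_active *
    active_instance(struct i915_vma *vma, u64 context)
    {
            struct i915_vma_active *active;
            int err;

            /* Fast path: this timeline has used the vma before. */
            active = radix_tree_lookup(&vma->active, context);
            if (active)
                    return active;

            active = kmalloc(sizeof(*active), GFP_KERNEL);
            if (!active)
                    return ERR_PTR(-ENOMEM);

            /* Retiring the tracked request deletes and frees the node. */
            init_request_active(&active->base, i915_vma_retire);
            active->vma = vma;

            err = radix_tree_insert(&vma->active, context, active);
            if (err) {
                    kfree(active);
                    return ERR_PTR(err);
            }

            return active;
    }

The tree grows and shrinks with the number of timelines that have actually used the vma, so i915_vma_is_active() reduces to !radix_tree_empty(&vma->active) and the per-engine active bitmask goes away.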
---
drivers/gpu/drm/i915/i915_drv.h | 5 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 85 ------------------
drivers/gpu/drm/i915/i915_gpu_error.c | 14 +--
drivers/gpu/drm/i915/i915_vma.c | 137 ++++++++++++++++++++++++-----
drivers/gpu/drm/i915/i915_vma.h | 41 +++------
5 files changed, 128 insertions(+), 154 deletions(-)
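
For reference, i915_vma_unbind() now has to flush every per-timeline tracker rather than a fixed array. Below is a condensed sketch of that walk, simplified from the i915_vma.c hunk further down; retire_all() is a name invented for the sketch (the patch keeps the loop inline). The RCU read lock is dropped around i915_gem_active_retire() because retiring may sleep, while struct_mutex keeps the tree stable enough to resume the iteration:

    static int retire_all(struct i915_vma *vma, struct mutex *lock)
    {
            struct radix_tree_iter iter;
            void __rcu **slot;
            int err;

            rcu_read_lock();
            radix_tree_for_each_slot(slot, &vma->active, &iter, 0) {
                    struct i915_vma_active *active =
                            rcu_dereference_raw(*slot);

                    /* Retiring may sleep; cannot hold the RCU read lock. */
                    rcu_read_unlock();
                    err = i915_gem_active_retire(&active->base, lock);
                    if (err)
                            return err;

                    rcu_read_lock();
            }
            rcu_read_unlock();

            return 0;
    }

Each successful retire runs i915_vma_retire(), which deletes that context's slot and frees its tracker, so an error-free walk leaves the tree empty.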
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 10d77e831830..3b198567981b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -603,7 +603,7 @@ struct i915_gpu_state {
struct drm_i915_error_buffer {
u32 size;
u32 name;
- u32 rseqno[I915_NUM_ENGINES], wseqno;
+ u32 wseqno;
u64 gtt_offset;
u32 read_domains;
u32 write_domain;
@@ -3302,9 +3302,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
}
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
- struct drm_i915_gem_request *req,
- unsigned int flags);
int i915_gem_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e1ca67c995c9..5cf9e3f78f14 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1734,7 +1734,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
unsigned int flags = eb->flags[i];
struct i915_vma *vma = eb->vma[i];
struct drm_i915_gem_object *obj = vma->obj;
- struct drm_i915_gem_request *order;
if (flags & EXEC_OBJECT_CAPTURE) {
struct i915_gem_capture_list *capture;
@@ -1765,29 +1764,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
flags &= ~EXEC_OBJECT_ASYNC;
}
- /*
- * XXX As we allow multiple queues to share the vma, but
- * with different timelines, yet we rely on a single
- * timeline through the vm (for activity tracking
- * see i915_vma_move_to_active()/i915_vma_retire()) we impose
- * that ordering constraint on the different timelines here.
- *
- * Note that this ordering constraint is undesirable as we
- * want to keep our weakly ordered reads through the GEM
- * interface. That will require us to be able to track
- * multiple timelines (lifting the current limit of one
- * per engine), like struct reservation_object but coupled
- * into our activity tracking.
- */
- order = i915_gem_active_peek(&vma->last_read[eb->engine->id],
- &eb->i915->drm.struct_mutex);
- if (order) {
- err = i915_gem_request_await_dma_fence(eb->request,
- &order->fence);
- if (err)
- return err;
- }
-
if (flags & EXEC_OBJECT_ASYNC)
continue;
@@ -1845,67 +1821,6 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return true;
}
-static void export_fence(struct i915_vma *vma,
- struct drm_i915_gem_request *req,
- unsigned int flags)
-{
- struct reservation_object *resv = vma->resv;
-
- /*
- * Ignore errors from failing to allocate the new fence, we can't
- * handle an error right now. Worst case should be missed
- * synchronisation leading to rendering corruption.
- */
- reservation_object_lock(resv, NULL);
- if (flags & EXEC_OBJECT_WRITE)
- reservation_object_add_excl_fence(resv, &req->fence);
- else if (reservation_object_reserve_shared(resv) == 0)
- reservation_object_add_shared_fence(resv, &req->fence);
- reservation_object_unlock(resv);
-}
-
-int i915_vma_move_to_active(struct i915_vma *vma,
- struct drm_i915_gem_request *req,
- unsigned int flags)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- const unsigned int idx = req->engine->id;
-
- lockdep_assert_held(&req->i915->drm.struct_mutex);
- GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-
- /*
- * Add a reference if we're newly entering the active list.
- * The order in which we add operations to the retirement queue is
- * vital here: mark_active adds to the start of the callback list,
- * such that subsequent callbacks are called first. Therefore we
- * add the active reference first and queue for it to be dropped
- * *last*.
- */
- if (!i915_vma_is_active(vma))
- obj->active_count++;
- i915_vma_set_active(vma, idx);
- i915_gem_active_set(&vma->last_read[idx], req);
- list_move_tail(&vma->vm_link, &vma->vm->active_list);
-
- obj->base.write_domain = 0;
- if (flags & EXEC_OBJECT_WRITE) {
- obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
-
- if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
- i915_gem_active_set(&obj->frontbuffer_write, req);
-
- obj->base.read_domains = 0;
- }
- obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
-
- if (flags & EXEC_OBJECT_NEEDS_FENCE)
- i915_gem_active_set(&vma->last_fence, req);
-
- export_fence(vma, req, flags);
- return 0;
-}
-
static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
{
u32 *cs;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7f82c6062c44..59d2c782204b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -334,21 +334,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
struct drm_i915_error_buffer *err,
int count)
{
- int i;
-
err_printf(m, "%s [%d]:\n", name, count);
while (count--) {
- err_printf(m, " %08x_%08x %8u %02x %02x [ ",
+ err_printf(m, " %08x_%08x %8u %02x %02x %02x",
upper_32_bits(err->gtt_offset),
lower_32_bits(err->gtt_offset),
err->size,
err->read_domains,
- err->write_domain);
- for (i = 0; i < I915_NUM_ENGINES; i++)
- err_printf(m, "%02x ", err->rseqno[i]);
-
- err_printf(m, "] %02x", err->wseqno);
+ err->write_domain,
+ err->wseqno);
err_puts(m, tiling_flag(err->tiling));
err_puts(m, dirty_flag(err->dirty));
err_puts(m, purgeable_flag(err->purgeable));
@@ -1010,13 +1005,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
- int i;
err->size = obj->base.size;
err->name = obj->base.name;
- for (i = 0; i < I915_NUM_ENGINES; i++)
- err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
err->engine = __active_get_engine_id(&obj->frontbuffer_write);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e0e7c48f45dc..46d079afef0a 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -30,18 +30,23 @@
#include <drm/drm_gem.h>
+struct i915_vma_active {
+ struct i915_gem_active base;
+ struct i915_vma *vma;
+};
+
static void
-i915_vma_retire(struct i915_gem_active *active,
+i915_vma_retire(struct i915_gem_active *base,
struct drm_i915_gem_request *rq)
{
- const unsigned int idx = rq->engine->id;
- struct i915_vma *vma =
- container_of(active, struct i915_vma, last_read[idx]);
+ struct i915_vma_active *active =
+ container_of(base, typeof(*active), base);
+ struct i915_vma *vma = active->vma;
struct drm_i915_gem_object *obj = vma->obj;
- GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
+ radix_tree_delete(&vma->active, rq->fence.context);
+ kfree(active);
- i915_vma_clear_active(vma, idx);
if (i915_vma_is_active(vma))
return;
@@ -85,7 +90,6 @@ vma_create(struct drm_i915_gem_object *obj,
{
struct i915_vma *vma;
struct rb_node *rb, **p;
- int i;
/* The aliasing_ppgtt should never be used directly! */
GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
@@ -94,8 +98,8 @@ vma_create(struct drm_i915_gem_object *obj,
if (vma == NULL)
return ERR_PTR(-ENOMEM);
- for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
- init_request_active(&vma->last_read[i], i915_vma_retire);
+ INIT_RADIX_TREE(&vma->active, GFP_KERNEL);
+
init_request_active(&vma->last_fence, NULL);
vma->vm = vm;
vma->obj = obj;
@@ -687,15 +691,11 @@ int __i915_vma_do_pin(struct i915_vma *vma,
static void i915_vma_destroy(struct i915_vma *vma)
{
- int i;
-
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(i915_vma_is_active(vma));
GEM_BUG_ON(!i915_vma_is_closed(vma));
GEM_BUG_ON(vma->fence);
- for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
- GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
list_del(&vma->obj_link);
@@ -753,10 +753,93 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
list_del(&vma->obj->userfault_link);
}
+static void export_fence(struct i915_vma *vma,
+ struct drm_i915_gem_request *req,
+ unsigned int flags)
+{
+ struct reservation_object *resv = vma->resv;
+
+ /*
+ * Ignore errors from failing to allocate the new fence, we can't
+ * handle an error right now. Worst case should be missed
+ * synchronisation leading to rendering corruption.
+ */
+ reservation_object_lock(resv, NULL);
+ if (flags & EXEC_OBJECT_WRITE)
+ reservation_object_add_excl_fence(resv, &req->fence);
+ else if (reservation_object_reserve_shared(resv) == 0)
+ reservation_object_add_shared_fence(resv, &req->fence);
+ reservation_object_unlock(resv);
+}
+
+int i915_vma_move_to_active(struct i915_vma *vma,
+ struct drm_i915_gem_request *req,
+ unsigned int flags)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ struct i915_vma_active *active;
+ bool inactive;
+
+ lockdep_assert_held(&req->i915->drm.struct_mutex);
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+
+ inactive = !i915_vma_is_active(vma);
+
+ active = radix_tree_lookup(&vma->active, req->fence.context);
+ if (!active) {
+ int err;
+
+ active = kmalloc(sizeof(*active), GFP_KERNEL);
+ if (!active)
+ return -ENOMEM;
+
+ init_request_active(&active->base, i915_vma_retire);
+ active->vma = vma;
+
+ err = radix_tree_insert(&vma->active,
+ req->fence.context,
+ active);
+ if (err) {
+ kfree(active);
+ return err;
+ }
+ }
+ i915_gem_active_set(&active->base, req);
+
+ /*
+ * Add a reference if we're newly entering the active list.
+ * The order in which we add operations to the retirement queue is
+ * vital here: mark_active adds to the start of the callback list,
+ * such that subsequent callbacks are called first. Therefore we
+ * add the active reference first and queue for it to be dropped
+ * *last*.
+ */
+ if (inactive)
+ obj->active_count++;
+
+ list_move_tail(&vma->vm_link, &vma->vm->active_list);
+
+ obj->base.write_domain = 0;
+ if (flags & EXEC_OBJECT_WRITE) {
+ obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
+
+ if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
+ i915_gem_active_set(&obj->frontbuffer_write, req);
+
+ obj->base.read_domains = 0;
+ }
+ obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
+
+ if (flags & EXEC_OBJECT_NEEDS_FENCE)
+ i915_gem_active_set(&vma->last_fence, req);
+
+ export_fence(vma, req, flags);
+ return 0;
+}
+
int i915_vma_unbind(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
- unsigned long active;
int ret;
lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -765,9 +848,9 @@ int i915_vma_unbind(struct i915_vma *vma)
* have side-effects such as unpinning or even unbinding this vma.
*/
might_sleep();
- active = i915_vma_get_active(vma);
- if (active) {
- int idx;
+ if (i915_vma_is_active(vma)) {
+ struct radix_tree_iter iter;
+ void __rcu **slot;
/* When a closed VMA is retired, it is unbound - eek.
* In order to prevent it from being recursively closed,
@@ -783,18 +866,24 @@ int i915_vma_unbind(struct i915_vma *vma)
*/
__i915_vma_pin(vma);
- for_each_active(active, idx) {
- ret = i915_gem_active_retire(&vma->last_read[idx],
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &vma->active, &iter, 0) {
+ struct i915_vma_active *active =
+ rcu_dereference_raw(*slot);
+ rcu_read_unlock();
+
+ ret = i915_gem_active_retire(&active->base,
&vma->vm->i915->drm.struct_mutex);
if (ret)
- break;
- }
+ goto unpin;
- if (!ret) {
- ret = i915_gem_active_retire(&vma->last_fence,
- &vma->vm->i915->drm.struct_mutex);
+ rcu_read_lock();
}
+ rcu_read_unlock();
+ ret = i915_gem_active_retire(&vma->last_fence,
+ &vma->vm->i915->drm.struct_mutex);
+unpin:
__i915_vma_unpin(vma);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index fd5b84904f7c..d5e36b107ae4 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -26,6 +26,7 @@
#define __I915_VMA_H__
#include <linux/io-mapping.h>
+#include <linux/radix-tree.h>
#include <drm/drm_mm.h>
@@ -92,8 +93,7 @@ struct i915_vma {
#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
#define I915_VMA_GGTT_WRITE BIT(12)
- unsigned int active;
- struct i915_gem_active last_read[I915_NUM_ENGINES];
+ struct radix_tree_root active;
struct i915_gem_active last_fence;
/**
@@ -134,6 +134,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
void i915_vma_unpin_and_release(struct i915_vma **p_vma);
+static inline bool i915_vma_is_active(struct i915_vma *vma)
+{
+ return !radix_tree_empty(&vma->active);
+}
+
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+ struct drm_i915_gem_request *req,
+ unsigned int flags);
+
static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_GGTT;
@@ -183,34 +192,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
}
-static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
-{
- return vma->active;
-}
-
-static inline bool i915_vma_is_active(const struct i915_vma *vma)
-{
- return i915_vma_get_active(vma);
-}
-
-static inline void i915_vma_set_active(struct i915_vma *vma,
- unsigned int engine)
-{
- vma->active |= BIT(engine);
-}
-
-static inline void i915_vma_clear_active(struct i915_vma *vma,
- unsigned int engine)
-{
- vma->active &= ~BIT(engine);
-}
-
-static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
- unsigned int engine)
-{
- return vma->active & BIT(engine);
-}
-
static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
{
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
--
2.16.1