[Intel-gfx] [PATCH 04/11] drm/i915: Generalise GPU activity tracking
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jan 30 12:02:02 UTC 2019
On 30/01/2019 02:18, Chris Wilson wrote:
> We currently track GPU memory usage inside VMA, such that we never
> release memory used by the GPU until after it has finished accessing it.
> However, we may want to track other resources aside from VMA, or we may
> want to split a VMA into multiple independent regions and track each
> separately. For this purpose, generalise our request tracking (akin to
> struct reservation_object) so that we can embed it into other objects.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/Makefile | 4 +-
> drivers/gpu/drm/i915/i915_active.c | 226 ++++++++++++++++++
> drivers/gpu/drm/i915/i915_active.h | 66 +++++
> drivers/gpu/drm/i915/i915_active_types.h | 26 ++
> drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +-
> drivers/gpu/drm/i915/i915_vma.c | 173 +++-----------
> drivers/gpu/drm/i915/i915_vma.h | 9 +-
> drivers/gpu/drm/i915/selftests/i915_active.c | 158 ++++++++++++
> .../drm/i915/selftests/i915_live_selftests.h | 3 +-
> 9 files changed, 514 insertions(+), 154 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/i915_active.c
> create mode 100644 drivers/gpu/drm/i915/i915_active.h
> create mode 100644 drivers/gpu/drm/i915/i915_active_types.h
> create mode 100644 drivers/gpu/drm/i915/selftests/i915_active.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 210d0e8777b6..1787e1299b1b 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -57,7 +57,9 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
> i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>
> # GEM code
> -i915-y += i915_cmd_parser.o \
> +i915-y += \
> + i915_active.o \
> + i915_cmd_parser.o \
> i915_gem_batch_pool.o \
> i915_gem_clflush.o \
> i915_gem_context.o \
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> new file mode 100644
> index 000000000000..e0182e19cb8b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -0,0 +1,226 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "i915_active.h"
> +
> +#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
> +
> +struct active_node {
> + struct i915_gem_active base;
> + struct i915_active *ref;
> + struct rb_node node;
> + u64 timeline;
> +};
> +
> +static void
> +__active_retire(struct i915_active *ref)
> +{
You wouldn't consider naming this variable 'active' throughout? 'ref'
reminds me too much of a kref, e.g. fence->refcount, although 'active' has
been used for gem_active so far. Not a blocker though; I'll get used to it.
> + GEM_BUG_ON(!ref->count);
> + if (!--ref->count)
> + ref->retire(ref);
> +}
> +
> +static void
> +node_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> + __active_retire(container_of(base, struct active_node, base)->ref);
> +}
> +
> +static void
> +last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> + __active_retire(container_of(base, struct i915_active, last));
> +}
> +
> +static struct i915_gem_active *
> +active_instance(struct i915_active *ref, u64 idx)
> +{
> + struct active_node *node;
> + struct rb_node **p, *parent;
> + struct i915_request *old;
> +
> + /*
> + * We track the most recently used timeline to skip a rbtree search
> + * for the common case, under typical loads we never need the rbtree
> + * at all. We can reuse the last slot if it is empty, that is
> + * after the previous activity has been retired, or if it matches the
> + * current timeline.
> + *
> + * Note that we allow the timeline to be active simultaneously in
> + * the rbtree and the last cache. We do this to avoid having
> + * to search and replace the rbtree element for a new timeline, with
> + * the cost being that we must be aware that the ref may be retired
> + * twice for the same timeline (as the older rbtree element will be
> + * retired before the new request added to last).
> + */
> + old = i915_gem_active_raw(&ref->last, BKL(ref));
> + if (!old || old->fence.context == idx)
> + goto out;
> +
> + /* Move the currently active fence into the rbtree */
> + idx = old->fence.context;
> +
> + parent = NULL;
> + p = &ref->tree.rb_node;
> + while (*p) {
> + parent = *p;
> +
> + node = rb_entry(parent, struct active_node, node);
> + if (node->timeline == idx)
> + goto replace;
> +
> + if (node->timeline < idx)
> + p = &parent->rb_right;
> + else
> + p = &parent->rb_left;
> + }
> +
> + node = kmalloc(sizeof(*node), GFP_KERNEL);
> +
> + /* kmalloc may retire the ref->last (thanks shrinker)! */
> + if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
> + kfree(node);
> + goto out;
> + }
> +
> + if (unlikely(!node))
> + return ERR_PTR(-ENOMEM);
> +
> + init_request_active(&node->base, node_retire);
> + node->ref = ref;
> + node->timeline = idx;
> +
> + rb_link_node(&node->node, parent, p);
> + rb_insert_color(&node->node, &ref->tree);
> +
> +replace:
> + /*
> + * Overwrite the previous active slot in the rbtree with last,
> + * leaving last zeroed. If the previous slot is still active,
> + * we must be careful as we now only expect to receive one retire
> + * callback not two, and so must undo the active counting for the
> + * overwritten slot.
> + */
> + if (i915_gem_active_isset(&node->base)) {
> + /* Retire ourselves from the old rq->active_list */
> + __list_del_entry(&node->base.link);
> + ref->count--;
> + GEM_BUG_ON(!ref->count);
> + }
> + GEM_BUG_ON(list_empty(&ref->last.link));
> + list_replace_init(&ref->last.link, &node->base.link);
> + node->base.request = fetch_and_zero(&ref->last.request);
> +
> +out:
> + return &ref->last;
> +}
> +
> +void i915_active_init(struct drm_i915_private *i915,
> + struct i915_active *ref,
> + void (*retire)(struct i915_active *ref))
> +{
> + ref->i915 = i915;
> + ref->retire = retire;
> + ref->tree = RB_ROOT;
> + init_request_active(&ref->last, last_retire);
> + ref->count = 0;
> +}
> +
> +int i915_active_ref(struct i915_active *ref,
> + u64 timeline,
> + struct i915_request *rq)
> +{
> + struct i915_gem_active *active;
> +
> + active = active_instance(ref, timeline);
> + if (IS_ERR(active))
> + return PTR_ERR(active);
> +
> + if (!i915_gem_active_isset(active))
> + ref->count++;
Could stick a super-paranoid overflow GEM_BUG_ON here.
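Something along these lines, just a sketch (untested, exact limit check up
to you):

        if (!i915_gem_active_isset(active)) {
                GEM_BUG_ON(ref->count == ~0u); /* don't let the count wrap */
                ref->count++;
        }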
> + i915_gem_active_set(active, rq);
> +
> + return 0;
> +}
> +
> +bool i915_active_acquire(struct i915_active *ref)
> +{
> + lockdep_assert_held(BKL(ref));
> + return !ref->count++;
> +}
> +
> +void i915_active_release(struct i915_active *ref)
> +{
> + lockdep_assert_held(BKL(ref));
> + __active_retire(ref);
> +}
> +
> +int i915_active_wait(struct i915_active *ref)
> +{
> + struct active_node *it, *n;
> + int ret;
> +
> + ret = i915_gem_active_retire(&ref->last, BKL(ref));
> + if (ret)
> + return ret;
> +
> + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> + ret = i915_gem_active_retire(&it->base, BKL(ref));
> + if (ret)
> + return ret;
> +
> + GEM_BUG_ON(i915_gem_active_isset(&it->base));
> + kfree(it);
> + }
> + ref->tree = RB_ROOT;
> +
> + return 0;
> +}
> +
> +static int __i915_request_await_active(struct i915_request *rq,
> + struct i915_gem_active *active)
> +{
> + struct i915_request *barrier =
> + i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
> +
> + return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
> +}
> +
> +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
> +{
> + struct active_node *it, *n;
> + int ret;
> +
> + ret = __i915_request_await_active(rq, &ref->last);
> + if (ret)
> + return ret;
> +
> + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> + ret = __i915_request_await_active(rq, &it->base);
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +void i915_active_fini(struct i915_active *ref)
> +{
> + struct active_node *it, *n;
> +
> + GEM_BUG_ON(i915_gem_active_isset(&ref->last));
> +
> + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> + GEM_BUG_ON(i915_gem_active_isset(&it->base));
> + kfree(it);
> + }
> + ref->tree = RB_ROOT;
> +}
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftests/i915_active.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> new file mode 100644
> index 000000000000..c0729a046f98
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -0,0 +1,66 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef _I915_ACTIVE_H_
> +#define _I915_ACTIVE_H_
> +
> +#include "i915_active_types.h"
> +
> +#include <linux/rbtree.h>
> +
> +#include "i915_request.h"
> +
> +/*
> + * GPU activity tracking
> + *
> + * Each set of commands submitted to the GPU comprises a single request that
> + * signals a fence upon completion. struct i915_request combines the
> + * command submission, scheduling and fence signaling roles. If we want to see
> + * if a particular task is complete, we need to grab the fence (struct
> + * i915_request) for that task and check or wait for it to be signaled. More
> + * often though we want to track the status of a bunch of tasks, for example
> + * to wait for the GPU to finish accessing some memory across a variety of
> + * different command pipelines from different clients. We could choose to
> + * track every single request associated with the task, but knowing that
> + * each request belongs to an ordered timeline (later requests within a
> + * timeline must wait for earlier requests), we need only track the
> + * latest request in each timeline to determine the overall status of the
> + * task.
> + *
> + * struct i915_active provides this tracking across timelines. It builds a
> + * composite shared-fence, and is updated as new work is submitted to the task,
> + * forming a snapshot of the current status. It should be embedded into the
> + * different resources that need to track their associated GPU activity to
> + * provide a callback when that GPU activity has ceased, or otherwise to
> + * provide a serialisation point either for request submission or for CPU
> + * synchronisation.
> + */
> +
> +void i915_active_init(struct drm_i915_private *i915,
> + struct i915_active *ref,
> + void (*retire)(struct i915_active *ref));
> +
> +int i915_active_ref(struct i915_active *ref,
> + u64 timeline,
> + struct i915_request *rq);
> +
> +int i915_active_wait(struct i915_active *ref);
> +
> +int i915_request_await_active(struct i915_request *rq,
> + struct i915_active *ref);
> +
> +bool i915_active_acquire(struct i915_active *ref);
> +void i915_active_release(struct i915_active *ref);
> +
> +static inline bool
> +i915_active_is_idle(const struct i915_active *ref)
> +{
> + return !ref->count;
> +}
> +
> +void i915_active_fini(struct i915_active *ref);
> +
> +#endif /* _I915_ACTIVE_H_ */
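As an aside for anyone following the series: the intended usage mirrors the
i915_vma conversion further down. Roughly, for a hypothetical 'struct foo'
embedding the tracker (not part of this patch, untested sketch):

        struct foo {
                struct i915_active active;
                /* the resource the GPU may be accessing */
        };

        static void foo_retire(struct i915_active *ref)
        {
                struct foo *foo = container_of(ref, struct foo, active);

                /* all tracked requests have signalled; safe to release foo */
        }

        /* at creation */
        i915_active_init(i915, &foo->active, foo_retire);

        /* for each request touching foo, under struct_mutex */
        err = i915_active_ref(&foo->active, rq->fence.context, rq);

        /* CPU-side synchronisation point before tearing down */
        err = i915_active_wait(&foo->active);
        i915_active_fini(&foo->active);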
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> new file mode 100644
> index 000000000000..411e502ed8dd
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -0,0 +1,26 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef _I915_ACTIVE_TYPES_H_
> +#define _I915_ACTIVE_TYPES_H_
> +
> +#include <linux/rbtree.h>
> +
> +#include "i915_request.h"
> +
> +struct drm_i915_private;
> +
> +struct i915_active {
> + struct drm_i915_private *i915;
> +
> + struct rb_root tree;
> + struct i915_gem_active last;
> + unsigned int count;
> +
> + void (*retire)(struct i915_active *ref);
> +};
> +
> +#endif /* _I915_ACTIVE_TYPES_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 49b00996a15e..e625659c03a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1917,14 +1917,13 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> if (!vma)
> return ERR_PTR(-ENOMEM);
>
> + i915_active_init(i915, &vma->active, NULL);
> init_request_active(&vma->last_fence, NULL);
>
> vma->vm = &ggtt->vm;
> vma->ops = &pd_vma_ops;
> vma->private = ppgtt;
>
> - vma->active = RB_ROOT;
> -
> vma->size = size;
> vma->fence_size = size;
> vma->flags = I915_VMA_GGTT;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index d83b8ad5f859..d4772061e642 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -63,22 +63,23 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>
> #endif
>
> -struct i915_vma_active {
> - struct i915_gem_active base;
> - struct i915_vma *vma;
> - struct rb_node node;
> - u64 timeline;
> -};
> +static void obj_bump_mru(struct drm_i915_gem_object *obj)
> +{
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
>
> -static void
> -__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> + spin_lock(&i915->mm.obj_lock);
> + if (obj->bind_count)
> + list_move_tail(&obj->mm.link, &i915->mm.bound_list);
> + spin_unlock(&i915->mm.obj_lock);
> +
> + obj->mm.dirty = true; /* be paranoid */
> +}
> +
> +static void __i915_vma_retire(struct i915_active *ref)
> {
> + struct i915_vma *vma = container_of(ref, typeof(*vma), active);
> struct drm_i915_gem_object *obj = vma->obj;
>
> - GEM_BUG_ON(!i915_vma_is_active(vma));
> - if (--vma->active_count)
> - return;
> -
> GEM_BUG_ON(!i915_gem_object_is_active(obj));
> if (--obj->active_count)
> return;
> @@ -90,16 +91,12 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> reservation_object_unlock(obj->resv);
> }
>
> - /* Bump our place on the bound list to keep it roughly in LRU order
> + /*
> + * Bump our place on the bound list to keep it roughly in LRU order
> * so that we don't steal from recently used but inactive objects
> * (unless we are forced to ofc!)
> */
> - spin_lock(&rq->i915->mm.obj_lock);
> - if (obj->bind_count)
> - list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list);
> - spin_unlock(&rq->i915->mm.obj_lock);
> -
> - obj->mm.dirty = true; /* be paranoid */
> + obj_bump_mru(obj);
>
> if (i915_gem_object_has_active_reference(obj)) {
> i915_gem_object_clear_active_reference(obj);
> @@ -107,21 +104,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> }
> }
>
> -static void
> -i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> -{
> - struct i915_vma_active *active =
> - container_of(base, typeof(*active), base);
> -
> - __i915_vma_retire(active->vma, rq);
> -}
> -
> -static void
> -i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> -{
> - __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> -}
> -
> static struct i915_vma *
> vma_create(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> @@ -137,10 +119,9 @@ vma_create(struct drm_i915_gem_object *obj,
> if (vma == NULL)
> return ERR_PTR(-ENOMEM);
>
> - vma->active = RB_ROOT;
> -
> - init_request_active(&vma->last_active, i915_vma_last_retire);
> + i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
> init_request_active(&vma->last_fence, NULL);
> +
> vma->vm = vm;
> vma->ops = &vm->vma_ops;
> vma->obj = obj;
> @@ -823,7 +804,6 @@ void i915_vma_reopen(struct i915_vma *vma)
> static void __i915_vma_destroy(struct i915_vma *vma)
> {
> struct drm_i915_private *i915 = vma->vm->i915;
> - struct i915_vma_active *iter, *n;
>
> GEM_BUG_ON(vma->node.allocated);
> GEM_BUG_ON(vma->fence);
> @@ -843,10 +823,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
> spin_unlock(&obj->vma.lock);
> }
>
> - rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> - GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> - kfree(iter);
> - }
> + i915_active_fini(&vma->active);
>
> kmem_cache_free(i915->vmas, vma);
> }
> @@ -931,104 +908,15 @@ static void export_fence(struct i915_vma *vma,
> reservation_object_unlock(resv);
> }
>
> -static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
> -{
> - struct i915_vma_active *active;
> - struct rb_node **p, *parent;
> - struct i915_request *old;
> -
> - /*
> - * We track the most recently used timeline to skip a rbtree search
> - * for the common case, under typical loads we never need the rbtree
> - * at all. We can reuse the last_active slot if it is empty, that is
> - * after the previous activity has been retired, or if the active
> - * matches the current timeline.
> - *
> - * Note that we allow the timeline to be active simultaneously in
> - * the rbtree and the last_active cache. We do this to avoid having
> - * to search and replace the rbtree element for a new timeline, with
> - * the cost being that we must be aware that the vma may be retired
> - * twice for the same timeline (as the older rbtree element will be
> - * retired before the new request added to last_active).
> - */
> - old = i915_gem_active_raw(&vma->last_active,
> - &vma->vm->i915->drm.struct_mutex);
> - if (!old || old->fence.context == idx)
> - goto out;
> -
> - /* Move the currently active fence into the rbtree */
> - idx = old->fence.context;
> -
> - parent = NULL;
> - p = &vma->active.rb_node;
> - while (*p) {
> - parent = *p;
> -
> - active = rb_entry(parent, struct i915_vma_active, node);
> - if (active->timeline == idx)
> - goto replace;
> -
> - if (active->timeline < idx)
> - p = &parent->rb_right;
> - else
> - p = &parent->rb_left;
> - }
> -
> - active = kmalloc(sizeof(*active), GFP_KERNEL);
> -
> - /* kmalloc may retire the vma->last_active request (thanks shrinker)! */
> - if (unlikely(!i915_gem_active_raw(&vma->last_active,
> - &vma->vm->i915->drm.struct_mutex))) {
> - kfree(active);
> - goto out;
> - }
> -
> - if (unlikely(!active))
> - return ERR_PTR(-ENOMEM);
> -
> - init_request_active(&active->base, i915_vma_retire);
> - active->vma = vma;
> - active->timeline = idx;
> -
> - rb_link_node(&active->node, parent, p);
> - rb_insert_color(&active->node, &vma->active);
> -
> -replace:
> - /*
> - * Overwrite the previous active slot in the rbtree with last_active,
> - * leaving last_active zeroed. If the previous slot is still active,
> - * we must be careful as we now only expect to receive one retire
> - * callback not two, and so much undo the active counting for the
> - * overwritten slot.
> - */
> - if (i915_gem_active_isset(&active->base)) {
> - /* Retire ourselves from the old rq->active_list */
> - __list_del_entry(&active->base.link);
> - vma->active_count--;
> - GEM_BUG_ON(!vma->active_count);
> - }
> - GEM_BUG_ON(list_empty(&vma->last_active.link));
> - list_replace_init(&vma->last_active.link, &active->base.link);
> - active->base.request = fetch_and_zero(&vma->last_active.request);
> -
> -out:
> - return &vma->last_active;
> -}
> -
> int i915_vma_move_to_active(struct i915_vma *vma,
> struct i915_request *rq,
> unsigned int flags)
> {
> struct drm_i915_gem_object *obj = vma->obj;
> - struct i915_gem_active *active;
>
> lockdep_assert_held(&rq->i915->drm.struct_mutex);
> GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>
> - active = active_instance(vma, rq->fence.context);
> - if (IS_ERR(active))
> - return PTR_ERR(active);
> -
> /*
> * Add a reference if we're newly entering the active list.
> * The order in which we add operations to the retirement queue is
> @@ -1037,9 +925,15 @@ int i915_vma_move_to_active(struct i915_vma *vma,
> * add the active reference first and queue for it to be dropped
> * *last*.
> */
> - if (!i915_gem_active_isset(active) && !vma->active_count++)
> + if (!vma->active.count)
> obj->active_count++;
> - i915_gem_active_set(active, rq);
> +
> + if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
> + if (!vma->active.count)
> + obj->active_count--;
> + return -ENOMEM;
> + }
Optionally you could make i915_active_ref return the old ref count or error.
Then this could become simpler:

        ret = i915_active_ref(..);
        if (unlikely(ret < 0))
                return -ENOMEM;
        else if (ret == 0)
                obj->active_count++;
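On the i915_active_ref side that would be roughly (untested sketch,
returning the count as it was before this ref instead of 0):

        int i915_active_ref(struct i915_active *ref,
                            u64 timeline,
                            struct i915_request *rq)
        {
                struct i915_gem_active *active;
                int prev;

                active = active_instance(ref, timeline);
                if (IS_ERR(active))
                        return PTR_ERR(active);

                prev = ref->count;
                if (!i915_gem_active_isset(active))
                        ref->count++;
                i915_gem_active_set(active, rq);

                return prev;
        }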
> +
> GEM_BUG_ON(!i915_vma_is_active(vma));
> GEM_BUG_ON(!obj->active_count);
>
> @@ -1073,8 +967,6 @@ int i915_vma_unbind(struct i915_vma *vma)
> */
> might_sleep();
> if (i915_vma_is_active(vma)) {
> - struct i915_vma_active *active, *n;
> -
> /*
> * When a closed VMA is retired, it is unbound - eek.
> * In order to prevent it from being recursively closed,
> @@ -1090,19 +982,10 @@ int i915_vma_unbind(struct i915_vma *vma)
> */
> __i915_vma_pin(vma);
>
> - ret = i915_gem_active_retire(&vma->last_active,
> - &vma->vm->i915->drm.struct_mutex);
> + ret = i915_active_wait(&vma->active);
> if (ret)
> goto unpin;
>
> - rbtree_postorder_for_each_entry_safe(active, n,
> - &vma->active, node) {
> - ret = i915_gem_active_retire(&active->base,
> - &vma->vm->i915->drm.struct_mutex);
> - if (ret)
> - goto unpin;
> - }
> -
> ret = i915_gem_active_retire(&vma->last_fence,
> &vma->vm->i915->drm.struct_mutex);
> unpin:
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 5793abe509a2..3c03d4569481 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -34,6 +34,7 @@
> #include "i915_gem_fence_reg.h"
> #include "i915_gem_object.h"
>
> +#include "i915_active.h"
> #include "i915_request.h"
>
> enum i915_cache_level;
> @@ -108,9 +109,7 @@ struct i915_vma {
> #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
> #define I915_VMA_GGTT_WRITE BIT(15)
>
> - unsigned int active_count;
> - struct rb_root active;
> - struct i915_gem_active last_active;
> + struct i915_active active;
> struct i915_gem_active last_fence;
>
> /**
> @@ -154,9 +153,9 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
> void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
> #define I915_VMA_RELEASE_MAP BIT(0)
>
> -static inline bool i915_vma_is_active(struct i915_vma *vma)
> +static inline bool i915_vma_is_active(const struct i915_vma *vma)
> {
> - return vma->active_count;
> + return !i915_active_is_idle(&vma->active);
> }
>
> int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
> new file mode 100644
> index 000000000000..7c5c3068565b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/i915_active.c
> @@ -0,0 +1,158 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include "../i915_selftest.h"
> +
> +#include "igt_flush_test.h"
> +#include "lib_sw_fence.h"
> +
> +struct live_active {
> + struct i915_active base;
> + bool retired;
> +};
> +
> +static void __live_active_retire(struct i915_active *base)
> +{
> + struct live_active *active = container_of(base, typeof(*active), base);
> +
> + active->retired = true;
> +}
> +
> +static int __live_active_setup(struct drm_i915_private *i915,
> + struct live_active *active)
> +{
> + struct intel_engine_cs *engine;
> + struct i915_sw_fence *submit;
> + enum intel_engine_id id;
> + unsigned int count = 0;
> + int err = 0;
> +
> + i915_active_init(i915, &active->base, __live_active_retire);
> + active->retired = false;
> +
> + if (!i915_active_acquire(&active->base)) {
> + pr_err("First i915_active_acquire should report being idle\n");
> + return -EINVAL;
> + }
> +
> + submit = heap_fence_create(GFP_KERNEL);
> +
> + for_each_engine(engine, i915, id) {
> + struct i915_request *rq;
> +
> + rq = i915_request_alloc(engine, i915->kernel_context);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + break;
> + }
> +
> + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
> + submit,
> + GFP_KERNEL);
> + if (err < 0) {
> + pr_err("Failed to allocate submission fence!\n");
> + i915_request_add(rq);
> + break;
> + }
> +
> + err = i915_active_ref(&active->base, rq->fence.context, rq);
> + if (err) {
> + pr_err("Failed to track active ref!\n");
> + i915_request_add(rq);
> + break;
> + }
> +
> + i915_request_add(rq);
> + count++;
> + }
> +
> + i915_active_release(&active->base);
> + if (active->retired) {
> + pr_err("i915_active retired before submission!\n");
> + err = -EINVAL;
> + }
> + if (active->base.count != count) {
> + pr_err("i915_active not tracking all requests, found %d, expected %d\n",
> + active->base.count, count);
> + err = -EINVAL;
> + }
> +
> + i915_sw_fence_commit(submit);
> + heap_fence_put(submit);
> +
> + return err;
> +}
> +
> +static int live_active_wait(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct live_active active;
> + intel_wakeref_t wakeref;
> + int err;
> +
> + /* Check that we get a callback when requests retire upon waiting */
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + err = __live_active_setup(i915, &active);
> +
> + i915_active_wait(&active.base);
> + if (!active.retired) {
> + pr_err("i915_active not retired after waiting!\n");
> + err = -EINVAL;
> + }
> +
> + i915_active_fini(&active.base);
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> +
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> + return err;
> +}
> +
> +static int live_active_retire(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct live_active active;
> + intel_wakeref_t wakeref;
> + int err;
> +
> + /* Check that we get a callback when requests are indirectly retired */
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + err = __live_active_setup(i915, &active);
> +
> + /* waits for & retires all requests */
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> +
> + if (!active.retired) {
> + pr_err("i915_active not retired after flushing!\n");
> + err = -EINVAL;
> + }
> +
> + i915_active_fini(&active.base);
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> + return err;
> +}
> +
> +int i915_active_live_selftests(struct drm_i915_private *i915)
> +{
> + static const struct i915_subtest tests[] = {
> + SUBTEST(live_active_wait),
> + SUBTEST(live_active_retire),
> + };
> +
> + if (i915_terminally_wedged(&i915->gpu_error))
> + return 0;
> +
> + return i915_subtests(tests, i915);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> index 76b4f87fc853..6d766925ad04 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> @@ -12,8 +12,9 @@
> selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
> selftest(uncore, intel_uncore_live_selftests)
> selftest(workarounds, intel_workarounds_live_selftests)
> -selftest(requests, i915_request_live_selftests)
> selftest(timelines, i915_timeline_live_selftests)
> +selftest(requests, i915_request_live_selftests)
> +selftest(active, i915_active_live_selftests)
> selftest(objects, i915_gem_object_live_selftests)
> selftest(dmabuf, i915_gem_dmabuf_live_selftests)
> selftest(coherency, i915_gem_coherency_live_selftests)
>
Body looks good. I left the selftests for after I get further into the series.
Regards,
Tvrtko