[Intel-gfx] [PATCH v3] drm/i915: Track vma activity per fence.context, not per engine

Wed Jul 4 11:19:44 UTC 2018

On 04/07/2018 10:13, Chris Wilson wrote:
> In the next patch, we will want to be able to use more flexible request
> timelines that can hop between engines. From the vma pov, we can then
> not rely on the binding of this request to an engine and so can not
> ensure that different requests are ordered through a per-engine
> timeline, and so we must track activity of all timelines. (We track
> activity on the vma itself to prevent unbinding from HW before the HW
> has finished accessing it.)
> 
> v2: Switch to a rbtree for 32b safety (since using u64 as a radixtree
> index is fraught with aliasing of unsigned longs).
> v3: s/lookup_active/active_instance/ because we can never agree on names
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c   |   5 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
>   drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
>   drivers/gpu/drm/i915/i915_request.h   |   1 +
>   drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
>   drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
>   6 files changed, 100 insertions(+), 80 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c6aa761ca085..216463fb3a1c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
>   	struct i915_ggtt *ggtt = &i915->ggtt;
>   	struct i915_vma *vma;
> -	int i;
>   
>   	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(size > ggtt->vm.total);
> @@ -2005,14 +2004,14 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	if (!vma)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], NULL);
>   	init_request_active(&vma->last_fence, NULL);
>   
>   	vma->vm = &ggtt->vm;
>   	vma->ops = &pd_vma_ops;
>   	vma->private = ppgtt;
>   
> +	vma->active = RB_ROOT;
> +
>   	vma->size = size;
>   	vma->fence_size = size;
>   	vma->flags = I915_VMA_GGTT;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index df524c9cad40..8c81cf3aa182 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>   				struct drm_i915_error_buffer *err,
>   				int count)
>   {
> -	int i;
> -
>   	err_printf(m, "%s [%d]:\n", name, count);
>   
>   	while (count--) {
> -		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
> +		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
>   			   upper_32_bits(err->gtt_offset),
>   			   lower_32_bits(err->gtt_offset),
>   			   err->size,
>   			   err->read_domains,
> -			   err->write_domain);
> -		for (i = 0; i < I915_NUM_ENGINES; i++)
> -			err_printf(m, "%02x ", err->rseqno[i]);
> -
> -		err_printf(m, "] %02x", err->wseqno);
> +			   err->write_domain,
> +			   err->wseqno);
>   		err_puts(m, tiling_flag(err->tiling));
>   		err_puts(m, dirty_flag(err->dirty));
>   		err_puts(m, purgeable_flag(err->purgeable));
> @@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>   		       struct i915_vma *vma)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	int i;
>   
>   	err->size = obj->base.size;
>   	err->name = obj->base.name;
>   
> -	for (i = 0; i < I915_NUM_ENGINES; i++)
> -		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
>   	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
>   	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 58910f1dc67c..f893a4e8b783 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -177,7 +177,7 @@ struct i915_gpu_state {
>   	struct drm_i915_error_buffer {
>   		u32 size;
>   		u32 name;
> -		u32 rseqno[I915_NUM_ENGINES], wseqno;
> +		u32 wseqno;
>   		u64 gtt_offset;
>   		u32 read_domains;
>   		u32 write_domain;
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a355a081485f..e1c9365dfefb 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -380,6 +380,7 @@ static inline void
>   init_request_active(struct i915_gem_active *active,
>   		    i915_gem_retire_fn retire)
>   {
> +	RCU_INIT_POINTER(active->request, NULL);
>   	INIT_LIST_HEAD(&active->link);
>   	active->retire = retire ?: i915_gem_retire_noop;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index c608579d03e9..4a2bafffb231 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>   
>   #endif
>   
> +struct i915_vma_active {
> +	struct i915_gem_active base;
> +	struct i915_vma *vma;
> +	struct rb_node node;
> +	u64 timeline;
> +};
> +
>   static void
> -i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> +__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
>   {
> -	const unsigned int idx = rq->engine->id;
> -	struct i915_vma *vma =
> -		container_of(active, struct i915_vma, last_read[idx]);
>   	struct drm_i915_gem_object *obj = vma->obj;
>   
> -	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> -
> -	i915_vma_clear_active(vma, idx);
> -	if (i915_vma_is_active(vma))
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	if (--vma->active_count)
>   		return;
>   
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> @@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
>   	}
>   }
>   
> +static void
> +i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	struct i915_vma_active *active =
> +		container_of(base, typeof(*active), base);
> +
> +	__i915_vma_retire(active->vma, rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
>   {
>   	struct i915_vma *vma;
>   	struct rb_node *rb, **p;
> -	int i;
>   
>   	/* The aliasing_ppgtt should never be used directly! */
>   	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> @@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
>   	if (vma == NULL)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], i915_vma_retire);
> +	vma->active = RB_ROOT;
> +
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
>   static void __i915_vma_destroy(struct i915_vma *vma)
>   {
>   	struct drm_i915_private *i915 = vma->vm->i915;
> -	int i;
> +	struct i915_vma_active *iter, *n;
>   
>   	GEM_BUG_ON(vma->node.allocated);
>   	GEM_BUG_ON(vma->fence);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
>   	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
>   
>   	list_del(&vma->obj_link);
> @@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>   	if (!i915_vma_is_ggtt(vma))
>   		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
>   
> +	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> +		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> +		kfree(iter);
> +	}
> +
>   	kmem_cache_free(i915->vmas, vma);
>   }
>   
> @@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
>   	reservation_object_unlock(resv);
>   }
>   
> +static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
> +{
> +	struct i915_vma_active *active;
> +	struct rb_node **p, *parent;
> +
> +	parent = NULL;
> +	p = &vma->active.rb_node;
> +	while (*p) {
> +		parent = *p;
> +
> +		active = rb_entry(parent, struct i915_vma_active, node);
> +		if (active->timeline == idx)
> +			return &active->base;
> +
> +		if (active->timeline < idx)
> +			p = &parent->rb_right;
> +		else
> +			p = &parent->rb_left;
> +	}
> +
> +	active = kmalloc(sizeof(*active), GFP_KERNEL);
> +	if (unlikely(!active))
> +		return ERR_PTR(-ENOMEM);
> +
> +	init_request_active(&active->base, i915_vma_retire);
> +	active->vma = vma;
> +	active->timeline = idx;
> +
> +	rb_link_node(&active->node, parent, p);
> +	rb_insert_color(&active->node, &vma->active);
> +
> +	return &active->base;
> +}
> +
>   int i915_vma_move_to_active(struct i915_vma *vma,
>   			    struct i915_request *rq,
>   			    unsigned int flags)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	const unsigned int idx = rq->engine->id;
> +	struct i915_gem_active *active;
>   
>   	lockdep_assert_held(&rq->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   
> +	active = active_instance(vma, rq->fence.context);
> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
> +
>   	/*
>   	 * Add a reference if we're newly entering the active list.
>   	 * The order in which we add operations to the retirement queue is
> @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   	 * add the active reference first and queue for it to be dropped
>   	 * *last*.
>   	 */
> -	if (!i915_vma_is_active(vma))
> +	if (!i915_gem_active_isset(active) && !vma->active_count++) {
> +		list_move_tail(&vma->vm_link, &vma->vm->active_list);
>   		obj->active_count++;
> -	i915_vma_set_active(vma, idx);
> -	i915_gem_active_set(&vma->last_read[idx], rq);
> -	list_move_tail(&vma->vm_link, &vma->vm->active_list);
> +	}
> +	i915_gem_active_set(active, rq);
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	GEM_BUG_ON(!obj->active_count);
>   
>   	obj->write_domain = 0;
>   	if (flags & EXEC_OBJECT_WRITE) {
> @@ -922,7 +975,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   
>   int i915_vma_unbind(struct i915_vma *vma)
>   {
> -	unsigned long active;
>   	int ret;
>   
>   	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> @@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>   	 * have side-effects such as unpinning or even unbinding this vma.
>   	 */
>   	might_sleep();
> -	active = i915_vma_get_active(vma);
> -	if (active) {
> -		int idx;
> +	if (i915_vma_is_active(vma)) {
> +		struct i915_vma_active *active, *n;
>   
>   		/*
>   		 * When a closed VMA is retired, it is unbound - eek.
> @@ -951,18 +1002,17 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> -		for_each_active(active, idx) {
> -			ret = i915_gem_active_retire(&vma->last_read[idx],
> +		rbtree_postorder_for_each_entry_safe(active, n,
> +						     &vma->active, node) {
> +			ret = i915_gem_active_retire(&active->base,
>   						     &vma->vm->i915->drm.struct_mutex);
>   			if (ret)
> -				break;
> -		}
> -
> -		if (!ret) {
> -			ret = i915_gem_active_retire(&vma->last_fence,
> -						     &vma->vm->i915->drm.struct_mutex);
> +				goto unpin;
>   		}
>   
> +		ret = i915_gem_active_retire(&vma->last_fence,
> +					     &vma->vm->i915->drm.struct_mutex);
> +unpin:
>   		__i915_vma_unpin(vma);
>   		if (ret)
>   			return ret;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index a218b689e418..c297b0a0dc47 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -26,6 +26,7 @@
>   #define __I915_VMA_H__
>   
>   #include <linux/io-mapping.h>
> +#include <linux/rbtree.h>
>   
>   #include <drm/drm_mm.h>
>   
> @@ -94,8 +95,8 @@ struct i915_vma {
>   #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
>   #define I915_VMA_GGTT_WRITE	BIT(12)
>   
> -	unsigned int active;
> -	struct i915_gem_active last_read[I915_NUM_ENGINES];
> +	unsigned int active_count;
> +	struct rb_root active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> @@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   
>   void i915_vma_unpin_and_release(struct i915_vma **p_vma);
>   
> +static inline bool i915_vma_is_active(struct i915_vma *vma)
> +{
> +	return vma->active_count;
> +}
> +
> +int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> +					 struct i915_request *rq,
> +					 unsigned int flags);
> +
>   static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
>   {
>   	return vma->flags & I915_VMA_GGTT;
> @@ -187,38 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
>   	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
>   }
>   
> -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
> -{
> -	return vma->active;
> -}
> -
> -static inline bool i915_vma_is_active(const struct i915_vma *vma)
> -{
> -	return i915_vma_get_active(vma);
> -}
> -
> -static inline void i915_vma_set_active(struct i915_vma *vma,
> -				       unsigned int engine)
> -{
> -	vma->active |= BIT(engine);
> -}
> -
> -static inline void i915_vma_clear_active(struct i915_vma *vma,
> -					 unsigned int engine)
> -{
> -	vma->active &= ~BIT(engine);
> -}
> -
> -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
> -					      unsigned int engine)
> -{
> -	return vma->active & BIT(engine);
> -}
> -
> -int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> -					 struct i915_request *rq,
> -					 unsigned int flags);
> -
>   static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
>   {
>   	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko