[Intel-gfx] [PATCH v3] drm/i915: Track vma activity per fence.context, not per engine
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jul 4 11:19:44 UTC 2018
On 04/07/2018 10:13, Chris Wilson wrote:
> In the next patch, we will want to be able to use more flexible request
> timelines that can hop between engines. From the vma pov, we can then
> not rely on the binding of this request to an engine and so can not
> ensure that different requests are ordered through a per-engine
> timeline, and so we must track activity of all timelines. (We track
> activity on the vma itself to prevent unbinding from HW before the HW
> has finished accessing it.)
>
> v2: Switch to a rbtree for 32b safety (since using u64 as a radixtree
> index is fraught with aliasing of unsigned longs).
> v3: s/lookup_active/active_instance/ because we can never agree on names
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +-
> drivers/gpu/drm/i915/i915_gpu_error.c | 14 +---
> drivers/gpu/drm/i915/i915_gpu_error.h | 2 +-
> drivers/gpu/drm/i915/i915_request.h | 1 +
> drivers/gpu/drm/i915/i915_vma.c | 112 +++++++++++++++++++-------
> drivers/gpu/drm/i915/i915_vma.h | 46 +++--------
> 6 files changed, 100 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c6aa761ca085..216463fb3a1c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> struct drm_i915_private *i915 = ppgtt->base.vm.i915;
> struct i915_ggtt *ggtt = &i915->ggtt;
> struct i915_vma *vma;
> - int i;
>
> GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> GEM_BUG_ON(size > ggtt->vm.total);
> @@ -2005,14 +2004,14 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> if (!vma)
> return ERR_PTR(-ENOMEM);
>
> - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> - init_request_active(&vma->last_read[i], NULL);
> init_request_active(&vma->last_fence, NULL);
>
> vma->vm = &ggtt->vm;
> vma->ops = &pd_vma_ops;
> vma->private = ppgtt;
>
> + vma->active = RB_ROOT;
> +
> vma->size = size;
> vma->fence_size = size;
> vma->flags = I915_VMA_GGTT;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index df524c9cad40..8c81cf3aa182 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
> struct drm_i915_error_buffer *err,
> int count)
> {
> - int i;
> -
> err_printf(m, "%s [%d]:\n", name, count);
>
> while (count--) {
> - err_printf(m, " %08x_%08x %8u %02x %02x [ ",
> + err_printf(m, " %08x_%08x %8u %02x %02x %02x",
> upper_32_bits(err->gtt_offset),
> lower_32_bits(err->gtt_offset),
> err->size,
> err->read_domains,
> - err->write_domain);
> - for (i = 0; i < I915_NUM_ENGINES; i++)
> - err_printf(m, "%02x ", err->rseqno[i]);
> -
> - err_printf(m, "] %02x", err->wseqno);
> + err->write_domain,
> + err->wseqno);
> err_puts(m, tiling_flag(err->tiling));
> err_puts(m, dirty_flag(err->dirty));
> err_puts(m, purgeable_flag(err->purgeable));
> @@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
> struct i915_vma *vma)
> {
> struct drm_i915_gem_object *obj = vma->obj;
> - int i;
>
> err->size = obj->base.size;
> err->name = obj->base.name;
>
> - for (i = 0; i < I915_NUM_ENGINES; i++)
> - err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
> err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
> err->engine = __active_get_engine_id(&obj->frontbuffer_write);
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 58910f1dc67c..f893a4e8b783 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -177,7 +177,7 @@ struct i915_gpu_state {
> struct drm_i915_error_buffer {
> u32 size;
> u32 name;
> - u32 rseqno[I915_NUM_ENGINES], wseqno;
> + u32 wseqno;
> u64 gtt_offset;
> u32 read_domains;
> u32 write_domain;
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a355a081485f..e1c9365dfefb 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -380,6 +380,7 @@ static inline void
> init_request_active(struct i915_gem_active *active,
> i915_gem_retire_fn retire)
> {
> + RCU_INIT_POINTER(active->request, NULL);
> INIT_LIST_HEAD(&active->link);
> active->retire = retire ?: i915_gem_retire_noop;
> }
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index c608579d03e9..4a2bafffb231 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>
> #endif
>
> +struct i915_vma_active {
> + struct i915_gem_active base;
> + struct i915_vma *vma;
> + struct rb_node node;
> + u64 timeline;
> +};
> +
> static void
> -i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> +__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> {
> - const unsigned int idx = rq->engine->id;
> - struct i915_vma *vma =
> - container_of(active, struct i915_vma, last_read[idx]);
> struct drm_i915_gem_object *obj = vma->obj;
>
> - GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> -
> - i915_vma_clear_active(vma, idx);
> - if (i915_vma_is_active(vma))
> + GEM_BUG_ON(!i915_vma_is_active(vma));
> + if (--vma->active_count)
> return;
>
> GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> @@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> }
> }
>
> +static void
> +i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> + struct i915_vma_active *active =
> + container_of(base, typeof(*active), base);
> +
> + __i915_vma_retire(active->vma, rq);
> +}
> +
> static struct i915_vma *
> vma_create(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> @@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
> {
> struct i915_vma *vma;
> struct rb_node *rb, **p;
> - int i;
>
> /* The aliasing_ppgtt should never be used directly! */
> GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> @@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
> if (vma == NULL)
> return ERR_PTR(-ENOMEM);
>
> - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> - init_request_active(&vma->last_read[i], i915_vma_retire);
> + vma->active = RB_ROOT;
> +
> init_request_active(&vma->last_fence, NULL);
> vma->vm = vm;
> vma->ops = &vm->vma_ops;
> @@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
> static void __i915_vma_destroy(struct i915_vma *vma)
> {
> struct drm_i915_private *i915 = vma->vm->i915;
> - int i;
> + struct i915_vma_active *iter, *n;
>
> GEM_BUG_ON(vma->node.allocated);
> GEM_BUG_ON(vma->fence);
>
> - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> - GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
> GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
>
> list_del(&vma->obj_link);
> @@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
> if (!i915_vma_is_ggtt(vma))
> i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
>
> + rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> + GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> + kfree(iter);
> + }
> +
> kmem_cache_free(i915->vmas, vma);
> }
>
> @@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
> reservation_object_unlock(resv);
> }
>
> +static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
> +{
> + struct i915_vma_active *active;
> + struct rb_node **p, *parent;
> +
> + parent = NULL;
> + p = &vma->active.rb_node;
> + while (*p) {
> + parent = *p;
> +
> + active = rb_entry(parent, struct i915_vma_active, node);
> + if (active->timeline == idx)
> + return &active->base;
> +
> + if (active->timeline < idx)
> + p = &parent->rb_right;
> + else
> + p = &parent->rb_left;
> + }
> +
> + active = kmalloc(sizeof(*active), GFP_KERNEL);
> + if (unlikely(!active))
> + return ERR_PTR(-ENOMEM);
> +
> + init_request_active(&active->base, i915_vma_retire);
> + active->vma = vma;
> + active->timeline = idx;
> +
> + rb_link_node(&active->node, parent, p);
> + rb_insert_color(&active->node, &vma->active);
> +
> + return &active->base;
> +}
> +
> int i915_vma_move_to_active(struct i915_vma *vma,
> struct i915_request *rq,
> unsigned int flags)
> {
> struct drm_i915_gem_object *obj = vma->obj;
> - const unsigned int idx = rq->engine->id;
> + struct i915_gem_active *active;
>
> lockdep_assert_held(&rq->i915->drm.struct_mutex);
> GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>
> + active = active_instance(vma, rq->fence.context);
> + if (IS_ERR(active))
> + return PTR_ERR(active);
> +
> /*
> * Add a reference if we're newly entering the active list.
> * The order in which we add operations to the retirement queue is
> @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
> * add the active reference first and queue for it to be dropped
> * *last*.
> */
> - if (!i915_vma_is_active(vma))
> + if (!i915_gem_active_isset(active) && !vma->active_count++) {
> + list_move_tail(&vma->vm_link, &vma->vm->active_list);
> obj->active_count++;
> - i915_vma_set_active(vma, idx);
> - i915_gem_active_set(&vma->last_read[idx], rq);
> - list_move_tail(&vma->vm_link, &vma->vm->active_list);
> + }
> + i915_gem_active_set(active, rq);
> + GEM_BUG_ON(!i915_vma_is_active(vma));
> + GEM_BUG_ON(!obj->active_count);
>
> obj->write_domain = 0;
> if (flags & EXEC_OBJECT_WRITE) {
> @@ -922,7 +975,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>
> int i915_vma_unbind(struct i915_vma *vma)
> {
> - unsigned long active;
> int ret;
>
> lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> @@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
> * have side-effects such as unpinning or even unbinding this vma.
> */
> might_sleep();
> - active = i915_vma_get_active(vma);
> - if (active) {
> - int idx;
> + if (i915_vma_is_active(vma)) {
> + struct i915_vma_active *active, *n;
>
> /*
> * When a closed VMA is retired, it is unbound - eek.
> @@ -951,18 +1002,17 @@ int i915_vma_unbind(struct i915_vma *vma)
> */
> __i915_vma_pin(vma);
>
> - for_each_active(active, idx) {
> - ret = i915_gem_active_retire(&vma->last_read[idx],
> + rbtree_postorder_for_each_entry_safe(active, n,
> + &vma->active, node) {
> + ret = i915_gem_active_retire(&active->base,
> &vma->vm->i915->drm.struct_mutex);
> if (ret)
> - break;
> - }
> -
> - if (!ret) {
> - ret = i915_gem_active_retire(&vma->last_fence,
> - &vma->vm->i915->drm.struct_mutex);
> + goto unpin;
> }
>
> + ret = i915_gem_active_retire(&vma->last_fence,
> + &vma->vm->i915->drm.struct_mutex);
> +unpin:
> __i915_vma_unpin(vma);
> if (ret)
> return ret;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index a218b689e418..c297b0a0dc47 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -26,6 +26,7 @@
> #define __I915_VMA_H__
>
> #include <linux/io-mapping.h>
> +#include <linux/rbtree.h>
>
> #include <drm/drm_mm.h>
>
> @@ -94,8 +95,8 @@ struct i915_vma {
> #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
> #define I915_VMA_GGTT_WRITE BIT(12)
>
> - unsigned int active;
> - struct i915_gem_active last_read[I915_NUM_ENGINES];
> + unsigned int active_count;
> + struct rb_root active;
> struct i915_gem_active last_fence;
>
> /**
> @@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>
> void i915_vma_unpin_and_release(struct i915_vma **p_vma);
>
> +static inline bool i915_vma_is_active(struct i915_vma *vma)
> +{
> + return vma->active_count;
> +}
> +
> +int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> + struct i915_request *rq,
> + unsigned int flags);
> +
> static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
> {
> return vma->flags & I915_VMA_GGTT;
> @@ -187,38 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
> return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
> }
>
> -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
> -{
> - return vma->active;
> -}
> -
> -static inline bool i915_vma_is_active(const struct i915_vma *vma)
> -{
> - return i915_vma_get_active(vma);
> -}
> -
> -static inline void i915_vma_set_active(struct i915_vma *vma,
> - unsigned int engine)
> -{
> - vma->active |= BIT(engine);
> -}
> -
> -static inline void i915_vma_clear_active(struct i915_vma *vma,
> - unsigned int engine)
> -{
> - vma->active &= ~BIT(engine);
> -}
> -
> -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
> - unsigned int engine)
> -{
> - return vma->active & BIT(engine);
> -}
> -
> -int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> - struct i915_request *rq,
> - unsigned int flags);
> -
> static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
> {
> GEM_BUG_ON(!i915_vma_is_ggtt(vma));
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list