[Intel-gfx] [PATCH v2] drm/i915: Track the last-active inside the i915_vma
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jul 5 11:38:46 UTC 2018
On 04/07/2018 09:34, Chris Wilson wrote:
> Using a VMA on more than one timeline concurrently is the exception
> rather than the rule (using it concurrently on multiple engines). As we
> expect to only use one active tracker, store the most recently used
> tracker inside the i915_vma itself and only fallback to the rbtree if
> we need a second or more concurrent active trackers.
>
> v2: Comments on how we overwrite any existing last_active cache.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
> drivers/gpu/drm/i915/i915_vma.h | 1 +
> 2 files changed, 49 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index cd94ffc7f079..33925e00f7e8 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> __i915_vma_retire(active->vma, rq);
> }
>
> +static void
> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> + __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> +}
> +
> static struct i915_vma *
> vma_create(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>
> vma->active = RB_ROOT;
>
> + init_request_active(&vma->last_active, i915_vma_last_retire);
> init_request_active(&vma->last_fence, NULL);
> vma->vm = vm;
> vma->ops = &vm->vma_ops;
> @@ -895,6 +902,22 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> {
> struct i915_vma_active *active;
> struct rb_node **p, *parent;
> + struct i915_request *old;
> +
> + /*
> + * We track the most recently used timeline to skip a rbtree search
> + * for the common case, under typical loads we never need the rbtree
> + * at all. We can reuse the last_active slot if it is empty, that is
> + * after the previous activity has been retired, or if the active
> + * matches the current timeline.
> + */
> + old = i915_gem_active_raw(&vma->last_active,
> + &vma->vm->i915->drm.struct_mutex);
> + if (!old || old->fence.context == idx)
> + goto out;
Is the situation that retire can be out of order relative to
move_to_active? In other words, last_active can retire before the rbtree
record, and so the following new move_to_active will find last_active
empty and so could create a double entry for the same timeline?
Avoiding that would defeat the caching, unless when last_active is
available we also check the tree, *if* the vma->active_count > 0?
That way we avoid creating duplicate entries.
But would still need to pull out this tree entry into last_active after
the fact.
Regards,
Tvrtko
> +
> + /* Move the currently active fence into the rbtree */
> + idx = old->fence.context;
>
> parent = NULL;
> p = &vma->active.rb_node;
> @@ -903,7 +926,7 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>
> active = rb_entry(parent, struct i915_vma_active, node);
> if (active->timeline == idx)
> - return &active->base;
> + goto replace;
>
> if (active->timeline < idx)
> p = &parent->rb_right;
> @@ -922,7 +945,25 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> rb_link_node(&active->node, parent, p);
> rb_insert_color(&active->node, &vma->active);
>
> - return &active->base;
> +replace:
> + /*
> + * Overwrite the previous active slot in the rbtree with last_active,
> + * leaving last_active zeroed. If the previous slot is still active,
> + * we must be careful as we now only expect to receive one retire
> + * callback not two, and so must undo the active counting for the
> + * overwritten slot.
> + */
> + if (i915_gem_active_isset(&active->base)) {
> + __list_del_entry(&active->base.link);
> + vma->active_count--;
> + GEM_BUG_ON(!vma->active_count);
> + }
> + GEM_BUG_ON(list_empty(&vma->last_active.link));
> + list_replace_init(&vma->last_active.link, &active->base.link);
> + active->base.request = fetch_and_zero(&vma->last_active.request);
> +
> +out:
> + return &vma->last_active;
> }
>
> int i915_vma_move_to_active(struct i915_vma *vma,
> @@ -1002,6 +1043,11 @@ int i915_vma_unbind(struct i915_vma *vma)
> */
> __i915_vma_pin(vma);
>
> + ret = i915_gem_active_retire(&vma->last_active,
> + &vma->vm->i915->drm.struct_mutex);
> + if (ret)
> + goto unpin;
> +
> rbtree_postorder_for_each_entry_safe(active, n,
> &vma->active, node) {
> ret = i915_gem_active_retire(&active->base,
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index c297b0a0dc47..f06d66377107 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -97,6 +97,7 @@ struct i915_vma {
>
> unsigned int active_count;
> struct rb_root active;
> + struct i915_gem_active last_active;
> struct i915_gem_active last_fence;
>
> /**
>
More information about the Intel-gfx
mailing list