[Intel-gfx] [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Jul 3 17:40:23 UTC 2018
On 29/06/2018 23:54, Chris Wilson wrote:
> Using a VMA on more than one timeline concurrently is the exception
> rather than the rule (using it concurrently on multiple engines). As we
> expect to only use one active tracker, store the most recently used
> tracker inside the i915_vma itself and only fallback to the radixtree if
It is an rbtree now, not a radixtree.
> we need a second or more concurrent active trackers.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_vma.c | 36 +++++++++++++++++++++++++++++++--
> drivers/gpu/drm/i915/i915_vma.h | 1 +
> 2 files changed, 35 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 2faad2a1d00e..9b69d24c5cf1 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> __i915_vma_retire(active->vma, rq);
> }
>
> +static void
> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> + __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> +}
> +
> static struct i915_vma *
> vma_create(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>
> vma->active = RB_ROOT;
>
> + init_request_active(&vma->last_active, i915_vma_last_retire);
> init_request_active(&vma->last_fence, NULL);
> vma->vm = vm;
> vma->ops = &vm->vma_ops;
> @@ -895,6 +902,15 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> {
> struct i915_vma_active *active;
> struct rb_node **p, *parent;
> + struct i915_request *old;
> +
> + old = i915_gem_active_raw(&vma->last_active,
> + &vma->vm->i915->drm.struct_mutex);
> + if (!old || old->fence.context == idx)
> + goto out;
Please add a comment to this block explaining the caching optimization.
> +
> + /* Move the currently active fence into the rbtree */
> + idx = old->fence.context;
I find "currently active fence" a bit confusing. It is just the "last
accessed/cached fence", no?
>
> parent = NULL;
> p = &vma->active.rb_node;
> @@ -903,7 +919,7 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>
> active = rb_entry(parent, struct i915_vma_active, node);
> if (active->timeline == idx)
> - return &active->base;
> + goto replace;
>
> if (active->timeline < idx)
> p = &parent->rb_right;
> @@ -922,7 +938,18 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> rb_link_node(&active->node, parent, p);
> rb_insert_color(&active->node, &vma->active);
>
> - return &active->base;
> +replace:
> + if (i915_gem_active_isset(&active->base)) {
> + __list_del_entry(&active->base.link);
> + vma->active_count--;
> + GEM_BUG_ON(!vma->active_count);
> + }
When does this trigger? Please add a comment to this block.
> + GEM_BUG_ON(list_empty(&vma->last_active.link));
> + list_replace_init(&vma->last_active.link, &active->base.link);
> + active->base.request = fetch_and_zero(&vma->last_active.request);
> +
> +out:
> + return &vma->last_active;
> }
>
> int i915_vma_move_to_active(struct i915_vma *vma,
> @@ -1002,6 +1029,11 @@ int i915_vma_unbind(struct i915_vma *vma)
> */
> __i915_vma_pin(vma);
>
> + ret = i915_gem_active_retire(&vma->last_active,
> + &vma->vm->i915->drm.struct_mutex);
Dang.. there goes my idea to loop/iterate purely over the tree..
> + if (ret)
> + goto unpin;
> +
> rbtree_postorder_for_each_entry_safe(active, n,
> &vma->active, node) {
> ret = i915_gem_active_retire(&active->base,
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index c297b0a0dc47..f06d66377107 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -97,6 +97,7 @@ struct i915_vma {
>
> unsigned int active_count;
> struct rb_root active;
> + struct i915_gem_active last_active;
> struct i915_gem_active last_fence;
>
> /**
>
Okay, I get the idea, and it is a good optimisation for the common case.
Regards,
Tvrtko
More information about the Intel-gfx mailing list