[Intel-gfx] [PATCH 06/28] drm/i915: Stop tracking MRU activity on VMA
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jan 28 10:09:54 UTC 2019
On 28/01/2019 01:02, Chris Wilson wrote:
> Our goal is to remove struct_mutex and replace it with fine-grained
> locking. One of the thorny issues is our eviction logic for reclaiming
> space for an execbuffer (or GTT mmapping, among a few other examples).
> While eviction itself is easy to move under a per-VM mutex, performing
> the activity tracking is less agreeable. One solution is not to do any
> MRU tracking and do a simple coarse evaluation during eviction of
> active/inactive, with a loose temporal ordering of last
> insertion/evaluation. That keeps all the locking constrained to when we
> are manipulating the VM itself, neatly avoiding the tricky handling of
> possible recursive locking during execbuf and elsewhere.
>
> Note that discarding the MRU (currently implemented as a pair of lists,
> to avoid scanning the active list for a NONBLOCKING search) is unlikely
> to impact our efficiency at reclaiming VM space (where we think an LRU
> model is best), as our current strategy is to use random idle replacement
> first before doing a search, and over time the use of softpinned 48b
> per-ppGTT is growing (thereby eliminating the need to perform eviction
> searches, in theory at least), with the remaining users found on
> much older devices (gen2-gen6).
>
> v2: Changelog and commentary rewritten to elaborate on the duality of a
> single list being both an inactive and active list.
> v3: Consolidate bool parameters into a single set of flags; don't
> comment on the duality of a single variable being a multiplicity of
> bits.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
> ---
> drivers/gpu/drm/i915/i915_gem.c | 10 +--
> drivers/gpu/drm/i915/i915_gem_evict.c | 87 +++++++++++--------
> drivers/gpu/drm/i915/i915_gem_gtt.c | 15 ++--
> drivers/gpu/drm/i915/i915_gem_gtt.h | 26 +-----
> drivers/gpu/drm/i915/i915_gem_shrinker.c | 8 +-
> drivers/gpu/drm/i915/i915_gem_stolen.c | 3 +-
> drivers/gpu/drm/i915/i915_gpu_error.c | 42 ++++-----
> drivers/gpu/drm/i915/i915_vma.c | 9 +-
> .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-
> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +-
> 10 files changed, 95 insertions(+), 111 deletions(-)
>
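For orientation before diving into the hunks: the change collapses the
per-VM active/inactive MRU lists into one list of bound vma, with
activity queried per-vma instead. A sketch of the resulting struct
shape, field names taken from the patch, everything else elided:

	struct i915_address_space {
		...
		/* before: MRU tracking via two lists */
		struct list_head active_list;
		struct list_head inactive_list;

		/* after: a single list of all bound vma; activity is
		 * checked on demand with i915_vma_is_active() */
		struct list_head bound_list;

		struct list_head unbound_list; /* unchanged */
		...
	};
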
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index dcbe644869b3..12a0a80bc989 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -255,10 +255,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>
> pinned = ggtt->vm.reserved;
> mutex_lock(&dev->struct_mutex);
> - list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
> - if (i915_vma_is_pinned(vma))
> - pinned += vma->node.size;
> - list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
> + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
> if (i915_vma_is_pinned(vma))
> pinned += vma->node.size;
> mutex_unlock(&dev->struct_mutex);
> @@ -1541,13 +1538,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
> GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
>
> for_each_ggtt_vma(vma, obj) {
> - if (i915_vma_is_active(vma))
> - continue;
> -
> if (!drm_mm_node_allocated(&vma->node))
> continue;
>
> - list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> + list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> }
>
> i915 = to_i915(obj->base.dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index f6855401f247..d76839670632 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -126,31 +126,25 @@ i915_gem_evict_something(struct i915_address_space *vm,
> struct drm_i915_private *dev_priv = vm->i915;
> struct drm_mm_scan scan;
> struct list_head eviction_list;
> - struct list_head *phases[] = {
> - &vm->inactive_list,
> - &vm->active_list,
> - NULL,
> - }, **phase;
> struct i915_vma *vma, *next;
> struct drm_mm_node *node;
> enum drm_mm_insert_mode mode;
> + struct i915_vma *active;
> int ret;
>
> lockdep_assert_held(&vm->i915->drm.struct_mutex);
> trace_i915_gem_evict(vm, min_size, alignment, flags);
>
> /*
> - * The goal is to evict objects and amalgamate space in LRU order.
> - * The oldest idle objects reside on the inactive list, which is in
> - * retirement order. The next objects to retire are those in flight,
> - * on the active list, again in retirement order.
> + * The goal is to evict objects and amalgamate space in rough LRU order.
> + * Since both active and inactive objects reside on the same list,
> + * in a mix of creation and last scanned order, as we process the list
> + * we sort it into inactive/active, which keeps the active portion
> + * in a rough MRU order.
> *
> * The retirement sequence is thus:
> - * 1. Inactive objects (already retired)
> - * 2. Active objects (will stall on unbinding)
> - *
> - * On each list, the oldest objects lie at the HEAD with the freshest
> - * object on the TAIL.
> + * 1. Inactive objects (already retired, random order)
> + * 2. Active objects (will stall on unbinding, oldest scanned first)
> */
> mode = DRM_MM_INSERT_BEST;
> if (flags & PIN_HIGH)
> @@ -169,17 +163,46 @@ i915_gem_evict_something(struct i915_address_space *vm,
> */
> if (!(flags & PIN_NONBLOCK))
> i915_retire_requests(dev_priv);
> - else
> - phases[1] = NULL;
>
> search_again:
> + active = NULL;
> INIT_LIST_HEAD(&eviction_list);
> - phase = phases;
> - do {
> - list_for_each_entry(vma, *phase, vm_link)
> - if (mark_free(&scan, vma, flags, &eviction_list))
> - goto found;
> - } while (*++phase);
> + list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
> + /*
> + * We keep this list in a rough least-recently scanned order
> + * of active elements (inactive elements are cheap to reap).
> + * New entries are added to the end, and we move anything we
> + * scan to the end. The assumption is that the working set
> + * of applications is either steady state (and thanks to the
> + * userspace bo cache it almost always is) or volatile and
> + * frequently replaced after a frame, which are self-evicting!
> + * Given that assumption, the MRU order of the scan list is
> + * fairly static, and keeping it in least-recently scan order
> + * is suitable.
> + *
> + * To notice when we complete one full cycle, we record the
> + * first active element seen, before moving it to the tail.
> + */
> + if (i915_vma_is_active(vma)) {
> + if (vma == active) {
> + if (flags & PIN_NONBLOCK)
> + break;
> +
> + active = ERR_PTR(-EAGAIN);
> + }
> +
> + if (active != ERR_PTR(-EAGAIN)) {
> + if (!active)
> + active = vma;
> +
> + list_move_tail(&vma->vm_link, &vm->bound_list);
> + continue;
> + }
> + }
> +
> + if (mark_free(&scan, vma, flags, &eviction_list))
> + goto found;
> + }
>
> /* Nothing found, clean up and bail out! */
> list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
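
The rotate-and-sentinel dance above is the subtle bit, so here is a
minimal standalone sketch of the pattern, simplified to the
PIN_NONBLOCK case (the real loop swaps the sentinel for
ERR_PTR(-EAGAIN) after one full cycle, so that a blocking search can
then stall on active vma):

	struct i915_vma *first_active = NULL;

	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
		if (i915_vma_is_active(vma)) {
			/* Meeting the first rotated vma again means one
			 * full pass over all active vma is complete. */
			if (vma == first_active)
				break;
			if (!first_active)
				first_active = vma;
			/* Keep actives in least-recently-scanned order. */
			list_move_tail(&vma->vm_link, &vm->bound_list);
			continue;
		}

		/* Inactive vma are cheap, immediate eviction candidates. */
		if (mark_free(&scan, vma, flags, &eviction_list))
			goto found;
	}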
> @@ -388,11 +411,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
> */
> int i915_gem_evict_vm(struct i915_address_space *vm)
> {
> - struct list_head *phases[] = {
> - &vm->inactive_list,
> - &vm->active_list,
> - NULL
> - }, **phase;
> struct list_head eviction_list;
> struct i915_vma *vma, *next;
> int ret;
> @@ -412,16 +430,13 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> }
>
> INIT_LIST_HEAD(&eviction_list);
> - phase = phases;
> - do {
> - list_for_each_entry(vma, *phase, vm_link) {
> - if (i915_vma_is_pinned(vma))
> - continue;
> + list_for_each_entry(vma, &vm->bound_list, vm_link) {
> + if (i915_vma_is_pinned(vma))
> + continue;
>
> - __i915_vma_pin(vma);
> - list_add(&vma->evict_link, &eviction_list);
> - }
> - } while (*++phase);
> + __i915_vma_pin(vma);
> + list_add(&vma->evict_link, &eviction_list);
> + }
>
> ret = 0;
> list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 9081e3bc5a59..2ad9070a54c1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -491,9 +491,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
>
> stash_init(&vm->free_pages);
>
> - INIT_LIST_HEAD(&vm->active_list);
> - INIT_LIST_HEAD(&vm->inactive_list);
> INIT_LIST_HEAD(&vm->unbound_list);
> + INIT_LIST_HEAD(&vm->bound_list);
> }
>
> static void i915_address_space_fini(struct i915_address_space *vm)
> @@ -2111,8 +2110,7 @@ void i915_ppgtt_close(struct i915_address_space *vm)
> static void ppgtt_destroy_vma(struct i915_address_space *vm)
> {
> struct list_head *phases[] = {
> - &vm->active_list,
> - &vm->inactive_list,
> + &vm->bound_list,
> &vm->unbound_list,
> NULL,
> }, **phase;
> @@ -2135,8 +2133,7 @@ void i915_ppgtt_release(struct kref *kref)
>
> ppgtt_destroy_vma(&ppgtt->vm);
>
> - GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list));
> - GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list));
> + GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));
> GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
>
> ppgtt->vm.cleanup(&ppgtt->vm);
> @@ -2801,8 +2798,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
> mutex_lock(&dev_priv->drm.struct_mutex);
> i915_gem_fini_aliasing_ppgtt(dev_priv);
>
> - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
> - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link)
> + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
> WARN_ON(i915_vma_unbind(vma));
>
> if (drm_mm_node_allocated(&ggtt->error_capture))
> @@ -3514,8 +3510,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
> ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
>
> /* clflush objects bound into the GGTT and rebind them. */
> - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
> - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) {
> + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
> struct drm_i915_gem_object *obj = vma->obj;
>
> if (!(vma->flags & I915_VMA_GLOBAL_BIND))
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index a0039ea97cdc..bd679c8c56dd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -299,32 +299,12 @@ struct i915_address_space {
> struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */
>
> /**
> - * List of objects currently involved in rendering.
> - *
> - * Includes buffers having the contents of their GPU caches
> - * flushed, not necessarily primitives. last_read_req
> - * represents when the rendering involved will be completed.
> - *
> - * A reference is held on the buffer while on this list.
> + * List of vma currently bound.
> */
> - struct list_head active_list;
> + struct list_head bound_list;
>
> /**
> - * LRU list of objects which are not in the ringbuffer and
> - * are ready to unbind, but are still in the GTT.
> - *
> - * last_read_req is NULL while an object is in this list.
> - *
> - * A reference is not held on the buffer while on this list,
> - * as merely being GTT-bound shouldn't prevent its being
> - * freed, and we'll pull it off the list in the free path.
> - */
> - struct list_head inactive_list;
> -
> - /**
> - * List of vma that have been unbound.
> - *
> - * A reference is not held on the buffer while on this list.
> +	 * List of vma that are not bound.
> */
> struct list_head unbound_list;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 8ceecb026910..a76d6c95c824 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -462,9 +462,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>
> /* We also want to clear any cached iomaps as they wrap vmap */
> list_for_each_entry_safe(vma, next,
> - &i915->ggtt.vm.inactive_list, vm_link) {
> + &i915->ggtt.vm.bound_list, vm_link) {
> unsigned long count = vma->node.size >> PAGE_SHIFT;
> - if (vma->iomap && i915_vma_unbind(vma) == 0)
> +
> + if (!vma->iomap || i915_vma_is_active(vma))
> + continue;
> +
> + if (i915_vma_unbind(vma) == 0)
> freed_pages += count;
> }
>
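Worth calling out: the old inactive_list was idle by construction, so
the vmap shrinker must now filter active vma itself. Old versus new
predicate, condensed from the hunk above (sketch):

	/* before: membership of inactive_list implied idleness */
	if (vma->iomap && i915_vma_unbind(vma) == 0)
		freed_pages += count;

	/* after: idleness has to be checked explicitly */
	if (vma->iomap && !i915_vma_is_active(vma) &&
	    i915_vma_unbind(vma) == 0)
		freed_pages += count;
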
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 9df615eea2d8..a9e365789686 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -701,7 +701,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
> vma->pages = obj->mm.pages;
> vma->flags |= I915_VMA_GLOBAL_BIND;
> __i915_vma_set_map_and_fenceable(vma);
> - list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list);
> +
> + list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
>
> spin_lock(&dev_priv->mm.obj_lock);
> list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 4eef0462489c..898e06014295 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1121,7 +1121,9 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>
> static u32 capture_error_bo(struct drm_i915_error_buffer *err,
> int count, struct list_head *head,
> - bool pinned_only)
> + unsigned int flags)
> +#define ACTIVE_ONLY BIT(0)
> +#define PINNED_ONLY BIT(1)
> {
> struct i915_vma *vma;
> int i = 0;
> @@ -1130,7 +1132,10 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,
> if (!vma->obj)
> continue;
>
> - if (pinned_only && !i915_vma_is_pinned(vma))
> + if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma))
> + continue;
> +
> + if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma))
> continue;
>
> capture_bo(err++, vma);
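
For reference, the two call sites of these new flags appear in the
hunks below; condensed (sketch):

	/* per-VM capture: only vma still in flight */
	count = capture_error_bo(active_bo, count,
				 &vm->bound_list, ACTIVE_ONLY);

	/* GGTT capture: only pinned vma */
	error->pinned_bo_count =
		capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY);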
> @@ -1601,14 +1606,17 @@ static void gem_capture_vm(struct i915_gpu_state *error,
> int count;
>
> count = 0;
> - list_for_each_entry(vma, &vm->active_list, vm_link)
> - count++;
> + list_for_each_entry(vma, &vm->bound_list, vm_link)
> + if (i915_vma_is_active(vma))
> + count++;
>
> active_bo = NULL;
> if (count)
> active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
> if (active_bo)
> - count = capture_error_bo(active_bo, count, &vm->active_list, false);
> + count = capture_error_bo(active_bo,
> + count, &vm->bound_list,
> + ACTIVE_ONLY);
> else
> count = 0;
>
> @@ -1646,28 +1654,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error)
> struct i915_address_space *vm = &error->i915->ggtt.vm;
> struct drm_i915_error_buffer *bo;
> struct i915_vma *vma;
> - int count_inactive, count_active;
> -
> - count_inactive = 0;
> - list_for_each_entry(vma, &vm->inactive_list, vm_link)
> - count_inactive++;
> + int count;
>
> - count_active = 0;
> - list_for_each_entry(vma, &vm->active_list, vm_link)
> - count_active++;
> + count = 0;
> + list_for_each_entry(vma, &vm->bound_list, vm_link)
> + count++;
>
> bo = NULL;
> - if (count_inactive + count_active)
> - bo = kcalloc(count_inactive + count_active,
> - sizeof(*bo), GFP_ATOMIC);
> + if (count)
> + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
> if (!bo)
> return;
>
> - count_inactive = capture_error_bo(bo, count_inactive,
> - &vm->active_list, true);
> - count_active = capture_error_bo(bo + count_inactive, count_active,
> - &vm->inactive_list, true);
> - error->pinned_bo_count = count_inactive + count_active;
> + error->pinned_bo_count =
> + capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY);
> error->pinned_bo = bo;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 5b4d78cdb4ca..7de28baffb8f 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> if (--vma->active_count)
> return;
>
> - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> - list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> -
> GEM_BUG_ON(!i915_gem_object_is_active(obj));
> if (--obj->active_count)
> return;
> @@ -659,7 +656,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
>
> - list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> + list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>
> if (vma->obj) {
> struct drm_i915_gem_object *obj = vma->obj;
> @@ -1003,10 +1000,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,
> * add the active reference first and queue for it to be dropped
> * *last*.
> */
> - if (!i915_gem_active_isset(active) && !vma->active_count++) {
> - list_move_tail(&vma->vm_link, &vma->vm->active_list);
> + if (!i915_gem_active_isset(active) && !vma->active_count++)
> obj->active_count++;
> - }
> i915_gem_active_set(active, rq);
> GEM_BUG_ON(!i915_vma_is_active(vma));
> GEM_BUG_ON(!obj->active_count);
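
With the list moves gone from retire/activate, activity is carried
solely by vma->active_count; the scan and capture paths above lean on
the i915_vma.h helper, which (assuming the helper of this vintage, it
is not shown in the diff) boils down to:

	static inline bool i915_vma_is_active(const struct i915_vma *vma)
	{
		return vma->active_count;
	}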
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index d0553bc69705..af9b85cb8639 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -84,7 +84,7 @@ static int populate_ggtt(struct drm_i915_private *i915,
> return -EINVAL;
> }
>
> - if (list_empty(&i915->ggtt.vm.inactive_list)) {
> + if (list_empty(&i915->ggtt.vm.bound_list)) {
> pr_err("No objects on the GGTT inactive list!\n");
> return -EINVAL;
> }
> @@ -96,7 +96,7 @@ static void unpin_ggtt(struct drm_i915_private *i915)
> {
> struct i915_vma *vma;
>
> - list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link)
> + list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
> if (vma->obj->mm.quirked)
> i915_vma_unpin(vma);
> }
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 06bde4a273cb..8feb4af308ff 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -1237,7 +1237,7 @@ static void track_vma_bind(struct i915_vma *vma)
> __i915_gem_object_pin_pages(obj);
>
> vma->pages = obj->mm.pages;
> - list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
> + list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> }
>
> static int exercise_mock(struct drm_i915_private *i915,
>