[Intel-gfx] [PATCH 01/46] drm/i915: Return immediately if trylock fails for direct-reclaim
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jan 7 14:00:25 UTC 2019
On 07/01/2019 11:54, Chris Wilson wrote:
> Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> in the shrinker while performing direct-reclaim. The trade-off being
> (much) lower latency for non-i915 clients at an increased risk of being
> unable to obtain a page from direct-reclaim without hitting the
> oom-notifier. The proviso being that we still keep trying hard to
> obtain the lock for kswapd so that we can reap under heavy memory
> pressure.
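For anyone skimming the archive: the locking policy being introduced,
paraphrased as a sketch (not the literal diff below; in v2 the
blocking path is gated on the I915_SHRINK_ACTIVE flag):

	/* Sketch: how the shrinker now takes struct_mutex. */
	if (!mutex_trylock(&i915->drm.struct_mutex)) {
		if (!(flags & I915_SHRINK_ACTIVE))
			return 0; /* direct-reclaim: bail immediately */
		/*
		 * Heavy-pressure callers opt in to blocking, annotated
		 * with the I915_MM_SHRINKER subclass so lockdep still
		 * watches the recursion.
		 */
		mutex_lock_nested(&i915->drm.struct_mutex,
				  I915_MM_SHRINKER);
	}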
>
> v2: Taint all mutexes taken within the shrinker with the struct_mutex
> subclass as an early warning system, and drop I915_SHRINK_ACTIVE from
> vmap to reduce the number of dangerous paths. We also have to drop
> I915_SHRINK_ACTIVE from oom-notifier to be able to make the same claim
> that ACTIVE is only used from outside context, which fits in with a
> longer strategy of avoiding stalls due to scanning active during
> shrinking.
Is the oom notifier not always an outside context?
>
> The danger in using the subclass struct_mutex is that we declare
> ourselves more knowledgeable than lockdep and deprive ourselves of
> automatic coverage. Instead, we require ourselves to mark up any mutex
> taken inside the shrinker in order to detect lock-inversion, and if we
> miss any we are doomed to a deadlock at the worst possible moment.
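(The mark-up in question is applied at mutex init time; sketching the
pattern from this very patch, as used for vm->mutex below:

	mutex_init(&vm->mutex);
	/*
	 * Prime lockdep: record that this mutex may be taken inside
	 * the shrinker, i.e. under fs_reclaim and under the
	 * I915_MM_SHRINKER subclass of struct_mutex, so an inverted
	 * ordering anywhere else is reported on first use rather than
	 * at the eventual deadlock.
	 */
	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);

Any mutex taken inside the shrinker that misses this call is exactly
the blind spot the paragraph above warns about.)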
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 7 +--
> drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +--
> drivers/gpu/drm/i915/i915_gem_shrinker.c | 68 ++++++++++++++++--------
> 3 files changed, 54 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7fa2a405c5fe..17a017645c5d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2899,9 +2899,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
> __i915_gem_object_unpin_pages(obj);
> }
>
> -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
> +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
> I915_MM_NORMAL = 0,
> - I915_MM_SHRINKER
> + I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
> };
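(For context, the subclass is what lets the recursive acquisition be
annotated rather than hidden from lockdep; e.g., a sketch using the
names above:

	/* Normal paths take the lock with the default subclass: */
	mutex_lock(&obj->mm.lock);		/* I915_MM_NORMAL */

	/* Shrinker/direct-reclaim-like paths mark themselves: */
	mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER);
)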
>
> void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> @@ -3187,7 +3187,8 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915,
> unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
> void i915_gem_shrinker_register(struct drm_i915_private *i915);
> void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
> -void i915_gem_shrinker_taints_mutex(struct mutex *mutex);
> +void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
> + struct mutex *mutex);
>
> /* i915_gem_tiling.c */
> static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index d4c5973ea33d..5cc8968eb3bf 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -483,7 +483,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
> * attempt holding the lock is immediately reported by lockdep.
> */
> mutex_init(&vm->mutex);
> - i915_gem_shrinker_taints_mutex(&vm->mutex);
> + i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
>
> GEM_BUG_ON(!vm->total);
> drm_mm_init(&vm->mm, 0, vm->total);
> @@ -2245,7 +2245,8 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
> DMA_ATTR_NO_WARN))
> return 0;
>
> - /* If the DMA remap fails, one cause can be that we have
> + /*
> + * If the DMA remap fails, one cause can be that we have
> * too many objects pinned in a small remapping table,
> * such as swiotlb. Incrementally purge all other objects and
> * try again - if there are no more pages to remove from
> @@ -2255,8 +2256,7 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
> } while (i915_gem_shrink(to_i915(obj->base.dev),
> obj->base.size >> PAGE_SHIFT, NULL,
> I915_SHRINK_BOUND |
> - I915_SHRINK_UNBOUND |
> - I915_SHRINK_ACTIVE));
> + I915_SHRINK_UNBOUND));
>
Why this change?
Regards,
Tvrtko
> return -ENOSPC;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index ea90d3a0d511..72d6ea0cac7e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -36,7 +36,9 @@
> #include "i915_drv.h"
> #include "i915_trace.h"
>
> -static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
> +static bool shrinker_lock(struct drm_i915_private *i915,
> + unsigned int flags,
> + bool *unlock)
> {
> switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
> case MUTEX_TRYLOCK_RECURSIVE:
> @@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
>
> case MUTEX_TRYLOCK_FAILED:
> *unlock = false;
> - preempt_disable();
> - do {
> - cpu_relax();
> - if (mutex_trylock(&i915->drm.struct_mutex)) {
> - *unlock = true;
> - break;
> - }
> - } while (!need_resched());
> - preempt_enable();
> + if (flags & I915_SHRINK_ACTIVE) {
> + mutex_lock_nested(&i915->drm.struct_mutex,
> + I915_MM_SHRINKER);
> + *unlock = true;
> + }
> return *unlock;
>
> case MUTEX_TRYLOCK_SUCCESS:
> @@ -160,7 +158,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
> unsigned long scanned = 0;
> bool unlock;
>
> - if (!shrinker_lock(i915, &unlock))
> + if (!shrinker_lock(i915, flags, &unlock))
> return 0;
>
> /*
> @@ -357,7 +355,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>
> sc->nr_scanned = 0;
>
> - if (!shrinker_lock(i915, &unlock))
> + if (!shrinker_lock(i915, 0, &unlock))
> return SHRINK_STOP;
>
> freed = i915_gem_shrink(i915,
> @@ -397,7 +395,7 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
> do {
> if (i915_gem_wait_for_idle(i915,
> 0, MAX_SCHEDULE_TIMEOUT) == 0 &&
> - shrinker_lock(i915, unlock))
> + shrinker_lock(i915, 0, unlock))
> break;
>
> schedule_timeout_killable(1);
> @@ -421,7 +419,11 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
> struct drm_i915_gem_object *obj;
> unsigned long unevictable, bound, unbound, freed_pages;
>
> - freed_pages = i915_gem_shrink_all(i915);
> + intel_runtime_pm_get(i915);
> + freed_pages = i915_gem_shrink(i915, -1UL, NULL,
> + I915_SHRINK_BOUND |
> + I915_SHRINK_UNBOUND);
> + intel_runtime_pm_put(i915);
>
> /* Because we may be allocating inside our own driver, we cannot
> * assert that there are no objects with pinned pages that are not
> @@ -447,10 +449,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
> pr_info("Purging GPU memory, %lu pages freed, "
> "%lu pages still pinned.\n",
> freed_pages, unevictable);
> - if (unbound || bound)
> - pr_err("%lu and %lu pages still available in the "
> - "bound and unbound GPU page lists.\n",
> - bound, unbound);
>
> *(unsigned long *)ptr += freed_pages;
> return NOTIFY_DONE;
> @@ -480,7 +478,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
> freed_pages += i915_gem_shrink(i915, -1UL, NULL,
> I915_SHRINK_BOUND |
> I915_SHRINK_UNBOUND |
> - I915_SHRINK_ACTIVE |
> I915_SHRINK_VMAPS);
> intel_runtime_pm_put(i915);
>
> @@ -533,13 +530,40 @@ void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
> unregister_shrinker(&i915->mm.shrinker);
> }
>
> -void i915_gem_shrinker_taints_mutex(struct mutex *mutex)
> +void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
> + struct mutex *mutex)
> {
> + bool unlock = false;
> +
> if (!IS_ENABLED(CONFIG_LOCKDEP))
> return;
>
> + if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
> + mutex_acquire(&i915->drm.struct_mutex.dep_map,
> + I915_MM_NORMAL, 0, _RET_IP_);
> + unlock = true;
> + }
> +
> fs_reclaim_acquire(GFP_KERNEL);
> - mutex_lock(mutex);
> - mutex_unlock(mutex);
> +
> + /*
> + * As we invariably rely on the struct_mutex within the shrinker,
> + * but have a complicated recursion dance, taint all the mutexes used
> + * within the shrinker with the struct_mutex. For completeness, we
> + * taint with all subclass of struct_mutex, even though we should
> + * only need tainting by I915_MM_NORMAL to catch possible ABBA
> + * deadlocks from using struct_mutex inside @mutex.
> + */
> + mutex_acquire(&i915->drm.struct_mutex.dep_map,
> + I915_MM_SHRINKER, 0, _RET_IP_);
> +
> + mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
> + mutex_release(&mutex->dep_map, 0, _RET_IP_);
> +
> + mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
> +
> fs_reclaim_release(GFP_KERNEL);
> +
> + if (unlock)
> + mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
> }
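(Net effect of the dance above: lockdep records the chains
fs_reclaim -> @mutex and struct_mutex/I915_MM_SHRINKER -> @mutex. A
hypothetical driver path it would then flag on first occurrence,
where ptr and size are placeholders:

	mutex_lock(&vm->mutex);
	/*
	 * GFP_KERNEL may enter direct-reclaim, i.e. the shrinker,
	 * which per the taint may take struct_mutex and then
	 * vm->mutex again; lockdep reports the inversion here
	 * instead of waiting for an actual deadlock.
	 */
	ptr = kmalloc(size, GFP_KERNEL);
)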
>