[Intel-gfx] [PATCH 5/5] drm/i915: Implement an oom-notifier for last resort shrinking
Daniel Vetter
daniel at ffwll.ch
Tue May 20 10:25:13 CEST 2014
On Tue, May 20, 2014 at 08:28:43AM +0100, Chris Wilson wrote:
> Before the process killer is invoked, oom-notifiers are executed for one
> last try at recovering pages. We can hook into this callback to be sure
> that everything that can be is purged from our page lists, and to give a
> summary of how much memory is still pinned by the GPU in the case of an
> oom.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=72742
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Tested-by: lu hua <huax.lu at intel.com>
Dave, can you please have a look at this and ack it from a core mm
perspective? I just like your ack since you've worked together with Chris
on these issues.
Thanks, Daniel
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_gem.c | 74 +++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 72 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e69cb51de738..389204d44431 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1057,6 +1057,7 @@ struct i915_gem_mm {
> /** PPGTT used for aliasing the PPGTT with the GTT */
> struct i915_hw_ppgtt *aliasing_ppgtt;
>
> + struct notifier_block oom_notifier;
> struct shrinker shrinker;
> bool shrinker_no_lock_stealing;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ea93898d51bc..dc8e1ef50bfb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -31,6 +31,7 @@
> #include "i915_drv.h"
> #include "i915_trace.h"
> #include "intel_drv.h"
> +#include <linux/oom.h>
> #include <linux/shmem_fs.h>
> #include <linux/slab.h>
> #include <linux/swap.h>
> @@ -61,6 +62,9 @@ static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker,
> struct shrink_control *sc);
> static unsigned long i915_gem_shrinker_scan(struct shrinker *shrinker,
> struct shrink_control *sc);
> +static int i915_gem_shrinker_oom(struct notifier_block *nb,
> + unsigned long event,
> + void *ptr);
> static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
> static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
>
> @@ -4759,6 +4763,9 @@ i915_gem_load(struct drm_device *dev)
> dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
> dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
> register_shrinker(&dev_priv->mm.shrinker);
> +
> + dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
> + register_oom_notifier(&dev_priv->mm.oom_notifier);
> }
>
> /*
> @@ -5154,15 +5161,76 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
> freed += __i915_gem_shrink(dev_priv,
> sc->nr_to_scan - freed,
> false);
> - if (freed < sc->nr_to_scan)
> - freed += i915_gem_shrink_all(dev_priv);
> -
> if (unlock)
> mutex_unlock(&dev->struct_mutex);
>
> return freed;
> }
>
> +static int
> +i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
> +{
> + struct drm_i915_private *dev_priv =
> + container_of(nb, struct drm_i915_private, mm.oom_notifier);
> + struct drm_device *dev = dev_priv->dev;
> + struct drm_i915_gem_object *obj;
> + unsigned long timeout = msecs_to_jiffies(5000) + 1;
> + unsigned long pinned, bound, unbound, freed;
> + bool was_interruptible;
> + bool unlock;
> +
> + while (!i915_gem_shrinker_lock(dev, &unlock) && --timeout)
> + schedule_timeout_killable(1);
> + if (timeout == 0) {
> + pr_err("Unable to purge GPU memory due lock contention.\n");
> + return NOTIFY_DONE;
> + }
> +
> + was_interruptible = dev_priv->mm.interruptible;
> + dev_priv->mm.interruptible = false;
> +
> + freed = i915_gem_shrink_all(dev_priv);
> +
> + dev_priv->mm.interruptible = was_interruptible;
> +
> + /* Because we may be allocating inside our own driver, we cannot
> + * assert that there are no objects with pinned pages that are not
> + * being pointed to by hardware.
> + */
> + unbound = bound = pinned = 0;
> + list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
> + if (!obj->base.filp) /* not backed by a freeable object */
> + continue;
> +
> + if (obj->pages_pin_count)
> + pinned += obj->base.size;
> + else
> + unbound += obj->base.size;
> + }
> + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> + if (!obj->base.filp)
> + continue;
> +
> + if (obj->pages_pin_count)
> + pinned += obj->base.size;
> + else
> + bound += obj->base.size;
> + }
> +
> + if (unlock)
> + mutex_unlock(&dev->struct_mutex);
> +
> + pr_info("Purging GPU memory, %lu bytes freed, %lu bytes still pinned.\n",
> + freed, pinned);
> + if (unbound | bound)
> + pr_err("%lu and %lu bytes still available in the "
> + "bound and unbound GPU page lists.\n",
> + bound, unbound);
> +
> + *(unsigned long *)ptr += freed;
> + return NOTIFY_DONE;
> +}
> +
> struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
> {
> struct i915_vma *vma;
> --
> 2.0.0.rc2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
More information about the Intel-gfx
mailing list