[Intel-gfx] [PATCH v3] drm/i915: Keep the device awake whilst in the GTT domain
Daniel Vetter
daniel at ffwll.ch
Mon Sep 4 08:16:31 UTC 2017
On Fri, Sep 01, 2017 at 01:16:06PM +0100, Chris Wilson wrote:
> Since runtime suspend is very harsh on GTT mmappings (they all get
> zapped on suspend), keep the device awake while the buffer remains in
> the GTT domain. However, userspace can control the domain, and
> although there is a soft contract that writes must be flushed (e.g.
> for flushing scanouts and fbc), we are intentionally lax with respect
> to read domains, allowing them to persist for as long as is feasible.
>
> We acquire a wakeref when using the buffer in the GTT domain so that
> all subsequent operations on that object are fast, trying to avoid
> suspending while the GTT is still in active use by userspace. To
> ensure that the device can eventually suspend, we install a timer to
> expire the GTT wakeref. So in effect we have just a fancy pm
> autosuspend that tries to estimate the cost of restoring actively
> used GTT mmappings.
Please tag with for-CI or something like that when throwing patches at
the shards :-) At least that's what I'm assuming, given the lack of an
sob and of a changelog for this revision ...
Thanks, Daniel
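
For anyone skimming the diff below: the core of the patch is an
autosuspend-style wakeref. The first object moved into the GTT domain
takes a runtime-pm reference and arms a delayed work; the work keeps
re-arming as long as new users keep arriving, and drops the reference
once a full period passes with no activity. A minimal standalone
sketch of that pattern (not the i915 code; pm_get() and pm_put() are
placeholder runtime-pm hooks standing in for intel_runtime_pm_get/put(),
and the ilog2-based backoff from the patch is elided):

        static DEFINE_MUTEX(lock);
        static atomic_t users = ATOMIC_INIT(-1); /* -1 == wakeref not held */
        static struct delayed_work reaper; /* INIT_DELAYED_WORK(&reaper, reaper_fn) */

        static void reaper_fn(struct work_struct *work)
        {
                mutex_lock(&lock);
                if (atomic_xchg(&users, 0)) {
                        /* Used since the last tick: keep the wakeref
                         * and check again in another period.
                         */
                        mod_delayed_work(system_wq, &reaper,
                                         round_jiffies_up_relative(HZ));
                } else {
                        /* A full period with no users: drop the
                         * wakeref and mark it as no longer held.
                         */
                        pm_put(); /* placeholder for intel_runtime_pm_put() */
                        atomic_set(&users, -1);
                }
                mutex_unlock(&lock);
        }

        static void mark_busy(void)
        {
                mutex_lock(&lock);
                /* The -1 -> 0 transition means we are the first user,
                 * so take the wakeref and arm the reaper.
                 */
                if (atomic_inc_and_test(&users)) {
                        pm_get(); /* placeholder for intel_runtime_pm_get() */
                        schedule_delayed_work(&reaper,
                                              round_jiffies_up_relative(HZ));
                }
                mutex_unlock(&lock);
        }

The mutex plays the role of mm.gtt_wakeref.lock in the patch, closing
the race between the reaper declaring the device idle and a new user
incrementing the count at the same moment. The patch additionally
scales the reaper's next timeout as HZ * (ilog2(count) + 1) / 2, so
heavier GTT traffic earns a longer grace period before the wakeref is
dropped.
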
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 2 +
> drivers/gpu/drm/i915/i915_drv.h | 11 ++++
> drivers/gpu/drm/i915/i915_gem.c | 103 ++++++++++++++++++++++++++++---
> drivers/gpu/drm/i915/i915_gem_object.h | 5 ++
> drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 +-
> drivers/gpu/drm/i915/intel_lrc.c | 1 +
> drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +
> 7 files changed, 118 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 48572b157222..dbb07612aa5a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2809,6 +2809,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
> if (!HAS_RUNTIME_PM(dev_priv))
> seq_puts(m, "Runtime power management not supported\n");
>
> + seq_printf(m, "GTT wakeref count: %d\n",
> + atomic_read(&dev_priv->mm.gtt_wakeref.count));
> seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
> seq_printf(m, "IRQs disabled: %s\n",
> yesno(!intel_irqs_enabled(dev_priv)));
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0383e879a315..14dcf6614f3c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1460,6 +1460,17 @@ struct i915_gem_mm {
> */
> struct list_head userfault_list;
>
> + /* List of all objects in gtt domain, holding a wakeref.
> + * The list is reaped periodically, and protected by its own mutex.
> + */
> + struct {
> + struct mutex lock;
> + struct list_head list;
> + atomic_t count;
> +
> + struct delayed_work work;
> + } gtt_wakeref;
> +
> /**
> * List of objects which are pending destruction.
> */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e4cc08bc518c..09baf80889e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>
> static void __start_cpu_write(struct drm_i915_gem_object *obj)
> {
> + GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
> obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> if (cpu_write_needs_clflush(obj))
> @@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
> obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
> }
>
> -static void
> +void
> flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
> {
> struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>
> + lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +
> if (!(obj->base.write_domain & flush_domains))
> return;
>
> @@ -694,16 +697,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>
> switch (obj->base.write_domain) {
> case I915_GEM_DOMAIN_GTT:
> - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> - intel_runtime_pm_get(dev_priv);
> - spin_lock_irq(&dev_priv->uncore.lock);
> - POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> - spin_unlock_irq(&dev_priv->uncore.lock);
> - intel_runtime_pm_put(dev_priv);
> - }
> + mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
> + if (!list_empty(&obj->mm.gtt_wakeref_link)) {
> + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> + spin_lock_irq(&dev_priv->uncore.lock);
> + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> + spin_unlock_irq(&dev_priv->uncore.lock);
> + }
>
> - intel_fb_obj_flush(obj,
> - fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> + intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> +
> + list_del_init(&obj->mm.gtt_wakeref_link);
> + }
> + mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
> break;
>
> case I915_GEM_DOMAIN_CPU:
> @@ -3425,6 +3431,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
> flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> if (obj->cache_dirty)
> i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
> + GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
> obj->base.write_domain = 0;
> }
>
> @@ -3501,6 +3508,49 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
> return 0;
> }
>
> +static void wakeref_timeout(struct work_struct *work)
> +{
> + struct drm_i915_private *dev_priv =
> + container_of(work, typeof(*dev_priv), mm.gtt_wakeref.work.work);
> + struct drm_i915_gem_object *obj, *on;
> + unsigned int count;
> +
> + mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
> + count = atomic_xchg(&dev_priv->mm.gtt_wakeref.count, 0);
> + if (count) {
> + unsigned long timeout;
> +
> + GEM_BUG_ON(count == -1);
> +
> + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> + spin_lock_irq(&dev_priv->uncore.lock);
> + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> + spin_unlock_irq(&dev_priv->uncore.lock);
> + }
> +
> + count = 0;
> + list_for_each_entry_safe(obj, on,
> + &dev_priv->mm.gtt_wakeref.list,
> + mm.gtt_wakeref_link) {
> + list_del_init(&obj->mm.gtt_wakeref_link);
> + if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
> + intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> + obj->base.write_domain = 0;
> + }
> + count++;
> + }
> +
> + timeout = HZ * (ilog2(count) + 1) / 2;
> + mod_delayed_work(system_wq,
> + &dev_priv->mm.gtt_wakeref.work,
> + round_jiffies_up_relative(timeout));
> + } else {
> + intel_runtime_pm_put(dev_priv);
> + atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
> + }
> + mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
> +}
> +
> /**
> * Moves a single object to the GTT read, and possibly write domain.
> * @obj: object to act on
> @@ -3512,6 +3562,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
> int
> i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> {
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> int ret;
>
> lockdep_assert_held(&obj->base.dev->struct_mutex);
> @@ -3525,6 +3576,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> if (ret)
> return ret;
>
> + mutex_lock(&i915->mm.gtt_wakeref.lock);
> + if (list_empty(&obj->mm.gtt_wakeref_link)) {
> + if (atomic_inc_and_test(&i915->mm.gtt_wakeref.count)) {
> + intel_runtime_pm_get(i915);
> + schedule_delayed_work(&i915->mm.gtt_wakeref.work,
> + round_jiffies_up_relative(HZ));
> + }
> + list_add(&obj->mm.gtt_wakeref_link, &i915->mm.gtt_wakeref.list);
> + }
> + mutex_unlock(&i915->mm.gtt_wakeref.lock);
> +
> if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> return 0;
>
> @@ -4257,6 +4319,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>
> INIT_LIST_HEAD(&obj->global_link);
> INIT_LIST_HEAD(&obj->userfault_link);
> + INIT_LIST_HEAD(&obj->mm.gtt_wakeref_link);
> INIT_LIST_HEAD(&obj->vma_list);
> INIT_LIST_HEAD(&obj->lut_list);
> INIT_LIST_HEAD(&obj->batch_pool_link);
> @@ -4394,6 +4457,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>
> trace_i915_gem_object_destroy(obj);
>
> + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> +
> + if (!list_empty_careful(&obj->mm.gtt_wakeref_link)) {
> + mutex_lock(&i915->mm.gtt_wakeref.lock);
> + list_del(&obj->mm.gtt_wakeref_link);
> + mutex_unlock(&i915->mm.gtt_wakeref.lock);
> + }
> +
> GEM_BUG_ON(i915_gem_object_is_active(obj));
> list_for_each_entry_safe(vma, vn,
> &obj->vma_list, obj_link) {
> @@ -4548,6 +4619,9 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
> int ret;
>
> intel_runtime_pm_get(dev_priv);
> + while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
> + ;
> + WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
> intel_suspend_gt_powersave(dev_priv);
>
> mutex_lock(&dev->struct_mutex);
> @@ -4932,6 +5006,11 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
> if (err)
> goto err_priorities;
>
> + INIT_LIST_HEAD(&dev_priv->mm.gtt_wakeref.list);
> + INIT_DELAYED_WORK(&dev_priv->mm.gtt_wakeref.work, wakeref_timeout);
> + mutex_init(&dev_priv->mm.gtt_wakeref.lock);
> + atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
> +
> INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
> init_llist_head(&dev_priv->mm.free_list);
> INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
> @@ -4978,6 +5057,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
> WARN_ON(!list_empty(&dev_priv->gt.timelines));
> mutex_unlock(&dev_priv->drm.struct_mutex);
>
> + while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
> + ;
> + WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
> +
> kmem_cache_destroy(dev_priv->priorities);
> kmem_cache_destroy(dev_priv->dependencies);
> kmem_cache_destroy(dev_priv->requests);
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index c30d8f808185..3ca13edf32b6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -177,6 +177,8 @@ struct drm_i915_gem_object {
> struct mutex lock; /* protects this cache */
> } get_page;
>
> + struct list_head gtt_wakeref_link;
> +
> /**
> * Advice: are the backing pages purgeable?
> */
> @@ -421,5 +423,8 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
> unsigned int cache_level);
> void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
>
> +void
> +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains);
> +
> #endif
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 77fb39808131..71110b7d3ca0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
>
> static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
> {
> - if (i915_gem_object_unbind(obj) == 0)
> + if (i915_gem_object_unbind(obj) == 0) {
> + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
> + }
> return !READ_ONCE(obj->mm.pages);
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d89e1b8e1cc5..357eee6f907c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
> i915_ggtt_offset(ce->ring->vma);
>
> ce->state->obj->mm.dirty = true;
> + flush_write_domain(ce->state->obj, ~0);
>
> i915_gem_context_get(ctx);
> out:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index cdf084ef5aae..571a5b1f4f54 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1321,6 +1321,8 @@ int intel_ring_pin(struct intel_ring *ring,
> if (IS_ERR(addr))
> goto err;
>
> + flush_write_domain(vma->obj, ~0);
> +
> ring->vaddr = addr;
> return 0;
>
> @@ -1516,6 +1518,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
> goto err;
>
> ce->state->obj->mm.dirty = true;
> + flush_write_domain(ce->state->obj, ~0);
> }
>
> /* The kernel context is only used as a placeholder for flushing the
> --
> 2.14.1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch