[Intel-gfx] [PATCH v3] drm/i915: Keep the device awake whilst in the GTT domain

Daniel Vetter daniel at ffwll.ch
Mon Sep 4 08:16:31 UTC 2017


On Fri, Sep 01, 2017 at 01:16:06PM +0100, Chris Wilson wrote:
> Since runtime suspend is very harsh on GTT mmappings (they all get
> zapped on suspend), keep the device awake while the buffer remains in
> the GTT domain. However, userspace can control the domain, and
> although there is a soft contract that writes must be flushed (e.g.
> for flushing scanouts and fbc), we are intentionally lax with respect
> to read domains, allowing them to persist for as long as is feasible.
> 
> We acquire a wakeref when using the buffer in the GTT domain so that all
> subsequent operations on that object are fast, trying to avoid
> suspending while the GTT is still in active use by userspace.  To ensure
> that the device can eventually suspend, we install a timer to expire the
> GTT wakeref. So in effect we have just a fancy pm autosuspend that tries
> to estimate the cost of restoring actively used GTT mmappings.
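
For anyone skimming, the lifecycle described above is essentially a
hand-rolled pm autosuspend: a counter with a -1 "idle" sentinel plus a
self-rearming delayed work. A condensed sketch of the pattern (names
taken from the patch below; the per-object list handling is omitted,
so this is an illustration, not the patch itself):

    /* count == -1: idle, no wakeref held
     * count >=  0: wakeref held, reap timer armed
     * The mutex serialises transitions across the -1 sentinel.
     */
    static void gtt_wakeref_get(struct drm_i915_private *i915)
    {
        mutex_lock(&i915->mm.gtt_wakeref.lock);
        if (atomic_inc_and_test(&i915->mm.gtt_wakeref.count)) {
            /* -1 -> 0: first user after idle */
            intel_runtime_pm_get(i915);
            schedule_delayed_work(&i915->mm.gtt_wakeref.work, HZ);
        }
        mutex_unlock(&i915->mm.gtt_wakeref.lock);
    }

    static void gtt_wakeref_reap(struct work_struct *work)
    {
        struct drm_i915_private *i915 =
            container_of(work, typeof(*i915), mm.gtt_wakeref.work.work);

        mutex_lock(&i915->mm.gtt_wakeref.lock);
        if (atomic_xchg(&i915->mm.gtt_wakeref.count, 0)) {
            /* users since the last reap: flush the list, re-arm */
            schedule_delayed_work(&i915->mm.gtt_wakeref.work, HZ);
        } else {
            /* a full quiet period: drop the wakeref, go idle */
            intel_runtime_pm_put(i915);
            atomic_set(&i915->mm.gtt_wakeref.count, -1);
        }
        mutex_unlock(&i915->mm.gtt_wakeref.lock);
    }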

Please tag with for-CI or something like that when throwing patches at
the shards :-) At least that's what I'm assuming, given the lack of a
sob and of any revision changelog ...

Thanks, Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |   2 +
>  drivers/gpu/drm/i915/i915_drv.h          |  11 ++++
>  drivers/gpu/drm/i915/i915_gem.c          | 103 ++++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_gem_object.h   |   5 ++
>  drivers/gpu/drm/i915/i915_gem_shrinker.c |   4 +-
>  drivers/gpu/drm/i915/intel_lrc.c         |   1 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c  |   3 +
>  7 files changed, 118 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 48572b157222..dbb07612aa5a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2809,6 +2809,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>  	if (!HAS_RUNTIME_PM(dev_priv))
>  		seq_puts(m, "Runtime power management not supported\n");
>  
> +	seq_printf(m, "GTT wakeref count: %d\n",
> +		   atomic_read(&dev_priv->mm.gtt_wakeref.count));
>  	seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
>  	seq_printf(m, "IRQs disabled: %s\n",
>  		   yesno(!intel_irqs_enabled(dev_priv)));
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0383e879a315..14dcf6614f3c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1460,6 +1460,17 @@ struct i915_gem_mm {
>  	 */
>  	struct list_head userfault_list;
>  
> +	/* List of all objects in gtt domain, holding a wakeref.
> +	 * The list is reaped periodically, and protected by its own mutex.
> +	 */
> +	struct {
> +		struct mutex lock;
> +		struct list_head list;
> +		atomic_t count;
> +
> +		struct delayed_work work;
> +	} gtt_wakeref;
> +
>  	/**
>  	 * List of objects which are pending destruction.
>  	 */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e4cc08bc518c..09baf80889e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>  
>  static void __start_cpu_write(struct drm_i915_gem_object *obj)
>  {
> +	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
>  	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
>  	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>  	if (cpu_write_needs_clflush(obj))
> @@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
>  		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
>  }
>  
> -static void
> +void
>  flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  
> +	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +
>  	if (!(obj->base.write_domain & flush_domains))
>  		return;
>  
> @@ -694,16 +697,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>  
>  	switch (obj->base.write_domain) {
>  	case I915_GEM_DOMAIN_GTT:
> -		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> -			intel_runtime_pm_get(dev_priv);
> -			spin_lock_irq(&dev_priv->uncore.lock);
> -			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> -			spin_unlock_irq(&dev_priv->uncore.lock);
> -			intel_runtime_pm_put(dev_priv);
> -		}
> +		mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
> +		if (!list_empty(&obj->mm.gtt_wakeref_link)) {
> +			if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> +				spin_lock_irq(&dev_priv->uncore.lock);
> +				POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> +				spin_unlock_irq(&dev_priv->uncore.lock);
> +			}
>  
> -		intel_fb_obj_flush(obj,
> -				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> +			intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> +
> +			list_del_init(&obj->mm.gtt_wakeref_link);
> +		}
> +		mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
>  		break;
>  
>  	case I915_GEM_DOMAIN_CPU:
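
The POSTING_READ_FW dance here (and again in wakeref_timeout() below)
is the usual trick for gen6+ parts without an LLC: writes through the
GGTT are buffered, and an uncached mmio read of any register drains
the chipset write buffer, making them globally visible. Note that the
old intel_runtime_pm_get/put pair around it is gone on purpose: the
read is now only issued while the object sits on the gtt_wakeref list,
which by construction means the wakeref is already held. As a
standalone (hypothetical) helper it would look roughly like:

    static void gen6_flush_ggtt_writes(struct drm_i915_private *i915)
    {
        /* caller must already hold a runtime pm wakeref */
        spin_lock_irq(&i915->uncore.lock);
        POSTING_READ_FW(RING_ACTHD(i915->engine[RCS]->mmio_base));
        spin_unlock_irq(&i915->uncore.lock);
    }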
> @@ -3425,6 +3431,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
>  	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
>  	if (obj->cache_dirty)
>  		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
> +	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
>  	obj->base.write_domain = 0;
>  }
>  
> @@ -3501,6 +3508,49 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
>  	return 0;
>  }
>  
> +static void wakeref_timeout(struct work_struct *work)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(work, typeof(*dev_priv), mm.gtt_wakeref.work.work);
> +	struct drm_i915_gem_object *obj, *on;
> +	unsigned int count;
> +
> +	mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
> +	count = atomic_xchg(&dev_priv->mm.gtt_wakeref.count, 0);
> +	if (count) {
> +		unsigned long timeout;
> +
> +		GEM_BUG_ON(count == -1);
> +
> +		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> +			spin_lock_irq(&dev_priv->uncore.lock);
> +			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> +			spin_unlock_irq(&dev_priv->uncore.lock);
> +		}
> +
> +		count = 0;
> +		list_for_each_entry_safe(obj, on,
> +					 &dev_priv->mm.gtt_wakeref.list,
> +					 mm.gtt_wakeref_link) {
> +			list_del_init(&obj->mm.gtt_wakeref_link);
> +			if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
> +				intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> +				obj->base.write_domain = 0;
> +			}
> +			count++;
> +		}
> +
> +		timeout = HZ * (ilog2(count) + 1) / 2;
> +		mod_delayed_work(system_wq,
> +				 &dev_priv->mm.gtt_wakeref.work,
> +				 round_jiffies_up_relative(timeout));
> +	} else {
> +		intel_runtime_pm_put(dev_priv);
> +		atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
> +	}
> +	mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
> +}
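
Back of the envelope for the re-arm interval,
HZ * (ilog2(count) + 1) / 2: the reap period grows logarithmically
with the number of objects just reaped, as a crude proxy for how
costly the mmappings would be to restore:

    count = 1        -> 0.5s
    count = 2..3     -> 1.0s
    count = 4..7     -> 1.5s
    count = 16..31   -> 2.5s
    count = 256..511 -> 4.5s

round_jiffies_up_relative() then rounds the expiry up so the wakeup
can batch with other timers. One thing that might want a second look:
if every object was already reaped via flush_write_domain(), the
local count can end up 0 here, and ilog2(0) is undefined.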
> +
>  /**
>   * Moves a single object to the GTT read, and possibly write domain.
>   * @obj: object to act on
> @@ -3512,6 +3562,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
>  int
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  {
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>  	int ret;
>  
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
> @@ -3525,6 +3576,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  	if (ret)
>  		return ret;
>  
> +	mutex_lock(&i915->mm.gtt_wakeref.lock);
> +	if (list_empty(&obj->mm.gtt_wakeref_link)) {
> +		if (atomic_inc_and_test(&i915->mm.gtt_wakeref.count)) {
> +			intel_runtime_pm_get(i915);
> +			schedule_delayed_work(&i915->mm.gtt_wakeref.work,
> +					      round_jiffies_up_relative(HZ));
> +		}
> +		list_add(&obj->mm.gtt_wakeref_link, &i915->mm.gtt_wakeref.list);
> +	}
> +	mutex_unlock(&i915->mm.gtt_wakeref.lock);
> +
>  	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
>  		return 0;
>  
> @@ -4257,6 +4319,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  
>  	INIT_LIST_HEAD(&obj->global_link);
>  	INIT_LIST_HEAD(&obj->userfault_link);
> +	INIT_LIST_HEAD(&obj->mm.gtt_wakeref_link);
>  	INIT_LIST_HEAD(&obj->vma_list);
>  	INIT_LIST_HEAD(&obj->lut_list);
>  	INIT_LIST_HEAD(&obj->batch_pool_link);
> @@ -4394,6 +4457,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>  
>  		trace_i915_gem_object_destroy(obj);
>  
> +		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> +
> +		if (!list_empty_careful(&obj->mm.gtt_wakeref_link)) {
> +			mutex_lock(&i915->mm.gtt_wakeref.lock);
> +			list_del(&obj->mm.gtt_wakeref_link);
> +			mutex_unlock(&i915->mm.gtt_wakeref.lock);
> +		}
> +
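
Aside on the list handling: the per-object link doubles as the "is
tracked" flag, which only works if every removal that can be followed
by a re-test leaves the node in the empty state:

    INIT_LIST_HEAD(&obj->mm.gtt_wakeref_link);  /* starts untracked */
    list_add(&obj->mm.gtt_wakeref_link, &head); /* tracked */
    list_del_init(&obj->mm.gtt_wakeref_link);   /* untracked again */

Hence list_del_init() everywhere except this free path, where the
object is going away and a plain list_del() suffices;
list_empty_careful() is just the unlocked peek that skips taking the
mutex in the common untracked case.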
>  		GEM_BUG_ON(i915_gem_object_is_active(obj));
>  		list_for_each_entry_safe(vma, vn,
>  					 &obj->vma_list, obj_link) {
> @@ -4548,6 +4619,9 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
>  	int ret;
>  
>  	intel_runtime_pm_get(dev_priv);
> +	while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
> +		;
> +	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
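
On the drain loop: flush_delayed_work() returns true when it had to
wait for a pending execution, and wakeref_timeout() re-arms itself
while objects remain, so the loop spins until the work stops
requeueing itself. After that the count must be back at the -1 idle
sentinel, hence the WARN_ON. Same pattern again in
i915_gem_load_cleanup() below.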
>  	intel_suspend_gt_powersave(dev_priv);
>  
>  	mutex_lock(&dev->struct_mutex);
> @@ -4932,6 +5006,11 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>  	if (err)
>  		goto err_priorities;
>  
> +	INIT_LIST_HEAD(&dev_priv->mm.gtt_wakeref.list);
> +	INIT_DELAYED_WORK(&dev_priv->mm.gtt_wakeref.work, wakeref_timeout);
> +	mutex_init(&dev_priv->mm.gtt_wakeref.lock);
> +	atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
> +
>  	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
>  	init_llist_head(&dev_priv->mm.free_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
> @@ -4978,6 +5057,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
>  	WARN_ON(!list_empty(&dev_priv->gt.timelines));
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  
> +	while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
> +		;
> +	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
> +
>  	kmem_cache_destroy(dev_priv->priorities);
>  	kmem_cache_destroy(dev_priv->dependencies);
>  	kmem_cache_destroy(dev_priv->requests);
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index c30d8f808185..3ca13edf32b6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -177,6 +177,8 @@ struct drm_i915_gem_object {
>  			struct mutex lock; /* protects this cache */
>  		} get_page;
>  
> +		struct list_head gtt_wakeref_link;
> +
>  		/**
>  		 * Advice: are the backing pages purgeable?
>  		 */
> @@ -421,5 +423,8 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
>  					 unsigned int cache_level);
>  void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
>  
> +void
> +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains);
> +
>  #endif
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 77fb39808131..71110b7d3ca0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
>  
>  static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
>  {
> -	if (i915_gem_object_unbind(obj) == 0)
> +	if (i915_gem_object_unbind(obj) == 0) {
> +		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
>  		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
> +	}
>  	return !READ_ONCE(obj->mm.pages);
>  }
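
The flush here is presumably to make any pending write through the
GTT visible (and to drop the object off the gtt_wakeref list if it
was the writer) before the backing pages are released underneath it.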
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d89e1b8e1cc5..357eee6f907c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
>  		i915_ggtt_offset(ce->ring->vma);
>  
>  	ce->state->obj->mm.dirty = true;
> +	flush_write_domain(ce->state->obj, ~0);
>  
>  	i915_gem_context_get(ctx);
>  out:
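
This flush_write_domain(obj, ~0) at context-pin time, together with
the matching calls in intel_ringbuffer.c below, reads like the other
half of the contract: objects the kernel pins for its own use (rings,
context images) get their write domain flushed immediately, so they
drop off the gtt_wakeref list and never hold the device awake on
behalf of the mmap-restore heuristic.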
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index cdf084ef5aae..571a5b1f4f54 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1321,6 +1321,8 @@ int intel_ring_pin(struct intel_ring *ring,
>  	if (IS_ERR(addr))
>  		goto err;
>  
> +	flush_write_domain(vma->obj, ~0);
> +
>  	ring->vaddr = addr;
>  	return 0;
>  
> @@ -1516,6 +1518,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
>  			goto err;
>  
>  		ce->state->obj->mm.dirty = true;
> +		flush_write_domain(ce->state->obj, ~0);
>  	}
>  
>  	/* The kernel context is only used as a placeholder for flushing the
> -- 
> 2.14.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

