[PATCH] drm/i915: Keep the device awake whilst in the GTT domain

Chris Wilson <chris@chris-wilson.co.uk>
Thu Aug 31 19:51:10 UTC 2017


Since runtime suspend is very harsh on GTT mmappings (they all get
zapped on suspend), keep the device awake while the buffer remains in
the GTT domain. However, userspace can control the domain, and
although there is a soft contract that writes must be flushed (e.g.
for flushing scanouts and fbc), we are intentionally lax with respect
to read domains, allowing them to persist for as long as is feasible.
To ensure that the device can eventually suspend, we install a timer.
In effect, this is just a fancy pm autosuspend that tries to estimate
the cost of restoring actively used GTT mmappings.
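
The accounting amounts to a logarithmic backoff: the counter starts
at -1 so that atomic_inc_and_test() fires exactly once for the first
object entering the GTT domain, and each timer expiry re-arms itself
at HZ * ilog2(count) jiffies (e.g. ~1s when 2 objects were seen, ~4s
for 16) until the list drains and the wakeref is dropped. A condensed,
illustrative sketch of the two halves (see the patch below for the
actual locking and list handling):

	/* first object entering the GTT domain takes the wakeref */
	if (atomic_inc_and_test(&i915->mm.wakeref_count))
		intel_runtime_pm_get(i915);

	/* timer: drain the counter and back off, or finally sleep */
	count = atomic_xchg(&i915->mm.wakeref_count, 0);
	if (count)
		schedule_delayed_work(&i915->mm.wakeref_work,
				      round_jiffies_up_relative(HZ * ilog2(count)));
	else
		intel_runtime_pm_put(i915); /* counter is reset to -1 */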
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
 drivers/gpu/drm/i915/i915_drv.h          |  8 +++
 drivers/gpu/drm/i915/i915_gem.c          | 94 ++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_object.h   |  5 ++
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  4 +-
 drivers/gpu/drm/i915/intel_lrc.c         |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 +
 7 files changed, 106 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 48572b157222..1432392fb2f8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2812,6 +2812,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 	seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
 	seq_printf(m, "IRQs disabled: %s\n",
 		   yesno(!intel_irqs_enabled(dev_priv)));
+	seq_printf(m, "GTT wakeref count: %d\n",
+		   atomic_read(&dev_priv->mm.wakeref_count));
 #ifdef CONFIG_PM
 	seq_printf(m, "Usage count: %d\n",
 		   atomic_read(&dev_priv->drm.dev->power.usage_count));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0383e879a315..deaedbdfafe7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1460,6 +1460,14 @@ struct i915_gem_mm {
 	 */
 	struct list_head userfault_list;
 
+	/** List of all objects in gtt domain, holding a wakeref.
+	 * The list is reaped periodically.
+	 */
+	struct list_head wakeref_list;
+	struct delayed_work wakeref_work;
+	struct mutex wakeref_lock;
+	atomic_t wakeref_count;
+
 	/**
 	 * List of objects which are pending destruction.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4cc08bc518c..1b949a8aa395 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 static void __start_cpu_write(struct drm_i915_gem_object *obj)
 {
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	if (cpu_write_needs_clflush(obj))
@@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
-static void
+void
 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
 	if (!(obj->base.write_domain & flush_domains))
 		return;
 
@@ -694,16 +697,20 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 
 	switch (obj->base.write_domain) {
 	case I915_GEM_DOMAIN_GTT:
-		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
-			intel_runtime_pm_get(dev_priv);
-			spin_lock_irq(&dev_priv->uncore.lock);
-			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
-			spin_unlock_irq(&dev_priv->uncore.lock);
-			intel_runtime_pm_put(dev_priv);
-		}
+		mutex_lock(&dev_priv->mm.wakeref_lock);
+		if (!list_empty(&obj->mm.wakeref_link)) {
+			if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+				spin_lock_irq(&dev_priv->uncore.lock);
+				POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+				spin_unlock_irq(&dev_priv->uncore.lock);
+			}
 
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+			intel_fb_obj_flush(obj,
+					fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+			list_del_init(&obj->mm.wakeref_link);
+		}
+		mutex_unlock(&dev_priv->mm.wakeref_lock);
 		break;
 
 	case I915_GEM_DOMAIN_CPU:
@@ -3425,6 +3432,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 	if (obj->cache_dirty)
 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.write_domain = 0;
 }
 
@@ -3501,6 +3509,44 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+static void wakeref_timeout(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), mm.wakeref_work.work);
+	struct drm_i915_gem_object *obj, *on;
+	unsigned int count;
+
+	mutex_lock(&dev_priv->mm.wakeref_lock);
+	count = atomic_xchg(&dev_priv->mm.wakeref_count, 0);
+	if (count) {
+		unsigned long timeout;
+
+		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+			spin_lock_irq(&dev_priv->uncore.lock);
+			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+			spin_unlock_irq(&dev_priv->uncore.lock);
+		}
+
+		list_for_each_entry_safe(obj, on,
+					 &dev_priv->mm.wakeref_list,
+					 mm.wakeref_link) {
+			list_del_init(&obj->mm.wakeref_link);
+			if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
+				intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+				obj->base.write_domain = 0;
+			}
+		}
+
+		timeout = HZ * ilog2(count);
+		schedule_delayed_work(&dev_priv->mm.wakeref_work,
+				      round_jiffies_up_relative(timeout));
+	} else {
+		intel_runtime_pm_put(dev_priv);
+		atomic_set(&dev_priv->mm.wakeref_count, -1);
+	}
+	mutex_unlock(&dev_priv->mm.wakeref_lock);
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3512,6 +3558,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3525,6 +3572,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	mutex_lock(&i915->mm.wakeref_lock);
+	if (list_empty(&obj->mm.wakeref_link)) {
+		if (atomic_inc_and_test(&i915->mm.wakeref_count)) {
+			intel_runtime_pm_get(i915);
+			schedule_delayed_work(&i915->mm.wakeref_work,
+					      round_jiffies_up_relative(HZ));
+		}
+		list_add(&obj->mm.wakeref_link, &i915->mm.wakeref_list);
+	}
+	mutex_unlock(&i915->mm.wakeref_lock);
+
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
@@ -4257,6 +4315,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	INIT_LIST_HEAD(&obj->global_link);
 	INIT_LIST_HEAD(&obj->userfault_link);
+	INIT_LIST_HEAD(&obj->mm.wakeref_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4394,6 +4453,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		trace_i915_gem_object_destroy(obj);
 
+		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+		if (!list_empty_careful(&obj->mm.wakeref_link)) {
+			mutex_lock(&i915->mm.wakeref_lock);
+			list_del_init(&obj->mm.wakeref_link);
+			mutex_unlock(&i915->mm.wakeref_lock);
+		}
+
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
 		list_for_each_entry_safe(vma, vn,
 					 &obj->vma_list, obj_link) {
@@ -4548,6 +4615,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	int ret;
 
 	intel_runtime_pm_get(dev_priv);
+	while (flush_delayed_work(&dev_priv->mm.wakeref_work))
+		;
 	intel_suspend_gt_powersave(dev_priv);
 
 	mutex_lock(&dev->struct_mutex);
@@ -4932,6 +5001,11 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (err)
 		goto err_priorities;
 
+	INIT_LIST_HEAD(&dev_priv->mm.wakeref_list);
+	INIT_DELAYED_WORK(&dev_priv->mm.wakeref_work, wakeref_timeout);
+	mutex_init(&dev_priv->mm.wakeref_lock);
+	atomic_set(&dev_priv->mm.wakeref_count, -1);
+
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
 	init_llist_head(&dev_priv->mm.free_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index c30d8f808185..45b6bab4e118 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -177,6 +177,8 @@ struct drm_i915_gem_object {
 			struct mutex lock; /* protects this cache */
 		} get_page;
 
+		struct list_head wakeref_link;
+
 		/**
 		 * Advice: are the backing pages purgeable?
 		 */
@@ -421,5 +423,8 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains);
+
 #endif
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 77fb39808131..71110b7d3ca0 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 
 static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 {
-	if (i915_gem_object_unbind(obj) == 0)
+	if (i915_gem_object_unbind(obj) == 0) {
+		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+	}
 	return !READ_ONCE(obj->mm.pages);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d89e1b8e1cc5..357eee6f907c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 		i915_ggtt_offset(ce->ring->vma);
 
 	ce->state->obj->mm.dirty = true;
+	flush_write_domain(ce->state->obj, ~0);
 
 	i915_gem_context_get(ctx);
 out:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cdf084ef5aae..571a5b1f4f54 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1321,6 +1321,8 @@ int intel_ring_pin(struct intel_ring *ring,
 	if (IS_ERR(addr))
 		goto err;
 
+	flush_write_domain(vma->obj, ~0);
+
 	ring->vaddr = addr;
 	return 0;
 
@@ -1516,6 +1518,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
 			goto err;
 
 		ce->state->obj->mm.dirty = true;
+		flush_write_domain(ce->state->obj, ~0);
 	}
 
 	/* The kernel context is only used as a placeholder for flushing the
-- 
2.14.1


