[PATCH 1/2] drm/i915: Keep the device awake whilst in the GTT domain

Chris Wilson chris at chris-wilson.co.uk
Fri Sep 1 12:53:38 UTC 2017


Since runtime suspend is very harsh on GTT mmappings (they all get
zapped on suspend), keep the device awake while the buffer remains in
the GTT domain. However, userspace can control the domain, and although
there is a soft contract that writes must be flushed (e.g. for flushing
scanout and FBC), we are intentionally lax with respect to read
domains, allowing them to persist for as long as is feasible.
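
For reference, the domain control mentioned above is driven from
userspace through the set-domain ioctl. A minimal sketch, eliding error
handling and assuming an already-open DRM fd and GEM handle
(set_gtt_domain is an illustrative helper, not part of this patch):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    /* Move a buffer into the GTT domain ahead of a GTT mmap access;
     * with this patch applied, doing so also keeps a runtime-pm
     * wakeref alive until the expiry timer finds the object idle. */
    static int set_gtt_domain(int drm_fd, uint32_t handle, int write)
    {
            struct drm_i915_gem_set_domain arg = {
                    .handle = handle,
                    .read_domains = I915_GEM_DOMAIN_GTT,
                    .write_domain = write ? I915_GEM_DOMAIN_GTT : 0,
            };

            return ioctl(drm_fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
    }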

We acquire a wakeref when using the buffer in the GTT domain so that all
subsequent operations on that object are fast, trying to avoid
suspending while the GTT is still in active use by userspace. To ensure
that the device can eventually suspend, we install a timer to expire the
GTT wakeref. So in effect we have just a fancy pm autosuspend that tries
to estimate the cost of restoring actively used GTT mmappings.
---
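
Aside, outside the commit message proper: the lifecycle of
mm.gtt_wakeref.count below can be modelled in isolation. The counter
idles at -1 (no expiry work scheduled); the first object entering the
GTT domain flips it to 0 via atomic_inc_and_test, which takes the
runtime-pm wakeref and schedules the delayed work; each expiry pass
xchg's the count back to 0 and re-arms, and only a pass that finds no
new activity drops the wakeref and parks the counter at -1 again. A
minimal userspace sketch of that state machine, with C11 atomics
standing in for the kernel's atomic_t and the mutex that closes races
in the real patch elided (all names here are illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Models mm.gtt_wakeref.count: -1 == idle, no expiry work scheduled. */
    static atomic_int wakeref_count = -1;

    static void runtime_pm_get(void) { puts("wakeref: acquired"); }
    static void runtime_pm_put(void) { puts("wakeref: released"); }

    /* An object enters the GTT domain (cf. set_to_gtt_domain). */
    static void gtt_domain_enter(void)
    {
            /* The -1 -> 0 transition means we are first in: take the
             * wakeref and (in the kernel) schedule the expiry work. */
            if (atomic_fetch_add(&wakeref_count, 1) == -1)
                    runtime_pm_get();
    }

    /* One expiry pass (cf. wakeref_timeout). Returns true to re-arm. */
    static bool gtt_wakeref_expire(void)
    {
            /* Claim all activity seen since the last pass. */
            if (atomic_exchange(&wakeref_count, 0) > 0)
                    return true; /* still busy: flush objects, re-arm */

            /* Quiescent: drop the wakeref, park the counter at -1. */
            runtime_pm_put();
            atomic_store(&wakeref_count, -1);
            return false;
    }

    int main(void)
    {
            gtt_domain_enter();     /* first user takes the wakeref */
            gtt_domain_enter();     /* later users only bump the count */
            while (gtt_wakeref_expire())
                    ;               /* one busy pass, then release */
            return 0;
    }

The re-arm interval in the patch grows as HZ * (ilog2(count) + 1) / 2:
a single live object is re-checked after roughly half a second, sixteen
after two and a half, so the backoff grows only logarithmically with
the number of mappings kept alive.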
 drivers/gpu/drm/i915/i915_debugfs.c              |   2 +
 drivers/gpu/drm/i915/i915_drv.h                  |  12 +++
 drivers/gpu/drm/i915/i915_gem.c                  | 118 +++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_object.h           |   5 +
 drivers/gpu/drm/i915/i915_gem_shrinker.c         |   4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_object.c |   2 +-
 6 files changed, 124 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 48572b157222..dbb07612aa5a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2809,6 +2809,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 	if (!HAS_RUNTIME_PM(dev_priv))
 		seq_puts(m, "Runtime power management not supported\n");
 
+	seq_printf(m, "GTT wakeref count: %d\n",
+		   atomic_read(&dev_priv->mm.gtt_wakeref.count));
 	seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
 	seq_printf(m, "IRQs disabled: %s\n",
 		   yesno(!intel_irqs_enabled(dev_priv)));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0383e879a315..92f47cbf91f4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1460,6 +1460,18 @@ struct i915_gem_mm {
 	 */
 	struct list_head userfault_list;
 
+	/* List of all objects in gtt domain, holding a wakeref.
+	 * The list is reaped periodically, and protected by its own mutex.
+	 * Entries on this list do not carry a reference.
+	 */
+	struct {
+		struct mutex lock;
+		struct list_head list;
+		atomic_t count; /* -1 == no timeout scheduled */
+
+		struct delayed_work work;
+	} gtt_wakeref;
+
 	/**
 	 * List of objects which are pending destruction.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4cc08bc518c..08243a49538c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 static void __start_cpu_write(struct drm_i915_gem_object *obj)
 {
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	if (cpu_write_needs_clflush(obj))
@@ -667,11 +668,14 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
 	if (!(obj->base.write_domain & flush_domains))
 		return;
 
@@ -694,16 +698,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 
 	switch (obj->base.write_domain) {
 	case I915_GEM_DOMAIN_GTT:
-		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
-			intel_runtime_pm_get(dev_priv);
-			spin_lock_irq(&dev_priv->uncore.lock);
-			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
-			spin_unlock_irq(&dev_priv->uncore.lock);
-			intel_runtime_pm_put(dev_priv);
-		}
+		mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
+		if (!list_empty(&obj->mm.gtt_wakeref_link)) {
+			if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+				spin_lock_irq(&dev_priv->uncore.lock);
+				POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+				spin_unlock_irq(&dev_priv->uncore.lock);
+			}
 
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+			intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+			list_del_init(&obj->mm.gtt_wakeref_link);
+		}
+		mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
 		break;
 
 	case I915_GEM_DOMAIN_CPU:
@@ -808,7 +815,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
@@ -861,7 +868,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
@@ -3422,9 +3429,10 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	 * We manually flush the CPU domain so that we can override and
 	 * force the flush for the display, and perform it asynchronously.
 	 */
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 	if (obj->cache_dirty)
 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.write_domain = 0;
 }
 
@@ -3477,7 +3485,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3501,6 +3509,49 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+static void wakeref_timeout(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), mm.gtt_wakeref.work.work);
+	struct drm_i915_gem_object *obj, *on;
+	unsigned int count;
+
+	mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
+	count = atomic_xchg(&dev_priv->mm.gtt_wakeref.count, 0);
+	if (count) {
+		unsigned long timeout;
+
+		GEM_BUG_ON(count == -1);
+
+		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+			spin_lock_irq(&dev_priv->uncore.lock);
+			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+			spin_unlock_irq(&dev_priv->uncore.lock);
+		}
+
+		count = 0;
+		list_for_each_entry_safe(obj, on,
+					 &dev_priv->mm.gtt_wakeref.list,
+					 mm.gtt_wakeref_link) {
+			list_del_init(&obj->mm.gtt_wakeref_link);
+			if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
+				intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+				obj->base.write_domain = 0;
+			}
+			count++;
+		}
+
+		timeout = HZ * (ilog2(count) + 1) / 2;
+		mod_delayed_work(system_wq,
+				 &dev_priv->mm.gtt_wakeref.work,
+				 round_jiffies_up_relative(timeout));
+	} else {
+		intel_runtime_pm_put(dev_priv);
+		atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
+	}
+	mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3512,6 +3563,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3525,6 +3577,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	if (list_empty_careful(&obj->mm.gtt_wakeref_link)) {
+		mutex_lock(&i915->mm.gtt_wakeref.lock);
+		if (atomic_inc_and_test(&i915->mm.gtt_wakeref.count)) {
+			intel_runtime_pm_get(i915);
+			schedule_delayed_work(&i915->mm.gtt_wakeref.work,
+					      round_jiffies_up_relative(HZ));
+		}
+		list_add(&obj->mm.gtt_wakeref_link, &i915->mm.gtt_wakeref.list);
+		mutex_unlock(&i915->mm.gtt_wakeref.lock);
+	}
+
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
@@ -3540,7 +3603,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3908,7 +3971,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
@@ -4257,6 +4320,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	INIT_LIST_HEAD(&obj->global_link);
 	INIT_LIST_HEAD(&obj->userfault_link);
+	INIT_LIST_HEAD(&obj->mm.gtt_wakeref_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4394,6 +4458,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		trace_i915_gem_object_destroy(obj);
 
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+		if (!list_empty_careful(&obj->mm.gtt_wakeref_link)) {
+			mutex_lock(&i915->mm.gtt_wakeref.lock);
+			list_del(&obj->mm.gtt_wakeref_link);
+			mutex_unlock(&i915->mm.gtt_wakeref.lock);
+		}
+
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
 		list_for_each_entry_safe(vma, vn,
 					 &obj->vma_list, obj_link) {
@@ -4548,6 +4620,9 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	int ret;
 
 	intel_runtime_pm_get(dev_priv);
+	while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
+		;
+	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
 	intel_suspend_gt_powersave(dev_priv);
 
 	mutex_lock(&dev->struct_mutex);
@@ -4932,6 +5007,11 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (err)
 		goto err_priorities;
 
+	INIT_LIST_HEAD(&dev_priv->mm.gtt_wakeref.list);
+	INIT_DELAYED_WORK(&dev_priv->mm.gtt_wakeref.work, wakeref_timeout);
+	mutex_init(&dev_priv->mm.gtt_wakeref.lock);
+	atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
+
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
 	init_llist_head(&dev_priv->mm.free_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
@@ -4978,6 +5058,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
+		;
+	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
+
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index c30d8f808185..5797b75a804b 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -177,6 +177,8 @@ struct drm_i915_gem_object {
 			struct mutex lock; /* protects this cache */
 		} get_page;
 
+		struct list_head gtt_wakeref_link;
+
 		/**
 		 * Advice: are the backing pages purgeable?
 		 */
@@ -421,5 +423,8 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+					unsigned int flush_domains);
+
 #endif
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 77fb39808131..8d146584ee49 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 
 static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 {
-	if (i915_gem_object_unbind(obj) == 0)
+	if (i915_gem_object_unbind(obj) == 0) {
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+	}
 	return !READ_ONCE(obj->mm.pages);
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 8f011c447e41..f3f6588afbdb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		if (offset >= obj->base.size)
 			continue;
 
-		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
 		cpu = kmap(p) + offset_in_page(offset);
-- 
2.14.1


