[PATCH 15/36] drm/i915: Keep the device awake whilst in the GTT domain

Chris Wilson chris at chris-wilson.co.uk
Mon Sep 25 20:34:34 UTC 2017


Since runtime suspend is very harsh on GTT mmappings (they all get
zapped on suspend), keep the device awake while the buffer remains in
the GTT domain. However, userspace controls the domain, and although
there is a soft contract that writes must be flushed (e.g. for
flushing scanouts and fbc), we are intentionally lax with respect to
read domains, allowing them to persist for as long as is feasible.
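
As an aside, these domain moves are driven from userspace through the
set-domain ioctl. A minimal sketch (illustration only, not part of this
patch; assumes fd is an open i915 DRM fd and handle a valid GEM handle)
of the call that, with this patch, also pins the GTT wakeref:

#include <stdint.h>
#include <stdbool.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int set_to_gtt_domain(int fd, uint32_t handle, bool write)
{
	struct drm_i915_gem_set_domain arg = {
		.handle = handle,
		.read_domains = I915_GEM_DOMAIN_GTT,
		.write_domain = write ? I915_GEM_DOMAIN_GTT : 0,
	};

	/* On success the kernel now holds a GTT wakeref for this object */
	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
}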

We acquire a wakeref when moving the buffer into the GTT domain so that
all subsequent operations on that object are fast, trying to avoid
suspending while the GTT is still in active use by userspace. To ensure
that the device can eventually suspend, we install a timer to expire the
GTT wakeref. In effect this is just a fancy pm autosuspend that tries
to estimate the cost of restoring actively used GTT mmappings.
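
For reference, the expiry delay computed by wakeref_timeout() below
grows logarithmically with the number of objects still in the GTT
domain, timeout = HZ * (ilog2(n) + 1) / 2 jiffies, with dirty objects
counted twice. A standalone sketch of that curve (the HZ value here is
an assumption, and ilog2_u() is a userspace stand-in for the kernel's
ilog2()):

#include <stdio.h>

#define HZ 250 /* assumed CONFIG_HZ; only the absolute scale depends on it */

static unsigned int ilog2_u(unsigned int x)
{
	/* floor(log2(x)) for x > 0, mirroring the kernel's ilog2() */
	unsigned int r = 0;

	while (x >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned int n;

	/* Rearm delay from wakeref_timeout() for a range of wakeref counts */
	for (n = 1; n <= 1024; n *= 4)
		printf("%4u objects -> %u jiffies (~%.1fs)\n",
		       n, HZ * (ilog2_u(n) + 1) / 2,
		       (ilog2_u(n) + 1) / 2.0);
	return 0;
}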

v2: Don't forget to set up/clean up the selftests.

References: https://bugs.freedesktop.org/show_bug.cgi?id=102490
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c              |   2 +
 drivers/gpu/drm/i915/i915_drv.h                  |  12 +++
 drivers/gpu/drm/i915/i915_gem.c                  | 122 +++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_object.h           |   5 +
 drivers/gpu/drm/i915/i915_gem_shrinker.c         |   4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_object.c |   2 +-
 drivers/gpu/drm/i915/selftests/mock_gem_device.c |   6 ++
 7 files changed, 134 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 18a87ce0d045..ba647122231f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2809,6 +2809,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 	if (!HAS_RUNTIME_PM(dev_priv))
 		seq_puts(m, "Runtime power management not supported\n");
 
+	seq_printf(m, "GTT wakeref count: %d\n",
+		   atomic_read(&dev_priv->mm.gtt_wakeref.count));
 	seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
 	seq_printf(m, "IRQs disabled: %s\n",
 		   yesno(!intel_irqs_enabled(dev_priv)));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2a7418a52c02..5250258093f7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1488,6 +1488,18 @@ struct i915_gem_mm {
 	 */
 	struct list_head userfault_list;
 
+	/* List of all objects in gtt domain, holding a wakeref.
+	 * The list is reaped periodically, and protected by its own mutex.
+	 * Entries on this list do not carry a reference.
+	 */
+	struct {
+		struct mutex lock;
+		struct list_head list;
+		atomic_t count; /* -1 == no timeout scheduled */
+
+		struct delayed_work work;
+	} gtt_wakeref;
+
 	/**
 	 * List of objects which are pending destruction.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7192a9249a5a..6024ecc174e4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 static void __start_cpu_write(struct drm_i915_gem_object *obj)
 {
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	if (cpu_write_needs_clflush(obj))
@@ -667,11 +668,14 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
 	if (!(obj->base.write_domain & flush_domains))
 		return;
 
@@ -694,16 +698,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 
 	switch (obj->base.write_domain) {
 	case I915_GEM_DOMAIN_GTT:
-		if (!HAS_LLC(dev_priv)) {
-			intel_runtime_pm_get(dev_priv);
-			spin_lock_irq(&dev_priv->uncore.lock);
-			POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base));
-			spin_unlock_irq(&dev_priv->uncore.lock);
-			intel_runtime_pm_put(dev_priv);
-		}
+		mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
+		if (!list_empty(&obj->mm.gtt_wakeref_link)) {
+			if (!HAS_LLC(dev_priv)) {
+				spin_lock_irq(&dev_priv->uncore.lock);
+				POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base));
+				spin_unlock_irq(&dev_priv->uncore.lock);
+			}
 
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+			intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+			list_del_init(&obj->mm.gtt_wakeref_link);
+		}
+		mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
 		break;
 
 	case I915_GEM_DOMAIN_CPU:
@@ -808,7 +815,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
@@ -861,7 +868,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
@@ -3387,9 +3394,10 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	 * We manually flush the CPU domain so that we can override and
 	 * force the flush for the display, and perform it asynchronously.
 	 */
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 	if (obj->cache_dirty)
 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 	obj->base.write_domain = 0;
 }
 
@@ -3442,7 +3450,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3466,6 +3474,49 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+static void wakeref_timeout(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), mm.gtt_wakeref.work.work);
+	struct drm_i915_gem_object *obj, *on;
+	unsigned int count;
+
+	mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
+	count = atomic_xchg(&dev_priv->mm.gtt_wakeref.count, 0);
+	if (count) {
+		unsigned long timeout;
+
+		GEM_BUG_ON(count == -1);
+
+		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+			spin_lock_irq(&dev_priv->uncore.lock);
+			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+			spin_unlock_irq(&dev_priv->uncore.lock);
+		}
+
+		timeout = 0;
+		list_for_each_entry_safe(obj, on,
+					 &dev_priv->mm.gtt_wakeref.list,
+					 mm.gtt_wakeref_link) {
+			list_del_init(&obj->mm.gtt_wakeref_link);
+			if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
+				intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+				obj->base.write_domain = 0;
+				timeout++;
+			}
+			timeout++;
+		}
+
+		timeout = HZ * (ilog2(timeout) + 1) / 2;
+		schedule_delayed_work(&dev_priv->mm.gtt_wakeref.work,
+				      round_jiffies_up_relative(timeout));
+	} else {
+		intel_runtime_pm_put(dev_priv);
+		atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
+	}
+	mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3477,6 +3528,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -3490,6 +3542,17 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	mutex_lock(&i915->mm.gtt_wakeref.lock);
+	if (list_empty(&obj->mm.gtt_wakeref_link)) {
+		if (atomic_inc_and_test(&i915->mm.gtt_wakeref.count)) {
+			intel_runtime_pm_get(i915);
+			schedule_delayed_work(&i915->mm.gtt_wakeref.work,
+					      round_jiffies_up_relative(HZ));
+		}
+		list_add(&obj->mm.gtt_wakeref_link, &i915->mm.gtt_wakeref.list);
+	}
+	mutex_unlock(&i915->mm.gtt_wakeref.lock);
+
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
@@ -3505,7 +3568,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3873,7 +3936,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
@@ -4222,6 +4285,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	INIT_LIST_HEAD(&obj->global_link);
 	INIT_LIST_HEAD(&obj->userfault_link);
+	INIT_LIST_HEAD(&obj->mm.gtt_wakeref_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4359,6 +4423,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		trace_i915_gem_object_destroy(obj);
 
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
 		list_for_each_entry_safe(vma, vn,
 					 &obj->vma_list, obj_link) {
@@ -4381,6 +4447,12 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
 		GEM_BUG_ON(!list_empty(&obj->lut_list));
 
+		if (!list_empty_careful(&obj->mm.gtt_wakeref_link)) {
+			mutex_lock(&i915->mm.gtt_wakeref.lock);
+			list_del(&obj->mm.gtt_wakeref_link);
+			mutex_unlock(&i915->mm.gtt_wakeref.lock);
+		}
+
 		if (obj->ops->release)
 			obj->ops->release(obj);
 
@@ -4513,6 +4585,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	int ret;
 
 	intel_runtime_pm_get(dev_priv);
+	drain_delayed_work(&dev_priv->mm.gtt_wakeref.work);
+	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
 	intel_suspend_gt_powersave(dev_priv);
 
 	mutex_lock(&dev->struct_mutex);
@@ -4855,6 +4929,15 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 	i915_gem_detect_bit_6_swizzle(dev_priv);
 }
 
+static void
+i915_gem_load_init__wakeref(struct drm_i915_private *i915)
+{
+	INIT_LIST_HEAD(&i915->mm.gtt_wakeref.list);
+	INIT_DELAYED_WORK(&i915->mm.gtt_wakeref.work, wakeref_timeout);
+	mutex_init(&i915->mm.gtt_wakeref.lock);
+	atomic_set(&i915->mm.gtt_wakeref.count, -1);
+}
+
 int
 i915_gem_load_init(struct drm_i915_private *dev_priv)
 {
@@ -4896,6 +4979,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (err)
 		goto err_priorities;
 
+	i915_gem_load_init__wakeref(dev_priv);
+
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
 	init_llist_head(&dev_priv->mm.free_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
@@ -4942,6 +5027,9 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	drain_delayed_work(&dev_priv->mm.gtt_wakeref.work);
+	WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
+
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index c30d8f808185..5797b75a804b 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -177,6 +177,8 @@ struct drm_i915_gem_object {
 			struct mutex lock; /* protects this cache */
 		} get_page;
 
+		struct list_head gtt_wakeref_link;
+
 		/**
 		 * Advice: are the backing pages purgeable?
 		 */
@@ -421,5 +423,8 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+					unsigned int flush_domains);
+
 #endif
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 74002b2d1b6f..6d89d14ab0ca 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 
 static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 {
-	if (i915_gem_object_unbind(obj) == 0)
+	if (i915_gem_object_unbind(obj) == 0) {
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+	}
 	return !READ_ONCE(obj->mm.pages);
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 8f011c447e41..f3f6588afbdb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		if (offset >= obj->base.size)
 			continue;
 
-		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
 		cpu = kmap(p) + offset_in_page(offset);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 2388424a14da..d01dd731b7da 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -67,6 +67,9 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_fini(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
+	drain_delayed_work(&i915->mm.gtt_wakeref.work);
+	WARN_ON(atomic_read(&i915->mm.gtt_wakeref.count) != -1);
+
 	drain_workqueue(i915->wq);
 	i915_gem_drain_freed_objects(i915);
 
@@ -171,6 +174,7 @@ struct drm_i915_private *mock_gem_device(void)
 	drm_mode_config_init(&i915->drm);
 
 	mkwrite_device_info(i915)->gen = -1;
+	mkwrite_device_info(i915)->has_llc = true;
 
 	spin_lock_init(&i915->mm.object_stat_lock);
 	mock_uncore_init(i915);
@@ -182,6 +186,8 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->wq)
 		goto put_device;
 
+	i915_gem_load_init__wakeref(i915);
+
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
 	init_llist_head(&i915->mm.free_list);
 	INIT_LIST_HEAD(&i915->mm.unbound_list);
-- 
2.14.1


