[Intel-gfx] [PATCH 2/2] drm/i915: Cancel retire_worker on parking

Chris Wilson chris at chris-wilson.co.uk
Tue Apr 30 09:44:05 UTC 2019


Replace the racy continuation check within retire_work with a definite
kill-switch on idling. The race was being exposed by gem_concurrent_blit
where the retire_worker would be terminated too early leaving us
spinning in debugfs/i915_drop_caches with nothing flushing the
retirement queue.

Although that the igt is trying to idle from one child while submitting
from another may be a contributing factor as to why  it runs so slowly...

Testcase: igt/gem_concurrent_blit
Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy")
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/i915_gem_pm.c            | 27 ++++++++++---------
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  3 +--
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
index 3b6e8d5be8e1..88be810758ae 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -46,15 +46,23 @@ static void idle_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.idle_work.work);
+	bool restart = true;
 
+	cancel_delayed_work_sync(&i915->gem.retire_work);
 	mutex_lock(&i915->drm.struct_mutex);
 
 	intel_wakeref_lock(&i915->gt.wakeref);
-	if (!intel_wakeref_active(&i915->gt.wakeref))
+	if (!intel_wakeref_active(&i915->gt.wakeref)) {
 		i915_gem_park(i915);
+		restart = false;
+	}
 	intel_wakeref_unlock(&i915->gt.wakeref);
 
 	mutex_unlock(&i915->drm.struct_mutex);
+	if (restart)
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
 }
 
 static void retire_work_handler(struct work_struct *work)
@@ -68,10 +76,9 @@ static void retire_work_handler(struct work_struct *work)
 		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
-	if (intel_wakeref_active(&i915->gt.wakeref))
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
+	queue_delayed_work(i915->wq,
+			   &i915->gem.retire_work,
+			   round_jiffies_up_relative(HZ));
 }
 
 static int pm_notifier(struct notifier_block *nb,
@@ -159,15 +166,9 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * reset the GPU back to its idle, low power state.
 	 */
 	GEM_BUG_ON(i915->gt.awake);
-	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-
-	drain_delayed_work(&i915->gem.retire_work);
+	flush_delayed_work(&i915->gem.idle_work);
 
-	/*
-	 * As the idle_work is rearming if it detects a race, play safe and
-	 * repeat the flush until it is definitely idle.
-	 */
-	drain_delayed_work(&i915->gem.idle_work);
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
 
 	i915_gem_drain_freed_objects(i915);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index e4033d0576c4..ce54f8dc13cc 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -58,8 +58,7 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	drain_delayed_work(&i915->gem.retire_work);
-	drain_delayed_work(&i915->gem.idle_work);
+	flush_delayed_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
-- 
2.20.1



More information about the Intel-gfx mailing list