[PATCH 09/10] drm/i915/guc: Fix circular locking dependency between struct_mutex, i_mutex_key, mmap_sem

Sagar Arun Kamble sagar.a.kamble at intel.com
Tue Jan 9 07:39:02 UTC 2018


With relay, we create a debugfs file, which requires the i_mutex_key
lock, and we were doing that under struct_mutex. This introduced a new
dependency chain:
  &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
However, there is already a dependency from mmap_sem to struct_mutex
(the page fault path takes struct_mutex in i915_gem_fault while holding
mmap_sem), which closes the cycle. Hence we move the
guc_log_relay_file_create operation out from under struct_mutex.

<4>[   27.531910] ======================================================
<4>[   27.531912] WARNING: possible circular locking dependency detected
<4>[   27.531915] 4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted
<4>[   27.531917] ------------------------------------------------------
<4>[   27.531919] debugfs_test/1388 is trying to acquire lock:
<4>[   27.531921]  (&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915]
<4>[   27.531989]
                  but task is already holding lock:
<4>[   27.531991]  (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560
<4>[   27.531997]
                  which lock already depends on the new lock.

<4>[   27.532000]
                  the existing dependency chain (in reverse order) is:
<4>[   27.532002]
                  -> #3 (&mm->mmap_sem){++++}:
<4>[   27.532009]        _copy_to_user+0x1e/0x70
<4>[   27.532013]        filldir+0x8c/0xf0
<4>[   27.532016]        dcache_readdir+0xeb/0x160
<4>[   27.532019]        iterate_dir+0xdc/0x140
<4>[   27.532021]        SyS_getdents+0xa0/0x130
<4>[   27.532024]        entry_SYSCALL_64_fastpath+0x1c/0x89
<4>[   27.532026]
                  -> #2 (&sb->s_type->i_mutex_key#3){++++}:
<4>[   27.532033]        start_creating+0x59/0x110
<4>[   27.532035]        __debugfs_create_file+0x2e/0xe0
<4>[   27.532039]        relay_create_buf_file+0x62/0x80
<4>[   27.532041]        relay_late_setup_files+0x84/0x250
<4>[   27.532089]        guc_log_late_setup+0x4f/0x110 [i915]
<4>[   27.532131]        i915_guc_log_register+0x32/0x40 [i915]
<4>[   27.532165]        i915_driver_load+0x7b6/0x1720 [i915]
<4>[   27.532199]        i915_pci_probe+0x2e/0x90 [i915]
<4>[   27.532202]        pci_device_probe+0x9c/0x120
<4>[   27.532205]        driver_probe_device+0x2a3/0x480
<4>[   27.532207]        __driver_attach+0xd9/0xe0
<4>[   27.532209]        bus_for_each_dev+0x57/0x90
<4>[   27.532211]        bus_add_driver+0x168/0x260
<4>[   27.532213]        driver_register+0x52/0xc0
<4>[   27.532215]        do_one_initcall+0x39/0x150
<4>[   27.532217]        do_init_module+0x56/0x1ef
<4>[   27.532221]        load_module+0x231c/0x2d70
<4>[   27.532223]        SyS_finit_module+0xa5/0xe0
<4>[   27.532225]        entry_SYSCALL_64_fastpath+0x1c/0x89
<4>[   27.532227]
                  -> #1 (relay_channels_mutex){+.+.}:
<4>[   27.532233]        relay_open+0x12c/0x2b0
<4>[   27.532274]        intel_guc_log_runtime_create+0xab/0x230 [i915]
<4>[   27.532316]        intel_guc_init+0x81/0x120 [i915]
<4>[   27.532357]        intel_uc_init+0x29/0xa0 [i915]
<4>[   27.532396]        i915_gem_init+0x182/0x530 [i915]
<4>[   27.532429]        i915_driver_load+0xaa9/0x1720 [i915]
<4>[   27.532463]        i915_pci_probe+0x2e/0x90 [i915]
<4>[   27.532465]        pci_device_probe+0x9c/0x120
<4>[   27.532467]        driver_probe_device+0x2a3/0x480
<4>[   27.532469]        __driver_attach+0xd9/0xe0
<4>[   27.532471]        bus_for_each_dev+0x57/0x90
<4>[   27.532473]        bus_add_driver+0x168/0x260
<4>[   27.532475]        driver_register+0x52/0xc0
<4>[   27.532477]        do_one_initcall+0x39/0x150
<4>[   27.532479]        do_init_module+0x56/0x1ef
<4>[   27.532481]        load_module+0x231c/0x2d70
<4>[   27.532484]        SyS_finit_module+0xa5/0xe0
<4>[   27.532486]        entry_SYSCALL_64_fastpath+0x1c/0x89
<4>[   27.532487]
                  -> #0 (&dev->struct_mutex){+.+.}:
<4>[   27.532494]        __mutex_lock+0x81/0x9b0
<4>[   27.532531]        i915_mutex_lock_interruptible+0x47/0x130 [i915]
<4>[   27.532570]        i915_gem_fault+0x201/0x790 [i915]
<4>[   27.532573]        __do_fault+0x15/0x70
<4>[   27.532576]        __handle_mm_fault+0x677/0xdc0
<4>[   27.532578]        handle_mm_fault+0x14f/0x2f0
<4>[   27.532580]        __do_page_fault+0x2d1/0x560
<4>[   27.532583]        page_fault+0x4c/0x60
<4>[   27.532584]
                  other info that might help us debug this:

<4>[   27.532587] Chain exists of:
                    &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem

<4>[   27.532593]  Possible unsafe locking scenario:

<4>[   27.532595]        CPU0                    CPU1
<4>[   27.532596]        ----                    ----
<4>[   27.532597]   lock(&mm->mmap_sem);
<4>[   27.532599]                                lock(&sb->s_type->i_mutex_key#3);
<4>[   27.532602]                                lock(&mm->mmap_sem);
<4>[   27.532605]   lock(&dev->struct_mutex);
<4>[   27.532607]
                   *** DEADLOCK ***

<4>[   27.532610] 1 lock held by debugfs_test/1388:
<4>[   27.532611]  #0:  (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560
<4>[   27.532616]
                  stack backtrace:
<4>[   27.532619] CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1
<4>[   27.532621] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016
<4>[   27.532623] Call Trace:
<4>[   27.532628]  dump_stack+0x5f/0x86
<4>[   27.532633]  print_circular_bug.isra.18+0x1d0/0x2c0
<4>[   27.532636]  __lock_acquire+0x14ae/0x1b60
<4>[   27.532642]  ? lock_acquire+0xaf/0x200
<4>[   27.532644]  lock_acquire+0xaf/0x200
<4>[   27.532683]  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
<4>[   27.532687]  __mutex_lock+0x81/0x9b0
<4>[   27.532726]  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
<4>[   27.532764]  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
<4>[   27.532804]  ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
<4>[   27.532842]  i915_mutex_lock_interruptible+0x47/0x130 [i915]
<4>[   27.532847]  ? __pm_runtime_resume+0x4f/0x80
<4>[   27.532886]  i915_gem_fault+0x201/0x790 [i915]
<4>[   27.532891]  __do_fault+0x15/0x70
<4>[   27.532893]  ? _raw_spin_unlock+0x29/0x40
<4>[   27.532896]  __handle_mm_fault+0x677/0xdc0
<4>[   27.532901]  handle_mm_fault+0x14f/0x2f0
<4>[   27.532904]  __do_page_fault+0x2d1/0x560
<4>[   27.532908]  ? page_fault+0x36/0x60
<4>[   27.532910]  page_fault+0x4c/0x60

Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  7 -------
 drivers/gpu/drm/i915/intel_guc_log.c | 29 ++++++++++++++++++++++++-----
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index eef4c8b..f08b5d6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2481,15 +2481,8 @@ static int i915_guc_log_control_set(void *data, u64 val)
 	if (!dev_priv->guc.log.vma)
 		return -EINVAL;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	intel_runtime_pm_get(dev_priv);
 	ret = i915_guc_log_control(dev_priv, val);
-	intel_runtime_pm_put(dev_priv);
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index fd2a40e..2bc62f4 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -441,14 +441,17 @@ static int guc_log_late_setup(struct intel_guc *guc)
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (!guc_log_has_runtime(guc)) {
 		/* If log_level was set as -1 at boot time, then setup needed to
 		 * handle log buffer flush interrupts would not have been done yet,
 		 * so do that now.
 		 */
+		mutex_lock(&dev_priv->drm.struct_mutex);
+		intel_runtime_pm_get(dev_priv);
 		ret = intel_guc_log_runtime_create(guc);
+		intel_runtime_pm_put(dev_priv);
+		mutex_unlock(&dev_priv->drm.struct_mutex);
+
 		if (ret)
 			goto err;
 	}
@@ -460,7 +463,9 @@ static int guc_log_late_setup(struct intel_guc *guc)
 	return 0;
 
 err_runtime:
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_guc_log_runtime_destroy(guc);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 err:
 	/* logging will remain off */
 	guc->log.level = -1;
@@ -573,7 +578,14 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 	if (!log_param.logging_enabled && guc->log.level < 0)
 		return 0;
 
+	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
+	if (ret)
+		return ret;
+	intel_runtime_pm_get(dev_priv);
 	ret = guc_log_control(guc, log_param.value);
+	intel_runtime_pm_put(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
 	if (ret < 0) {
 		DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret);
 		return ret;
@@ -592,14 +604,23 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 			return ret;
 		}
 
+		mutex_lock(&dev_priv->drm.struct_mutex);
+		intel_runtime_pm_get(dev_priv);
 		intel_guc_log_enable_interrupts(guc);
+		intel_runtime_pm_put(dev_priv);
+		mutex_unlock(&dev_priv->drm.struct_mutex);
+
 	} else {
 		/* Once logging is disabled, GuC won't generate logs & send an
 		 * interrupt. But there could be some data in the log buffer
 		 * which is yet to be captured. So request GuC to update the log
 		 * buffer state and then collect the left over logs.
 		 */
+		mutex_lock(&dev_priv->drm.struct_mutex);
+		intel_runtime_pm_get(dev_priv);
 		guc_flush_logs(guc);
+		intel_runtime_pm_put(dev_priv);
+		mutex_unlock(&dev_priv->drm.struct_mutex);
 
 		/* As logging is disabled, update log level to reflect that */
 		guc->log.level = -1;
@@ -616,9 +637,7 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv)
 	if (dev_priv->guc.log.level < 0)
 		return;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	guc_log_late_setup(&dev_priv->guc);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	 guc_log_late_setup(&dev_priv->guc);
 }
 
 void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
-- 
1.9.1



More information about the Intel-gfx-trybot mailing list