[PATCH 2/3] drm/i915/guc: Fix circular locking dependency due to log relay channel handling under struct_mutex
Sagar Arun Kamble
sagar.a.kamble at intel.com
Wed Jan 10 17:22:14 UTC 2018
This patch fixes deadlock issue due to circular locking dependency of
struct_mutex, i_mutex_key, mmap_sem, relay_channels_mutex.
For GuC log relay channel we create debugfs file that requires i_mutex_key
lock and we are doing that under struct_mutex. So we introduced newer
dependency as:
&dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
However, there is dependency from mmap_sem to struct_mutex. Hence we
separate the relay create/destroy operation from under struct_mutex.
======================================================
WARNING: possible circular locking dependency detected
4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted
------------------------------------------------------
debugfs_test/1388 is trying to acquire lock:
(&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915]
but task is already holding lock:
(&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #3 (&mm->mmap_sem){++++}:
_copy_to_user+0x1e/0x70
filldir+0x8c/0xf0
dcache_readdir+0xeb/0x160
iterate_dir+0xdc/0x140
SyS_getdents+0xa0/0x130
entry_SYSCALL_64_fastpath+0x1c/0x89
-> #2 (&sb->s_type->i_mutex_key#3){++++}:
start_creating+0x59/0x110
__debugfs_create_file+0x2e/0xe0
relay_create_buf_file+0x62/0x80
relay_late_setup_files+0x84/0x250
guc_log_late_setup+0x4f/0x110 [i915]
i915_guc_log_register+0x32/0x40 [i915]
i915_driver_load+0x7b6/0x1720 [i915]
i915_pci_probe+0x2e/0x90 [i915]
pci_device_probe+0x9c/0x120
driver_probe_device+0x2a3/0x480
__driver_attach+0xd9/0xe0
bus_for_each_dev+0x57/0x90
bus_add_driver+0x168/0x260
driver_register+0x52/0xc0
do_one_initcall+0x39/0x150
do_init_module+0x56/0x1ef
load_module+0x231c/0x2d70
SyS_finit_module+0xa5/0xe0
entry_SYSCALL_64_fastpath+0x1c/0x89
-> #1 (relay_channels_mutex){+.+.}:
relay_open+0x12c/0x2b0
intel_guc_log_runtime_create+0xab/0x230 [i915]
intel_guc_init+0x81/0x120 [i915]
intel_uc_init+0x29/0xa0 [i915]
i915_gem_init+0x182/0x530 [i915]
i915_driver_load+0xaa9/0x1720 [i915]
i915_pci_probe+0x2e/0x90 [i915]
pci_device_probe+0x9c/0x120
driver_probe_device+0x2a3/0x480
__driver_attach+0xd9/0xe0
bus_for_each_dev+0x57/0x90
bus_add_driver+0x168/0x260
driver_register+0x52/0xc0
do_one_initcall+0x39/0x150
do_init_module+0x56/0x1ef
load_module+0x231c/0x2d70
SyS_finit_module+0xa5/0xe0
entry_SYSCALL_64_fastpath+0x1c/0x89
-> #0 (&dev->struct_mutex){+.+.}:
__mutex_lock+0x81/0x9b0
i915_mutex_lock_interruptible+0x47/0x130 [i915]
i915_gem_fault+0x201/0x790 [i915]
__do_fault+0x15/0x70
__handle_mm_fault+0x677/0xdc0
handle_mm_fault+0x14f/0x2f0
__do_page_fault+0x2d1/0x560
page_fault+0x4c/0x60
other info that might help us debug this:
Chain exists of:
&dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&mm->mmap_sem);
lock(&sb->s_type->i_mutex_key#3);
lock(&mm->mmap_sem);
lock(&dev->struct_mutex);
*** DEADLOCK ***
1 lock held by debugfs_test/1388:
#0: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560
stack backtrace:
CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016
Call Trace:
dump_stack+0x5f/0x86
print_circular_bug.isra.18+0x1d0/0x2c0
__lock_acquire+0x14ae/0x1b60
? lock_acquire+0xaf/0x200
lock_acquire+0xaf/0x200
? i915_mutex_lock_interruptible+0x47/0x130 [i915]
__mutex_lock+0x81/0x9b0
? i915_mutex_lock_interruptible+0x47/0x130 [i915]
? i915_mutex_lock_interruptible+0x47/0x130 [i915]
? i915_mutex_lock_interruptible+0x47/0x130 [i915]
i915_mutex_lock_interruptible+0x47/0x130 [i915]
? __pm_runtime_resume+0x4f/0x80
i915_gem_fault+0x201/0x790 [i915]
__do_fault+0x15/0x70
? _raw_spin_unlock+0x29/0x40
__handle_mm_fault+0x677/0xdc0
handle_mm_fault+0x14f/0x2f0
__do_page_fault+0x2d1/0x560
? page_fault+0x36/0x60
page_fault+0x4c/0x60
Testcase: igt/debugfs_test/read_all_entries # with enable_guc=1 and guc_log_level=1
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 12 +---
drivers/gpu/drm/i915/i915_gem.c | 8 +++
drivers/gpu/drm/i915/intel_guc_log.c | 118 ++++++++++++++++++++++++++---------
drivers/gpu/drm/i915/intel_guc_log.h | 7 +++
drivers/gpu/drm/i915/intel_uc.c | 16 +++++
drivers/gpu/drm/i915/intel_uc.h | 2 +
6 files changed, 121 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2bb6307..c43007d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2467,7 +2467,6 @@ static int i915_guc_log_control_get(void *data, u64 *val)
static int i915_guc_log_control_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
- int ret;
if (!HAS_GUC(dev_priv))
return -ENODEV;
@@ -2475,16 +2474,7 @@ static int i915_guc_log_control_set(void *data, u64 val)
if (!dev_priv->guc.log.vma)
return -EINVAL;
- ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
- if (ret)
- return ret;
-
- intel_runtime_pm_get(dev_priv);
- ret = i915_guc_log_control(dev_priv, val);
- intel_runtime_pm_put(dev_priv);
-
- mutex_unlock(&dev_priv->drm.struct_mutex);
- return ret;
+ return i915_guc_log_control(dev_priv, val);
}
DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1135a77..2abb6ffb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5191,6 +5191,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
if (ret)
return ret;
+ ret = intel_uc_init_misc(dev_priv);
+ if (ret) {
+ intel_uc_fini_wq(dev_priv);
+ return ret;
+ }
+
/* This is just a security blanket to placate dragons.
* On some systems, we very sporadically observe that the first TLBs
* used by the CS may be stale, despite us poking the TLB reset. If
@@ -5283,6 +5289,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex);
+ intel_uc_fini_misc(dev_priv);
+
intel_uc_fini_wq(dev_priv);
if (ret != -EIO)
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 853426b..76c4374 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -356,12 +356,15 @@ static bool guc_log_has_runtime(struct intel_guc *guc)
return guc->log.runtime.buf_addr != NULL;
}
+static bool guc_log_has_relay(struct intel_guc *guc)
+{
+ return guc->log.runtime.relay_chan != NULL;
+}
+
int intel_guc_log_runtime_create(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
void *vaddr;
- struct rchan *guc_log_relay_chan;
- size_t n_subbufs, subbuf_size;
int ret;
if (i915_modparams.guc_log_level < 0)
@@ -369,6 +372,8 @@ int intel_guc_log_runtime_create(struct intel_guc *guc)
lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ GEM_BUG_ON(!guc_log_has_relay(guc));
+
GEM_BUG_ON(guc_log_has_runtime(guc));
/* We require SSE 4.1 for fast reads from the GuC log buffer and
@@ -382,7 +387,7 @@ int intel_guc_log_runtime_create(struct intel_guc *guc)
ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true);
if (ret)
- return ret;
+ goto err;
/* Create a WC (Uncached for read) vmalloc mapping of log
* buffer pages, so that we can directly get the data
@@ -391,13 +396,48 @@ int intel_guc_log_runtime_create(struct intel_guc *guc)
vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC);
if (IS_ERR(vaddr)) {
DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
- return PTR_ERR(vaddr);
+ ret = PTR_ERR(vaddr);
+ goto err;
}
guc->log.runtime.buf_addr = vaddr;
+ INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
+ return 0;
+
+err:
+ /* logging will be off */
+ i915_modparams.guc_log_level = -1;
+ return ret;
+}
+
+void intel_guc_log_runtime_destroy(struct intel_guc *guc)
+{
+ /*
+ * It's possible that the runtime stuff was never allocated because
+ * guc_log_level was < 0 at the time
+ **/
+ if (!guc_log_has_runtime(guc))
+ return;
+
+ i915_gem_object_unpin_map(guc->log.vma->obj);
+ guc->log.runtime.buf_addr = NULL;
+}
+
+int intel_guc_log_relay_create(struct intel_guc *guc)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ struct rchan *guc_log_relay_chan;
+ size_t n_subbufs, subbuf_size;
+ int ret;
+
+ if (i915_modparams.guc_log_level < 0)
+ return 0;
+
+ GEM_BUG_ON(guc_log_has_relay(guc));
+
/* Keep the size of sub buffers same as shared log buffer */
- subbuf_size = guc->log.vma->obj->base.size;
+ subbuf_size = GUC_LOG_SIZE;
/* Store up to 8 snapshots, which is large enough to buffer sufficient
* boot time logs and provides enough leeway to User, in terms of
@@ -416,36 +456,30 @@ int intel_guc_log_runtime_create(struct intel_guc *guc)
DRM_ERROR("Couldn't create relay chan for GuC logging\n");
ret = -ENOMEM;
- goto err_vaddr;
+ goto err;
}
GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
guc->log.runtime.relay_chan = guc_log_relay_chan;
- INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
return 0;
-err_vaddr:
- i915_gem_object_unpin_map(guc->log.vma->obj);
- guc->log.runtime.buf_addr = NULL;
err:
/* logging will be off */
i915_modparams.guc_log_level = -1;
return ret;
}
-void intel_guc_log_runtime_destroy(struct intel_guc *guc)
+void intel_guc_log_relay_destroy(struct intel_guc *guc)
{
/*
- * It's possible that the runtime stuff was never allocated because
+ * It's possible that the relay was never allocated because
* guc_log_level was < 0 at the time
**/
- if (!guc_log_has_runtime(guc))
+ if (!guc_log_has_relay(guc))
return;
relay_close(guc->log.runtime.relay_chan);
- i915_gem_object_unpin_map(guc->log.vma->obj);
- guc->log.runtime.buf_addr = NULL;
}
static int guc_log_late_setup(struct intel_guc *guc)
@@ -453,16 +487,25 @@ static int guc_log_late_setup(struct intel_guc *guc)
struct drm_i915_private *dev_priv = guc_to_i915(guc);
int ret;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
if (!guc_log_has_runtime(guc)) {
/* If log_level was set as -1 at boot time, then setup needed to
* handle log buffer flush interrupts would not have been done yet,
* so do that now.
*/
+ if (!guc_log_has_relay(guc)) {
+ ret = intel_guc_log_relay_create(guc);
+ if (ret)
+ goto err;
+ }
+
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_get(dev_priv);
ret = intel_guc_log_runtime_create(guc);
+ intel_runtime_pm_put(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
if (ret)
- goto err;
+ goto err_relay;
}
ret = guc_log_relay_file_create(guc);
@@ -472,7 +515,11 @@ static int guc_log_late_setup(struct intel_guc *guc)
return 0;
err_runtime:
- intel_guc_log_runtime_destroy(guc);
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ intel_guc_log_runtime_destroy(&dev_priv->guc);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+err_relay:
+ intel_guc_log_relay_destroy(guc);
err:
/* logging will remain off */
i915_modparams.guc_log_level = -1;
@@ -521,7 +568,6 @@ int intel_guc_log_create(struct intel_guc *guc)
struct i915_vma *vma;
unsigned long offset;
u32 flags;
- u32 size;
int ret;
GEM_BUG_ON(guc->log.vma);
@@ -529,13 +575,7 @@ int intel_guc_log_create(struct intel_guc *guc)
if (i915_modparams.guc_log_level > GUC_LOG_VERBOSITY_MAX)
i915_modparams.guc_log_level = GUC_LOG_VERBOSITY_MAX;
- /* The first page is to save log buffer state. Allocate one
- * extra page for others in case for overlap */
- size = (1 + GUC_LOG_DPC_PAGES + 1 +
- GUC_LOG_ISR_PAGES + 1 +
- GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
-
- vma = intel_guc_allocate_vma(guc, size);
+ vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
@@ -568,7 +608,6 @@ void intel_guc_log_destroy(struct intel_guc *guc)
int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
{
struct intel_guc *guc = &dev_priv->guc;
-
union guc_log_control log_param;
int ret;
@@ -582,7 +621,14 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
if (!log_param.logging_enabled && (i915_modparams.guc_log_level < 0))
return 0;
+ ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
+ if (ret)
+ return ret;
+ intel_runtime_pm_get(dev_priv);
ret = guc_log_control(guc, log_param.value);
+ intel_runtime_pm_put(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
if (ret < 0) {
DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret);
return ret;
@@ -602,14 +648,22 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
}
/* GuC logging is currently the only user of Guc2Host interrupts */
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_get(dev_priv);
gen9_enable_guc_interrupts(dev_priv);
+ intel_runtime_pm_put(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
} else {
/* Once logging is disabled, GuC won't generate logs & send an
* interrupt. But there could be some data in the log buffer
* which is yet to be captured. So request GuC to update the log
* buffer state and then collect the left over logs.
*/
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_get(dev_priv);
guc_flush_logs(guc);
+ intel_runtime_pm_put(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
/* As logging is disabled, update log level to reflect that */
i915_modparams.guc_log_level = -1;
@@ -624,19 +678,21 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv)
(i915_modparams.guc_log_level < 0))
return;
- mutex_lock(&dev_priv->drm.struct_mutex);
guc_log_late_setup(&dev_priv->guc);
- mutex_unlock(&dev_priv->drm.struct_mutex);
}
void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
{
+ struct intel_guc *guc = &dev_priv->guc;
+
if (!USES_GUC_SUBMISSION(dev_priv))
return;
mutex_lock(&dev_priv->drm.struct_mutex);
/* GuC logging is currently the only user of Guc2Host interrupts */
gen9_disable_guc_interrupts(dev_priv);
- intel_guc_log_runtime_destroy(&dev_priv->guc);
+ intel_guc_log_runtime_destroy(guc);
mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ intel_guc_log_relay_destroy(guc);
}
diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h
index 345447a..a1b629d 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/intel_guc_log.h
@@ -32,6 +32,11 @@
struct drm_i915_private;
struct intel_guc;
+/* The first page is to save log buffer state. Allocate one
+ * extra page for others in case for overlap */
+#define GUC_LOG_SIZE ((1 + GUC_LOG_DPC_PAGES + 1 + GUC_LOG_ISR_PAGES + \
+ 1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT)
+
struct intel_guc_log {
u32 flags;
struct i915_vma *vma;
@@ -54,6 +59,8 @@ struct intel_guc_log {
void intel_guc_log_destroy(struct intel_guc *guc);
int intel_guc_log_runtime_create(struct intel_guc *guc);
void intel_guc_log_runtime_destroy(struct intel_guc *guc);
+int intel_guc_log_relay_create(struct intel_guc *guc);
+void intel_guc_log_relay_destroy(struct intel_guc *guc);
int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
void i915_guc_log_register(struct drm_i915_private *dev_priv);
void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index d82ca0f..876650c 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -212,6 +212,22 @@ void intel_uc_fini_wq(struct drm_i915_private *dev_priv)
intel_guc_fini_wq(&dev_priv->guc);
}
+int intel_uc_init_misc(struct drm_i915_private *dev_priv)
+{
+ if (!USES_GUC(dev_priv))
+ return 0;
+
+ return intel_guc_log_relay_create(&dev_priv->guc);
+}
+
+void intel_uc_fini_misc(struct drm_i915_private *dev_priv)
+{
+ if (!USES_GUC(dev_priv))
+ return;
+
+ intel_guc_log_relay_destroy(&dev_priv->guc);
+}
+
int intel_uc_init(struct drm_i915_private *dev_priv)
{
struct intel_guc *guc = &dev_priv->guc;
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 8a72497..8a63544 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -35,6 +35,8 @@
void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
int intel_uc_init_wq(struct drm_i915_private *dev_priv);
void intel_uc_fini_wq(struct drm_i915_private *dev_priv);
+int intel_uc_init_misc(struct drm_i915_private *dev_priv);
+void intel_uc_fini_misc(struct drm_i915_private *dev_priv);
int intel_uc_init_hw(struct drm_i915_private *dev_priv);
void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
int intel_uc_init(struct drm_i915_private *dev_priv);
--
1.9.1
More information about the Intel-gfx-trybot
mailing list