[Intel-gfx] [PATCH 1/4] drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex
Sagar Arun Kamble
sagar.a.kamble at intel.com
Mon Jan 22 08:26:01 UTC 2018
This patch fixes lockdep issue due to circular locking dependency of
struct_mutex, i_mutex_key, mmap_sem, relay_channels_mutex.
For GuC log relay channel we create debugfs file that requires i_mutex_key
lock and we are doing that under struct_mutex. So we introduced newer
dependency as:
&dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
However, there is dependency from mmap_sem to struct_mutex. Hence we
separate the relay create/destroy operation from under struct_mutex.
======================================================
WARNING: possible circular locking dependency detected
4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted
------------------------------------------------------
debugfs_test/1388 is trying to acquire lock:
(&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915]
but task is already holding lock:
(&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #3 (&mm->mmap_sem){++++}:
_copy_to_user+0x1e/0x70
filldir+0x8c/0xf0
dcache_readdir+0xeb/0x160
iterate_dir+0xdc/0x140
SyS_getdents+0xa0/0x130
entry_SYSCALL_64_fastpath+0x1c/0x89
-> #2 (&sb->s_type->i_mutex_key#3){++++}:
start_creating+0x59/0x110
__debugfs_create_file+0x2e/0xe0
relay_create_buf_file+0x62/0x80
relay_late_setup_files+0x84/0x250
guc_log_late_setup+0x4f/0x110 [i915]
i915_guc_log_register+0x32/0x40 [i915]
i915_driver_load+0x7b6/0x1720 [i915]
i915_pci_probe+0x2e/0x90 [i915]
pci_device_probe+0x9c/0x120
driver_probe_device+0x2a3/0x480
__driver_attach+0xd9/0xe0
bus_for_each_dev+0x57/0x90
bus_add_driver+0x168/0x260
driver_register+0x52/0xc0
do_one_initcall+0x39/0x150
do_init_module+0x56/0x1ef
load_module+0x231c/0x2d70
SyS_finit_module+0xa5/0xe0
entry_SYSCALL_64_fastpath+0x1c/0x89
-> #1 (relay_channels_mutex){+.+.}:
relay_open+0x12c/0x2b0
intel_guc_log_runtime_create+0xab/0x230 [i915]
intel_guc_init+0x81/0x120 [i915]
intel_uc_init+0x29/0xa0 [i915]
i915_gem_init+0x182/0x530 [i915]
i915_driver_load+0xaa9/0x1720 [i915]
i915_pci_probe+0x2e/0x90 [i915]
pci_device_probe+0x9c/0x120
driver_probe_device+0x2a3/0x480
__driver_attach+0xd9/0xe0
bus_for_each_dev+0x57/0x90
bus_add_driver+0x168/0x260
driver_register+0x52/0xc0
do_one_initcall+0x39/0x150
do_init_module+0x56/0x1ef
load_module+0x231c/0x2d70
SyS_finit_module+0xa5/0xe0
entry_SYSCALL_64_fastpath+0x1c/0x89
-> #0 (&dev->struct_mutex){+.+.}:
__mutex_lock+0x81/0x9b0
i915_mutex_lock_interruptible+0x47/0x130 [i915]
i915_gem_fault+0x201/0x790 [i915]
__do_fault+0x15/0x70
__handle_mm_fault+0x677/0xdc0
handle_mm_fault+0x14f/0x2f0
__do_page_fault+0x2d1/0x560
page_fault+0x4c/0x60
other info that might help us debug this:
Chain exists of:
&dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&mm->mmap_sem);
lock(&sb->s_type->i_mutex_key#3);
lock(&mm->mmap_sem);
lock(&dev->struct_mutex);
*** DEADLOCK ***
1 lock held by debugfs_test/1388:
#0: (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560
stack backtrace:
CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016
Call Trace:
dump_stack+0x5f/0x86
print_circular_bug.isra.18+0x1d0/0x2c0
__lock_acquire+0x14ae/0x1b60
? lock_acquire+0xaf/0x200
lock_acquire+0xaf/0x200
? i915_mutex_lock_interruptible+0x47/0x130 [i915]
__mutex_lock+0x81/0x9b0
? i915_mutex_lock_interruptible+0x47/0x130 [i915]
? i915_mutex_lock_interruptible+0x47/0x130 [i915]
? i915_mutex_lock_interruptible+0x47/0x130 [i915]
i915_mutex_lock_interruptible+0x47/0x130 [i915]
? __pm_runtime_resume+0x4f/0x80
i915_gem_fault+0x201/0x790 [i915]
__do_fault+0x15/0x70
? _raw_spin_unlock+0x29/0x40
__handle_mm_fault+0x677/0xdc0
handle_mm_fault+0x14f/0x2f0
__do_page_fault+0x2d1/0x560
? page_fault+0x36/0x60
page_fault+0x4c/0x60
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104693
Testcase: igt/debugfs_test/read_all_entries # with enable_guc=1 and guc_log_level=1
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Marta Lofstedt <marta.lofstedt at intel.com>
Cc: Michal Winiarski <michal.winiarski at intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 12 +---
drivers/gpu/drm/i915/i915_drv.c | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 4 +-
drivers/gpu/drm/i915/intel_guc.c | 6 +-
drivers/gpu/drm/i915/intel_guc_log.c | 111 ++++++++++++++++++++++++++---------
drivers/gpu/drm/i915/intel_guc_log.h | 9 +++
drivers/gpu/drm/i915/intel_uc.c | 26 ++++++--
drivers/gpu/drm/i915/intel_uc.h | 4 +-
8 files changed, 125 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 80dc679..b45be0d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2467,7 +2467,6 @@ static int i915_guc_log_control_get(void *data, u64 *val)
static int i915_guc_log_control_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
- int ret;
if (!HAS_GUC(dev_priv))
return -ENODEV;
@@ -2475,16 +2474,7 @@ static int i915_guc_log_control_set(void *data, u64 val)
if (!dev_priv->guc.log.vma)
return -EINVAL;
- ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
- if (ret)
- return ret;
-
- intel_runtime_pm_get(dev_priv);
- ret = i915_guc_log_control(dev_priv, val);
- intel_runtime_pm_put(dev_priv);
-
- mutex_unlock(&dev_priv->drm.struct_mutex);
- return ret;
+ return i915_guc_log_control(dev_priv, val);
}
DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1fbe378..29048b9 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -626,7 +626,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
i915_gem_contexts_fini(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
- intel_uc_fini_wq(dev_priv);
+ intel_uc_fini_misc(dev_priv);
i915_gem_cleanup_userptr(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7f0684c..34d0d51 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5193,7 +5193,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
if (ret)
return ret;
- ret = intel_uc_init_wq(dev_priv);
+ ret = intel_uc_init_misc(dev_priv);
if (ret)
return ret;
@@ -5289,7 +5289,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex);
- intel_uc_fini_wq(dev_priv);
+ intel_uc_fini_misc(dev_priv);
if (ret != -EIO)
i915_gem_cleanup_userptr(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index ea30e7c..912c9c0 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -87,8 +87,10 @@ int intel_guc_init_wq(struct intel_guc *guc)
*/
guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
WQ_HIGHPRI | WQ_FREEZABLE);
- if (!guc->log.runtime.flush_wq)
+ if (!guc->log.runtime.flush_wq) {
+ DRM_ERROR("Couldn't allocate workqueue for GuC log\n");
return -ENOMEM;
+ }
/*
* Even though both sending GuC action, and adding a new workitem to
@@ -109,6 +111,8 @@ int intel_guc_init_wq(struct intel_guc *guc)
WQ_HIGHPRI);
if (!guc->preempt_wq) {
destroy_workqueue(guc->log.runtime.flush_wq);
+ DRM_ERROR("Couldn't allocate workqueue for GuC "
+ "preemption\n");
return -ENOMEM;
}
}
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 2ffc966..9e5600355 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -359,16 +359,21 @@ static bool guc_log_has_runtime(struct intel_guc *guc)
return guc->log.runtime.buf_addr != NULL;
}
+static bool guc_log_has_relay(struct intel_guc *guc)
+{
+ return guc->log.runtime.relay_chan != NULL;
+}
+
static int guc_log_runtime_create(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
void *vaddr;
- struct rchan *guc_log_relay_chan;
- size_t n_subbufs, subbuf_size;
int ret;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ GEM_BUG_ON(!guc_log_has_relay(guc));
+
GEM_BUG_ON(guc_log_has_runtime(guc));
ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true);
@@ -387,8 +392,38 @@ static int guc_log_runtime_create(struct intel_guc *guc)
guc->log.runtime.buf_addr = vaddr;
+ INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
+
+ return 0;
+}
+
+static void guc_log_runtime_destroy(struct intel_guc *guc)
+{
+ /*
+ * It's possible that the runtime stuff was never allocated because
+ * GuC log was disabled at the boot time.
+ **/
+ if (!guc_log_has_runtime(guc))
+ return;
+
+ i915_gem_object_unpin_map(guc->log.vma->obj);
+ guc->log.runtime.buf_addr = NULL;
+}
+
+int intel_guc_log_relay_create(struct intel_guc *guc)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ struct rchan *guc_log_relay_chan;
+ size_t n_subbufs, subbuf_size;
+ int ret;
+
+ if (!i915_modparams.guc_log_level)
+ return 0;
+
+ GEM_BUG_ON(guc_log_has_relay(guc));
+
/* Keep the size of sub buffers same as shared log buffer */
- subbuf_size = guc->log.vma->obj->base.size;
+ subbuf_size = GUC_LOG_SIZE;
/* Store up to 8 snapshots, which is large enough to buffer sufficient
* boot time logs and provides enough leeway to User, in terms of
@@ -407,33 +442,31 @@ static int guc_log_runtime_create(struct intel_guc *guc)
DRM_ERROR("Couldn't create relay chan for GuC logging\n");
ret = -ENOMEM;
- goto err_vaddr;
+ goto err;
}
GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
guc->log.runtime.relay_chan = guc_log_relay_chan;
- INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
return 0;
-err_vaddr:
- i915_gem_object_unpin_map(guc->log.vma->obj);
- guc->log.runtime.buf_addr = NULL;
+err:
+ /* logging will be off */
+ i915_modparams.guc_log_level = 0;
return ret;
}
-static void guc_log_runtime_destroy(struct intel_guc *guc)
+void intel_guc_log_relay_destroy(struct intel_guc *guc)
{
/*
- * It's possible that the runtime stuff was never allocated because
+ * It's possible that the relay was never allocated because
* GuC log was disabled at the boot time.
*/
- if (!guc_log_has_runtime(guc))
+ if (!guc_log_has_relay(guc))
return;
relay_close(guc->log.runtime.relay_chan);
- i915_gem_object_unpin_map(guc->log.vma->obj);
- guc->log.runtime.buf_addr = NULL;
+ guc->log.runtime.relay_chan = NULL;
}
static int guc_log_late_setup(struct intel_guc *guc)
@@ -441,17 +474,26 @@ static int guc_log_late_setup(struct intel_guc *guc)
struct drm_i915_private *dev_priv = guc_to_i915(guc);
int ret;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
if (!guc_log_has_runtime(guc)) {
/*
* If log was disabled at boot time, then setup needed to handle
* log buffer flush interrupts would not have been done yet, so
* do that now.
*/
- ret = guc_log_runtime_create(guc);
+ GEM_BUG_ON(guc_log_has_relay(guc));
+
+ ret = intel_guc_log_relay_create(guc);
if (ret)
goto err;
+
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_get(dev_priv);
+ ret = guc_log_runtime_create(guc);
+ intel_runtime_pm_put(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ if (ret)
+ goto err_relay;
}
ret = guc_log_relay_file_create(guc);
@@ -461,7 +503,11 @@ static int guc_log_late_setup(struct intel_guc *guc)
return 0;
err_runtime:
+ mutex_lock(&dev_priv->drm.struct_mutex);
guc_log_runtime_destroy(guc);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+err_relay:
+ intel_guc_log_relay_destroy(guc);
err:
/* logging will remain off */
i915_modparams.guc_log_level = 0;
@@ -509,17 +555,10 @@ int intel_guc_log_create(struct intel_guc *guc)
struct i915_vma *vma;
unsigned long offset;
u32 flags;
- u32 size;
int ret;
GEM_BUG_ON(guc->log.vma);
- /* The first page is to save log buffer state. Allocate one
- * extra page for others in case for overlap */
- size = (1 + GUC_LOG_DPC_PAGES + 1 +
- GUC_LOG_ISR_PAGES + 1 +
- GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
-
/* We require SSE 4.1 for fast reads from the GuC log buffer and
* it should be present on the chipsets supporting GuC based
* submisssions.
@@ -529,7 +568,7 @@ int intel_guc_log_create(struct intel_guc *guc)
goto err;
}
- vma = intel_guc_allocate_vma(guc, size);
+ vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
@@ -584,7 +623,15 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
return 0;
verbosity = enable_logging ? control_val - 1 : 0;
+
+ ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
+ if (ret)
+ return ret;
+ intel_runtime_pm_get(dev_priv);
ret = guc_log_control(guc, enable_logging, verbosity);
+ intel_runtime_pm_put(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
if (ret < 0) {
DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret);
return ret;
@@ -605,7 +652,11 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
}
/* GuC logging is currently the only user of Guc2Host interrupts */
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_get(dev_priv);
gen9_enable_guc_interrupts(dev_priv);
+ intel_runtime_pm_put(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
} else {
/*
* Once logging is disabled, GuC won't generate logs & send an
@@ -613,7 +664,11 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
* which is yet to be captured. So request GuC to update the log
* buffer state and then collect the left over logs.
*/
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_get(dev_priv);
guc_flush_logs(guc);
+ intel_runtime_pm_put(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
/* As logging is disabled, update log level to reflect that */
i915_modparams.guc_log_level = 0;
@@ -627,19 +682,21 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv)
if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level)
return;
- mutex_lock(&dev_priv->drm.struct_mutex);
guc_log_late_setup(&dev_priv->guc);
- mutex_unlock(&dev_priv->drm.struct_mutex);
}
void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
{
+ struct intel_guc *guc = &dev_priv->guc;
+
if (!USES_GUC_SUBMISSION(dev_priv))
return;
mutex_lock(&dev_priv->drm.struct_mutex);
/* GuC logging is currently the only user of Guc2Host interrupts */
gen9_disable_guc_interrupts(dev_priv);
- guc_log_runtime_destroy(&dev_priv->guc);
+ guc_log_runtime_destroy(guc);
mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ intel_guc_log_relay_destroy(guc);
}
diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h
index f512cf7..84915a8 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/intel_guc_log.h
@@ -32,6 +32,13 @@
struct drm_i915_private;
struct intel_guc;
+/*
+ * The first page is to save log buffer state. Allocate one
+ * extra page for others in case for overlap
+ */
+#define GUC_LOG_SIZE ((1 + GUC_LOG_DPC_PAGES + 1 + GUC_LOG_ISR_PAGES + \
+ 1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT)
+
struct intel_guc_log {
u32 flags;
struct i915_vma *vma;
@@ -52,6 +59,8 @@ struct intel_guc_log {
int intel_guc_log_create(struct intel_guc *guc);
void intel_guc_log_destroy(struct intel_guc *guc);
+int intel_guc_log_relay_create(struct intel_guc *guc);
+void intel_guc_log_relay_destroy(struct intel_guc *guc);
int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
void i915_guc_log_register(struct drm_i915_private *dev_priv);
void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index f78a17b..e3f3509 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -236,28 +236,44 @@ static void guc_disable_communication(struct intel_guc *guc)
guc->send = intel_guc_send_nop;
}
-int intel_uc_init_wq(struct drm_i915_private *dev_priv)
+int intel_uc_init_misc(struct drm_i915_private *dev_priv)
{
+ struct intel_guc *guc = &dev_priv->guc;
int ret;
if (!USES_GUC(dev_priv))
return 0;
- ret = intel_guc_init_wq(&dev_priv->guc);
+ ret = intel_guc_init_wq(guc);
if (ret) {
DRM_ERROR("Couldn't allocate workqueues for GuC\n");
- return ret;
+ goto err;
+ }
+
+ ret = intel_guc_log_relay_create(guc);
+ if (ret) {
+ DRM_ERROR("Couldn't allocate relay for GuC log\n");
+ goto err_relay;
}
return 0;
+
+err_relay:
+ intel_guc_fini_wq(guc);
+err:
+ return ret;
}
-void intel_uc_fini_wq(struct drm_i915_private *dev_priv)
+void intel_uc_fini_misc(struct drm_i915_private *dev_priv)
{
+ struct intel_guc *guc = &dev_priv->guc;
+
if (!USES_GUC(dev_priv))
return;
- intel_guc_fini_wq(&dev_priv->guc);
+ intel_guc_fini_wq(guc);
+
+ intel_guc_log_relay_destroy(guc);
}
int intel_uc_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 8a72497..f2984e0 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -33,8 +33,8 @@
void intel_uc_init_mmio(struct drm_i915_private *dev_priv);
void intel_uc_init_fw(struct drm_i915_private *dev_priv);
void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
-int intel_uc_init_wq(struct drm_i915_private *dev_priv);
-void intel_uc_fini_wq(struct drm_i915_private *dev_priv);
+int intel_uc_init_misc(struct drm_i915_private *dev_priv);
+void intel_uc_fini_misc(struct drm_i915_private *dev_priv);
int intel_uc_init_hw(struct drm_i915_private *dev_priv);
void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
int intel_uc_init(struct drm_i915_private *dev_priv);
--
1.9.1
More information about the Intel-gfx
mailing list