[Intel-gfx] [topic/core-for-CI] Revert "iommu/dma: Fix race condition during iova_domain initialization"
Karolina Drobnik
karolina.drobnik at intel.com
Wed Sep 14 12:40:45 UTC 2022
This reverts commit ac9a5d522bb80be50ea84965699e1c8257d745ce.
This change introduces a regression on Alder Lake that completely
blocks testing. To enable CI and avoid possible circular locking
warning, revert the patch.
kernel log:
======================================================
WARNING: possible circular locking dependency detected
6.0.0-rc5-CI_DRM_12132-g6c93e979e542+ #1 Not tainted
------------------------------------------------------
cpuhp/0/15 is trying to acquire lock:
ffff8881013df278 (&(&priv->bus_notifier)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x20/0x50
but task is already holding lock:
ffffffff826490c0 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun+0x48/0x1f0
which lock already depends on the new loc
the existing dependency chain (in reverse order) is:
-> #3 (cpuhp_state-up){+.+.}-{0:0}:
lock_acquire+0xd3/0x310
cpuhp_thread_fun+0xa6/0x1f0
smpboot_thread_fn+0x1b5/0x260
kthread+0xed/0x120
ret_from_fork+0x1f/0x30
-> #2 (cpu_hotplug_lock){++++}-{0:0}:
lock_acquire+0xd3/0x310
__cpuhp_state_add_instance+0x43/0x1c0
iova_domain_init_rcaches+0x199/0x1c0
iommu_setup_dma_ops+0x130/0x440
bus_iommu_probe+0x26a/0x2d0
bus_set_iommu+0x82/0xd0
intel_iommu_init+0xe33/0x1039
pci_iommu_init+0x9/0x31
do_one_initcall+0x53/0x2f0
kernel_init_freeable+0x18f/0x1e1
kernel_init+0x11/0x120
ret_from_fork+0x1f/0x30
-> #1 (&domain->iova_cookie->mutex){+.+.}-{3:3}:
lock_acquire+0xd3/0x310
__mutex_lock+0x97/0xf10
iommu_setup_dma_ops+0xd7/0x440
iommu_probe_device+0xa4/0x180
iommu_bus_notifier+0x2d/0x40
notifier_call_chain+0x31/0x90
blocking_notifier_call_chain+0x3a/0x50
device_add+0x3c1/0x900
pci_device_add+0x255/0x580
pci_scan_single_device+0xa6/0xd0
pci_scan_slot+0x7a/0x1b0
pci_scan_child_bus_extend+0x35/0x2a0
vmd_probe+0x5cd/0x970
pci_device_probe+0x95/0x110
really_probe+0xd6/0x350
__driver_probe_device+0x73/0x170
driver_probe_device+0x1a/0x90
__driver_attach+0xbc/0x190
bus_for_each_dev+0x72/0xc0
bus_add_driver+0x1bb/0x210
driver_register+0x66/0xc0
do_one_initcall+0x53/0x2f0
kernel_init_freeable+0x18f/0x1e1
kernel_init+0x11/0x120
ret_from_fork+0x1f/0x30
-> #0 (&(&priv->bus_notifier)->rwsem){++++}-{3:3}:
validate_chain+0xb3f/0x2000
__lock_acquire+0x5a4/0xb70
lock_acquire+0xd3/0x310
down_read+0x39/0x140
blocking_notifier_call_chain+0x20/0x50
device_add+0x3c1/0x900
platform_device_add+0x108/0x240
coretemp_cpu_online+0xe1/0x15e [coretemp]
cpuhp_invoke_callback+0x181/0x8a0
cpuhp_thread_fun+0x188/0x1f0
smpboot_thread_fn+0x1b5/0x260
kthread+0xed/0x120
ret_from_fork+0x1f/0x30
other info that might help us debug thi
Chain exists of &(&priv->bus_notifier)->rwsem --> cpu_hotplug_lock --> cpuhp_state-
Possible unsafe locking scenari
CPU0 CPU1
---- ----
lock(cpuhp_state-up);
lock(cpu_hotplug_lock);
lock(cpuhp_state-up);
lock(&(&priv->bus_notifier)->rwsem);
*** DEADLOCK *
2 locks held by cpuhp/0/15:
#0: ffffffff82648f10 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x48/0x1f0
#1: ffffffff826490c0 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun+0x48/0x1f0
stack backtrace:
CPU: 0 PID: 15 Comm: cpuhp/0 Not tainted 6.0.0-rc5-CI_DRM_12132-g6c93e979e542+ #1
Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-P DDR4 RVP, BIOS ADLPFWI1.R00.3135.A00.2203251419 03/25/2022
Call Trace:
<TASK>
dump_stack_lvl+0x56/0x7f
check_noncircular+0x132/0x150
validate_chain+0xb3f/0x2000
__lock_acquire+0x5a4/0xb70
lock_acquire+0xd3/0x310
? blocking_notifier_call_chain+0x20/0x50
down_read+0x39/0x140
? blocking_notifier_call_chain+0x20/0x50
blocking_notifier_call_chain+0x20/0x50
device_add+0x3c1/0x900
? dev_set_name+0x4e/0x70
platform_device_add+0x108/0x240
coretemp_cpu_online+0xe1/0x15e [coretemp]
? create_core_data+0x550/0x550 [coretemp]
cpuhp_invoke_callback+0x181/0x8a0
cpuhp_thread_fun+0x188/0x1f0
? smpboot_thread_fn+0x1e/0x260
smpboot_thread_fn+0x1b5/0x260
? sort_range+0x20/0x20
kthread+0xed/0x120
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x1f/0x30
</TASK>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6641
Signed-off-by: Karolina Drobnik <karolina.drobnik at intel.com>
Cc: Lucas De Marchi <lucas.demarchi at intel.com>
---
drivers/iommu/dma-iommu.c | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 17dd683b2fce..9616b473e4c7 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -65,7 +65,6 @@ struct iommu_dma_cookie {
/* Domain for flush queue callback; NULL if flush queue not in use */
struct iommu_domain *fq_domain;
- struct mutex mutex;
};
static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
@@ -312,7 +311,6 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
if (!domain->iova_cookie)
return -ENOMEM;
- mutex_init(&domain->iova_cookie->mutex);
return 0;
}
@@ -563,33 +561,26 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
}
/* start_pfn is always nonzero for an already-initialised domain */
- mutex_lock(&cookie->mutex);
if (iovad->start_pfn) {
if (1UL << order != iovad->granule ||
base_pfn != iovad->start_pfn) {
pr_warn("Incompatible range for DMA domain\n");
- ret = -EFAULT;
- goto done_unlock;
+ return -EFAULT;
}
- ret = 0;
- goto done_unlock;
+ return 0;
}
init_iova_domain(iovad, 1UL << order, base_pfn);
ret = iova_domain_init_rcaches(iovad);
if (ret)
- goto done_unlock;
+ return ret;
/* If the FQ fails we can simply fall back to strict mode */
if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
domain->type = IOMMU_DOMAIN_DMA;
- ret = iova_reserve_iommu_regions(dev, domain);
-
-done_unlock:
- mutex_unlock(&cookie->mutex);
- return ret;
+ return iova_reserve_iommu_regions(dev, domain);
}
/**
--
2.25.1
More information about the Intel-gfx
mailing list