[RFC] locking/lockdep: Don't bunch up all flush_workqueue callers into a single completion cross-release context
Tvrtko Ursulin
tursulin at ursulin.net
Fri Oct 13 10:24:04 UTC 2017
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
It looks like all completions created by flush_workqueue map
into a single lock class, which creates lockdep false positives.
Move the flusher setup into a static inline wrapper around
__flush_workqueue so that a separate lock class gets created
per call site.
Example of a false positive:
[ 20.805315] ======================================================
[ 20.805316] WARNING: possible circular locking dependency detected
[ 20.805319] 4.14.0-rc4-CI-CI_DRM_3225+ #1 Tainted: G U
[ 20.805320] ------------------------------------------------------
[ 20.805322] kworker/6:1H/1438 is trying to acquire lock:
[ 20.805324] (&mm->mmap_sem){++++}, at: [<ffffffffa01c8e01>] __i915_gem_userptr_get_pages_worker+0x141/0x240 [i915]
[ 20.805355] but now in release context of a crosslock acquired at the following:
[ 20.805357] ((complete)&this_flusher.done){+.+.}, at: [<ffffffff8190b06d>] wait_for_completion+0x1d/0x20
[ 20.805363] which lock already depends on the new lock.
[ 20.805365] the existing dependency chain (in reverse order) is:
[ 20.805367] -> #1 ((complete)&this_flusher.done){+.+.}:
[ 20.805372] __lock_acquire+0x1420/0x15e0
[ 20.805374] lock_acquire+0xb0/0x200
[ 20.805376] wait_for_common+0x58/0x210
[ 20.805378] wait_for_completion+0x1d/0x20
[ 20.805381] flush_workqueue+0x1af/0x540
[ 20.805400] i915_gem_userptr_mn_invalidate_range_start+0x13c/0x150 [i915]
[ 20.805404] __mmu_notifier_invalidate_range_start+0x76/0xc0
[ 20.805406] unmap_vmas+0x7d/0xa0
[ 20.805408] unmap_region+0xae/0x110
[ 20.805410] do_munmap+0x276/0x3f0
[ 20.805411] vm_munmap+0x67/0x90
[ 20.805413] SyS_munmap+0xe/0x20
[ 20.805415] entry_SYSCALL_64_fastpath+0x1c/0xb1
[ 20.805416] -> #0 (&mm->mmap_sem){++++}:
[ 20.805419] down_read+0x3e/0x70
[ 20.805435] __i915_gem_userptr_get_pages_worker+0x141/0x240 [i915]
[ 20.805438] process_one_work+0x233/0x660
[ 20.805440] worker_thread+0x4e/0x3b0
[ 20.805441] kthread+0x152/0x190
[ 20.805442] other info that might help us debug this:
[ 20.805445] Possible unsafe locking scenario by crosslock:
[ 20.805447] CPU0 CPU1
[ 20.805448] ---- ----
[ 20.805449] lock(&mm->mmap_sem);
[ 20.805451] lock((complete)&this_flusher.done);
[ 20.805453] lock(&mm->mmap_sem);
[ 20.805455] unlock((complete)&this_flusher.done);
[ 20.805457] *** DEADLOCK ***
[ 20.805460] 2 locks held by kworker/6:1H/1438:
[ 20.805461] #0: (&(&pool->lock)->rlock){-.-.}, at: [<ffffffff8109c94c>] process_one_work+0x2dc/0x660
[ 20.805465] #1: (&x->wait#10){....}, at: [<ffffffff810cd69d>] complete+0x1d/0x60
[ 20.805469] stack backtrace:
[ 20.805472] CPU: 6 PID: 1438 Comm: kworker/6:1H Tainted: G U 4.14.0-rc4-CI-CI_DRM_3225+ #1
[ 20.805474] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS V1.12 02/15/2016
[ 20.805480] Call Trace:
[ 20.805483] dump_stack+0x68/0x9f
[ 20.805486] print_circular_bug+0x235/0x3c0
[ 20.805488] ? HARDIRQ_verbose+0x10/0x10
[ 20.805490] check_prev_add+0x430/0x840
[ 20.805492] ? ret_from_fork+0x27/0x40
[ 20.805494] lock_commit_crosslock+0x3ee/0x660
[ 20.805496] ? lock_commit_crosslock+0x3ee/0x660
[ 20.805498] complete+0x29/0x60
[ 20.805500] pwq_dec_nr_in_flight+0x9c/0xa0
[ 20.805502] ? _raw_spin_lock_irq+0x40/0x50
[ 20.805504] process_one_work+0x335/0x660
[ 20.805506] worker_thread+0x4e/0x3b0
[ 20.805508] kthread+0x152/0x190
[ 20.805509] ? process_one_work+0x660/0x660
[ 20.805511] ? kthread_create_on_node+0x40/0x40
[ 20.805513] ret_from_fork+0x27/0x40
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: linux-kernel at vger.kernel.org
Cc: Tejun Heo <tj at kernel.org>
Cc: Linus Torvalds <torvalds at linux-foundation.org>
Cc: Peter Zijlstra <peterz at infradead.org>
Cc: Thomas Gleixner <tglx at linutronix.de>
Cc: boqun.feng at gmail.com
Cc: byungchul.park at lge.com
Cc: david at fromorbit.com
Cc: johannes at sipsolutions.net
Cc: oleg at redhat.com
--
Borrowed the Cc list from a recent fix in the same area.
Apologies if it is too wide.
Another pair of eyes to check that my thinking here is correct
would also be good.
---
include/linux/workqueue.h | 26 +++++++++++++++++++++++++-
kernel/workqueue.c | 41 ++++++++++++++---------------------------
2 files changed, 39 insertions(+), 28 deletions(-)
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 1c49431f3121..ca63d4b09110 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -12,6 +12,7 @@
#include <linux/threads.h>
#include <linux/atomic.h>
#include <linux/cpumask.h>
+#include <linux/completion.h>
struct workqueue_struct;
@@ -449,7 +450,30 @@ extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *dwork, unsigned long delay);
-extern void flush_workqueue(struct workqueue_struct *wq);
+/*
+ * Structure used to wait for workqueue flush.
+ */
+struct wq_flusher {
+ struct list_head list; /* WQ: list of flushers */
+ int flush_color; /* WQ: flush color waiting for */
+ struct completion done; /* flush completion */
+};
+
+extern void __flush_workqueue(struct workqueue_struct *wq,
+ struct wq_flusher *flusher);
+
+/* Static inline wrapper so completion gets separate lockdep keys. */
+static inline void flush_workqueue(struct workqueue_struct *wq)
+{
+ struct wq_flusher this_flusher = {
+ .list = LIST_HEAD_INIT(this_flusher.list),
+ .flush_color = -1,
+ .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
+ };
+
+ __flush_workqueue(wq, &this_flusher);
+}
+
extern void drain_workqueue(struct workqueue_struct *wq);
extern int schedule_on_each_cpu(work_func_t func);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 64d0edf428f8..c2e8f61f6b95 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -221,15 +221,6 @@ struct pool_workqueue {
struct rcu_head rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);
-/*
- * Structure used to wait for workqueue flush.
- */
-struct wq_flusher {
- struct list_head list; /* WQ: list of flushers */
- int flush_color; /* WQ: flush color waiting for */
- struct completion done; /* flush completion */
-};
-
struct wq_device;
/*
@@ -2606,13 +2597,9 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
* This function sleeps until all work items which were queued on entry
* have finished execution, but it is not livelocked by new incoming ones.
*/
-void flush_workqueue(struct workqueue_struct *wq)
+void __flush_workqueue(struct workqueue_struct *wq,
+ struct wq_flusher *this_flusher)
{
- struct wq_flusher this_flusher = {
- .list = LIST_HEAD_INIT(this_flusher.list),
- .flush_color = -1,
- .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
- };
int next_color;
if (WARN_ON(!wq_online))
@@ -2635,14 +2622,14 @@ void flush_workqueue(struct workqueue_struct *wq)
* by one.
*/
WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
- this_flusher.flush_color = wq->work_color;
+ this_flusher->flush_color = wq->work_color;
wq->work_color = next_color;
if (!wq->first_flusher) {
/* no flush in progress, become the first flusher */
- WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
+ WARN_ON_ONCE(wq->flush_color != this_flusher->flush_color);
- wq->first_flusher = &this_flusher;
+ wq->first_flusher = this_flusher;
if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
wq->work_color)) {
@@ -2653,8 +2640,8 @@ void flush_workqueue(struct workqueue_struct *wq)
}
} else {
/* wait in queue */
- WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
- list_add_tail(&this_flusher.list, &wq->flusher_queue);
+ WARN_ON_ONCE(wq->flush_color == this_flusher->flush_color);
+ list_add_tail(&this_flusher->list, &wq->flusher_queue);
flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
}
} else {
@@ -2663,14 +2650,14 @@ void flush_workqueue(struct workqueue_struct *wq)
* The next flush completion will assign us
* flush_color and transfer to flusher_queue.
*/
- list_add_tail(&this_flusher.list, &wq->flusher_overflow);
+ list_add_tail(&this_flusher->list, &wq->flusher_overflow);
}
check_flush_dependency(wq, NULL);
mutex_unlock(&wq->mutex);
- wait_for_completion(&this_flusher.done);
+ wait_for_completion(&this_flusher->done);
/*
* Wake-up-and-cascade phase
@@ -2678,19 +2665,19 @@ void flush_workqueue(struct workqueue_struct *wq)
* First flushers are responsible for cascading flushes and
* handling overflow. Non-first flushers can simply return.
*/
- if (wq->first_flusher != &this_flusher)
+ if (wq->first_flusher != this_flusher)
return;
mutex_lock(&wq->mutex);
/* we might have raced, check again with mutex held */
- if (wq->first_flusher != &this_flusher)
+ if (wq->first_flusher != this_flusher)
goto out_unlock;
wq->first_flusher = NULL;
- WARN_ON_ONCE(!list_empty(&this_flusher.list));
- WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
+ WARN_ON_ONCE(!list_empty(&this_flusher->list));
+ WARN_ON_ONCE(wq->flush_color != this_flusher->flush_color);
while (true) {
struct wq_flusher *next, *tmp;
@@ -2755,7 +2742,7 @@ void flush_workqueue(struct workqueue_struct *wq)
out_unlock:
mutex_unlock(&wq->mutex);
}
-EXPORT_SYMBOL(flush_workqueue);
+EXPORT_SYMBOL(__flush_workqueue);
/**
* drain_workqueue - drain a workqueue
--
2.9.5
More information about the Intel-gfx-trybot
mailing list