[PATCH] locking/mutex: Add p->blocked_on wrappers for correctness checks
Thomas Hellström
thomas.hellstrom at linux.intel.com
Mon Aug 11 14:39:13 UTC 2025
Hi Valentin,
On Sat, 2025-07-12 at 03:33 +0000, Valentin Schneider wrote:
> This lets us assert mutex::wait_lock is held whenever we access
> p->blocked_on, as well as warn us for unexpected state changes.
>
> [fix conflicts, call in more places]
> [jstultz: tweaked commit subject, reworked a good bit]
> Signed-off-by: Valentin Schneider <valentin.schneider at arm.com>
> Signed-off-by: Connor O'Brien <connoro at google.com>
> Signed-off-by: John Stultz <jstultz at google.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
> Tested-by: K Prateek Nayak <kprateek.nayak at amd.com>
> Link: https://lkml.kernel.org/r/20250712033407.2383110-4-jstultz@google.com
> ---
> include/linux/sched.h | 64 ++++++++++++++++++++++++++++++++++--
> kernel/locking/mutex-debug.c | 4 +--
> kernel/locking/mutex.c | 32 ++++++++----------
> kernel/locking/ww_mutex.h | 8 ++---
> 4 files changed, 81 insertions(+), 27 deletions(-)
>
Our CI is hitting a lot of these:
https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5826
From looking at the code, after wounding the mutex owner the wound-wait
code wakes it up opportunistically, so that *if* it is waiting on
another wound-wait mutex it can re-evaluate whether it should roll
back. Ideally, in this case, shouldn't the blocked_on state be cleared
by the process that was woken up?
Otherwise, if the wound-wait code misbehaves in doing this, we need to
fix it.
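To make the question concrete: __ww_mutex_wound() holds the wait_lock of
the mutex that the wounded task *owns*, while that task may currently be
blocked on a *different* ww mutex, so an unconditional
__clear_task_blocked_on(owner, lock) would seem to trip the "different
lock" warning. Below is a minimal, purely illustrative sketch (the helper
name is made up, this is not a proposed patch) of the kind of relaxed
clearing I mean, where anything not matching the lock whose wait_lock we
hold is left for the woken task itself:

static inline void __clear_task_blocked_on_if(struct task_struct *p,
					      struct mutex *m)
{
	/*
	 * We only hold @m's wait_lock here; @p may be blocked on a
	 * different mutex whose wait_lock we do not hold.
	 */
	lockdep_assert_held_once(&m->wait_lock);

	/*
	 * Only clear a blocked_on relationship that actually refers
	 * to @m. Anything else is left for the woken task to clear
	 * or re-set under the proper wait_lock once it runs.
	 */
	if (p->blocked_on == m)
		p->blocked_on = NULL;
}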
Thanks,
Thomas
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 33ad240ec900..5b4e1cd52e27 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -34,6 +34,7 @@
> #include <linux/sched/prio.h>
> #include <linux/sched/types.h>
> #include <linux/signal_types.h>
> +#include <linux/spinlock.h>
> #include <linux/syscall_user_dispatch_types.h>
> #include <linux/mm_types_task.h>
> #include <linux/netdevice_xmit.h>
> @@ -2129,6 +2130,67 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
> __cond_resched_rwlock_write(lock); \
> })
>
> +#ifndef CONFIG_PREEMPT_RT
> +static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
> +{
> + struct mutex *m = p->blocked_on;
> +
> + if (m)
> + lockdep_assert_held_once(&m->wait_lock);
> + return m;
> +}
> +
> +static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
> +{
> + WARN_ON_ONCE(!m);
> + /* The task should only be setting itself as blocked */
> + WARN_ON_ONCE(p != current);
> + /* Currently we serialize blocked_on under the mutex::wait_lock */
> + lockdep_assert_held_once(&m->wait_lock);
> + /*
> + * Check to ensure we don't overwrite an existing mutex value
> + * with a different mutex. Note, setting it to the same
> + * lock repeatedly is ok.
> + */
> + WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
> + p->blocked_on = m;
> +}
> +
> +static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
> +{
> + guard(raw_spinlock_irqsave)(&m->wait_lock);
> + __set_task_blocked_on(p, m);
> +}
> +
> +static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
> +{
> + WARN_ON_ONCE(!m);
> + /* Currently we serialize blocked_on under the mutex::wait_lock */
> + lockdep_assert_held_once(&m->wait_lock);
> + /*
> + * There may be cases where we re-clear already cleared
> + * blocked_on relationships, but make sure we are not
> + * clearing the relationship with a different lock.
> + */
> + WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
> + p->blocked_on = NULL;
> +}
> +
> +static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
> +{
> + guard(raw_spinlock_irqsave)(&m->wait_lock);
> + __clear_task_blocked_on(p, m);
> +}
> +#else
> +static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
> +{
> +}
> +
> +static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
> +{
> +}
> +#endif /* !CONFIG_PREEMPT_RT */
> +
> static __always_inline bool need_resched(void)
> {
> return unlikely(tif_need_resched());
> @@ -2168,8 +2230,6 @@ extern bool sched_task_on_rq(struct task_struct *p);
> extern unsigned long get_wchan(struct task_struct *p);
> extern struct task_struct *cpu_curr_snapshot(int cpu);
>
> -#include <linux/spinlock.h>
> -
> /*
> * In order to reduce various lock holder preemption latencies
> provide an
> * interface to see if a vCPU is currently running or not.
> diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
> index 758b7a6792b0..949103fd8e9b 100644
> --- a/kernel/locking/mutex-debug.c
> +++ b/kernel/locking/mutex-debug.c
> @@ -54,13 +54,13 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
> lockdep_assert_held(&lock->wait_lock);
>
> /* Current thread can't be already blocked (since it's executing!) */
> - DEBUG_LOCKS_WARN_ON(task->blocked_on);
> + DEBUG_LOCKS_WARN_ON(__get_task_blocked_on(task));
> }
>
> void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
> struct task_struct *task)
> {
> - struct mutex *blocked_on = READ_ONCE(task->blocked_on);
> + struct mutex *blocked_on = __get_task_blocked_on(task);
>
> DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
> DEBUG_LOCKS_WARN_ON(waiter->task != task);
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index e2f59863a866..80d778fedd60 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -644,8 +644,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> goto err_early_kill;
> }
>
> - WARN_ON(current->blocked_on);
> - current->blocked_on = lock;
> + __set_task_blocked_on(current, lock);
> set_current_state(state);
> trace_contention_begin(lock, LCB_F_MUTEX);
> for (;;) {
> @@ -685,9 +684,9 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> /*
> * As we likely have been woken up by task
> * that has cleared our blocked_on state, re-set
> - * it to the lock we are trying to aquire.
> + * it to the lock we are trying to acquire.
> */
> - current->blocked_on = lock;
> + set_task_blocked_on(current, lock);
> set_current_state(state);
> /*
> * Here we order against unlock; we must either see it change
> @@ -699,11 +698,15 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
>
> if (first) {
> trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
> - /* clear blocked_on as mutex_optimistic_spin may schedule() */
> - current->blocked_on = NULL;
> + /*
> + * mutex_optimistic_spin() can call schedule(), so
> + * clear blocked on so we don't become unselectable
> + * to run.
> + */
> + clear_task_blocked_on(current, lock);
> if (mutex_optimistic_spin(lock, ww_ctx, &waiter))
> break;
> - current->blocked_on = lock;
> + set_task_blocked_on(current, lock);
> trace_contention_begin(lock, LCB_F_MUTEX);
> }
>
> @@ -711,7 +714,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> }
> raw_spin_lock_irqsave(&lock->wait_lock, flags);
> acquired:
> - current->blocked_on = NULL;
> + __clear_task_blocked_on(current, lock);
> __set_current_state(TASK_RUNNING);
>
> if (ww_ctx) {
> @@ -741,11 +744,11 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
> return 0;
>
> err:
> - current->blocked_on = NULL;
> + __clear_task_blocked_on(current, lock);
> __set_current_state(TASK_RUNNING);
> __mutex_remove_waiter(lock, &waiter);
> err_early_kill:
> - WARN_ON(current->blocked_on);
> + WARN_ON(__get_task_blocked_on(current));
> trace_contention_end(lock, ret);
> raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
> debug_mutex_free_waiter(&waiter);
> @@ -956,14 +959,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
> next = waiter->task;
>
> debug_mutex_wake_waiter(lock, waiter);
> - /*
> - * Unlock wakeups can be happening in parallel
> - * (when optimistic spinners steal and release
> - * the lock), so blocked_on may already be
> - * cleared here.
> - */
> - WARN_ON(next->blocked_on && next->blocked_on != lock);
> - next->blocked_on = NULL;
> + __clear_task_blocked_on(next, lock);
> wake_q_add(&wake_q, next);
> }
>
> diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
> index 45fe05e51db1..086fd5487ca7 100644
> --- a/kernel/locking/ww_mutex.h
> +++ b/kernel/locking/ww_mutex.h
> @@ -283,15 +283,13 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
> if (waiter->ww_ctx->acquired > 0 && __ww_ctx_less(waiter->ww_ctx, ww_ctx)) {
> #ifndef WW_RT
> debug_mutex_wake_waiter(lock, waiter);
> +#endif
> /*
> * When waking up the task to die, be sure to clear the
> * blocked_on pointer. Otherwise we can see circular
> * blocked_on relationships that can't resolve.
> */
> - WARN_ON(waiter->task->blocked_on &&
> - waiter->task->blocked_on != lock);
> -#endif
> - waiter->task->blocked_on = NULL;
> + __clear_task_blocked_on(waiter->task, lock);
> wake_q_add(wake_q, waiter->task);
> }
>
> @@ -345,7 +343,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
> * blocked_on pointer. Otherwise we can see circular
> * blocked_on relationships that can't resolve.
> */
> - owner->blocked_on = NULL;
> + __clear_task_blocked_on(owner, lock);
> wake_q_add(wake_q, owner);
> }
> return true;