[Intel-gfx] [PATCH 4/9] drm/i915: Mark up the calling context for intel_wakeref_put()

Wed Nov 20 12:46:57 UTC 2019

On 20/11/2019 09:32, Chris Wilson wrote:
> Previously, we assumed we could use mutex_trylock() within an atomic
> context, falling back to a worker if contended. However, such trickery
> is illegal inside interrupt context, and so we need to always use a
> worker under such circumstances. As we normally are in process context,
> we can typically use a plain mutex, and only defer to a work when we
> know we are caller from an interrupt path.
> 
> Fixes: 51fbd8de87dc ("drm/i915/pmu: Atomically acquire the gt_pm wakeref")
> References: a0855d24fc22d ("locking/mutex: Complain upon mutex API misuse in IRQ contexts")
> References: https://bugs.freedesktop.org/show_bug.cgi?id=111626
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c    |  3 +-
>   drivers/gpu/drm/i915/gt/intel_engine_pm.h    | 10 +++++++
>   drivers/gpu/drm/i915/gt/intel_gt_pm.c        |  1 -
>   drivers/gpu/drm/i915/gt/intel_gt_pm.h        |  5 ++++
>   drivers/gpu/drm/i915/gt/intel_lrc.c          |  2 +-
>   drivers/gpu/drm/i915/gt/intel_reset.c        |  2 +-
>   drivers/gpu/drm/i915/gt/selftest_engine_pm.c |  7 +++--
>   drivers/gpu/drm/i915/i915_active.c           |  5 ++--
>   drivers/gpu/drm/i915/i915_pmu.c              |  6 ++--
>   drivers/gpu/drm/i915/intel_wakeref.c         |  8 +++---
>   drivers/gpu/drm/i915/intel_wakeref.h         | 30 ++++++++++++++------
>   11 files changed, 54 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 1f517357a268..f3035b3ab9fa 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -210,7 +210,8 @@ static int __engine_park(struct intel_wakeref *wf)
>   
>   	engine->execlists.no_priolist = false;
>   
> -	intel_gt_pm_put(engine->gt);
> +	/* While we call i915_vma_parked, we have to break the lock cycle */
> +	intel_gt_pm_put_async(engine->gt);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> index 739c50fefcef..24e20344dc22 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> @@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
>   	intel_wakeref_put(&engine->wakeref);
>   }
>   
> +static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_put_async(&engine->wakeref);
> +}
> +
> +static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_unlock_wait(&engine->wakeref);
> +}
> +
>   void intel_engine_init__pm(struct intel_engine_cs *engine);
>   
>   #endif /* INTEL_ENGINE_PM_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index 470fbdc30e5a..f6b5169d623f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -105,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf)
>   static const struct intel_wakeref_ops wf_ops = {
>   	.get = __gt_unpark,
>   	.put = __gt_park,
> -	.flags = INTEL_WAKEREF_PUT_ASYNC,
>   };
>   
>   void intel_gt_pm_init_early(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> index b3e17399be9b..990efc27a4e4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> @@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
>   	intel_wakeref_put(&gt->wakeref);
>   }
>   
> +static inline void intel_gt_pm_put_async(struct intel_gt *gt)
> +{
> +	intel_wakeref_put_async(&gt->wakeref);
> +}
> +
>   static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
>   {
>   	return intel_wakeref_wait_for_idle(&gt->wakeref);
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 33ce258d484f..b65bc06855b0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1172,7 +1172,7 @@ __execlists_schedule_out(struct i915_request *rq,
>   
>   	intel_engine_context_out(engine);
>   	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> -	intel_gt_pm_put(engine->gt);
> +	intel_gt_pm_put_async(engine->gt);
>   
>   	/*
>   	 * If this is part of a virtual engine, its next request may
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index b7007cd78c6f..0388f9375366 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -1125,7 +1125,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
>   out:
>   	intel_engine_cancel_stop_cs(engine);
>   	reset_finish_engine(engine);
> -	intel_engine_pm_put(engine);
> +	intel_engine_pm_put_async(engine);
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
> index 20b9c83f43ad..cbf6b0735272 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
> @@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
>   				pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
>   				       engine->name, p->name);
>   			else
> -				intel_engine_pm_put(engine);
> -			intel_engine_pm_put(engine);
> +				intel_engine_pm_put_async(engine);
> +			intel_engine_pm_put_async(engine);
>   			p->critical_section_end();
>   
> -			/* engine wakeref is sync (instant) */
> +			intel_engine_pm_flush(engine);
> +
>   			if (intel_engine_pm_is_awake(engine)) {
>   				pr_err("%s is still awake after flushing pm\n",
>   				       engine->name);
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 5448f37c8102..dca15ace88f6 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -672,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref)
>   	 * populated by i915_request_add_active_barriers() to point to the
>   	 * request that will eventually release them.
>   	 */
> -	spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING);
>   	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
>   		struct active_node *node = barrier_from_ll(pos);
>   		struct intel_engine_cs *engine = barrier_to_engine(node);
>   		struct rb_node **p, *parent;
>   
> +		spin_lock_irqsave_nested(&ref->tree_lock, flags,
> +					 SINGLE_DEPTH_NESTING);
>   		parent = NULL;
>   		p = &ref->tree.rb_node;
>   		while (*p) {
> @@ -693,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
>   		}
>   		rb_link_node(&node->node, parent, p);
>   		rb_insert_color(&node->node, &ref->tree);
> +		spin_unlock_irqrestore(&ref->tree_lock, flags);
>   
>   		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
>   		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
>   		intel_engine_pm_put(engine);
>   	}
> -	spin_unlock_irqrestore(&ref->tree_lock, flags);
>   }
>   
>   void i915_request_add_active_barriers(struct i915_request *rq)
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 9b02be0ad4e6..95e824a78d4d 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt)
>   	val = 0;
>   	if (intel_gt_pm_get_if_awake(gt)) {
>   		val = __get_rc6(gt);
> -		intel_gt_pm_put(gt);
> +		intel_gt_pm_put_async(gt);
>   	}
>   
>   	spin_lock_irqsave(&pmu->lock, flags);
> @@ -360,7 +360,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
>   skip:
>   		if (unlikely(mmio_lock))
>   			spin_unlock_irqrestore(mmio_lock, flags);
> -		intel_engine_pm_put(engine);
> +		intel_engine_pm_put_async(engine);
>   	}
>   }
>   
> @@ -398,7 +398,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
>   			if (stat)
>   				val = intel_get_cagf(rps, stat);
>   
> -			intel_gt_pm_put(gt);
> +			intel_gt_pm_put_async(gt);
>   		}
>   
>   		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
> index ad26d7f4ca3d..59aa1b6f1827 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.c
> +++ b/drivers/gpu/drm/i915/intel_wakeref.c
> @@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf)
>   
>   static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
>   {
> -	if (!atomic_dec_and_test(&wf->count))
> +	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
> +	if (unlikely(!atomic_dec_and_test(&wf->count)))
>   		goto unlock;
>   
>   	/* ops->put() must reschedule its own release on error/deferral */
> @@ -67,13 +68,12 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
>   	mutex_unlock(&wf->mutex);
>   }
>   
> -void __intel_wakeref_put_last(struct intel_wakeref *wf)
> +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
>   {
>   	INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
>   
>   	/* Assume we are not in process context and so cannot sleep. */
> -	if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC ||
> -	    !mutex_trylock(&wf->mutex)) {
> +	if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
>   		schedule_work(&wf->work);
>   		return;
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
> index affe4de3746b..da6e8fd506e6 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.h
> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
> @@ -9,6 +9,7 @@
>   
>   #include <linux/atomic.h>
>   #include <linux/bits.h>
> +#include <linux/lockdep.h>
>   #include <linux/mutex.h>
>   #include <linux/refcount.h>
>   #include <linux/stackdepot.h>
> @@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t;
>   struct intel_wakeref_ops {
>   	int (*get)(struct intel_wakeref *wf);
>   	int (*put)(struct intel_wakeref *wf);
> -
> -	unsigned long flags;
> -#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
>   };
>   
>   struct intel_wakeref {
> @@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
>   } while (0)
>   
>   int __intel_wakeref_get_first(struct intel_wakeref *wf);
> -void __intel_wakeref_put_last(struct intel_wakeref *wf);
> +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags);
>   
>   /**
>    * intel_wakeref_get: Acquire the wakeref
> @@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
>   }
>   
>   /**
> - * intel_wakeref_put: Release the wakeref
> - * @i915: the drm_i915_private device
> + * intel_wakeref_put_flags: Release the wakeref
>    * @wf: the wakeref
> - * @fn: callback for releasing the wakeref, called only on final release.
> + * @flags: control flags
>    *
>    * Release our hold on the wakeref. When there are no more users,
>    * the runtime pm wakeref will be released after the @fn callback is called
> @@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
>    * code otherwise.
>    */
>   static inline void
> -intel_wakeref_put(struct intel_wakeref *wf)
> +__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
> +#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
>   {
>   	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
>   	if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
> -		__intel_wakeref_put_last(wf);
> +		__intel_wakeref_put_last(wf, flags);
> +}
> +
> +static inline void
> +intel_wakeref_put(struct intel_wakeref *wf)
> +{
> +	might_sleep();
> +	__intel_wakeref_put(wf, 0);
> +}
> +
> +static inline void
> +intel_wakeref_put_async(struct intel_wakeref *wf)
> +{
> +	__intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
>   }
>   
>   /**
> @@ -185,6 +196,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
>   static inline void
>   __intel_wakeref_defer_park(struct intel_wakeref *wf)
>   {
> +	lockdep_assert_held(&wf->mutex);
>   	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
>   	atomic_set_release(&wf->count, 1);
>   }
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko