[Intel-gfx] [PATCH 22/43] drm/i915: Introduce intel_context.pin_mutex for pin management

Wed Mar 6 14:43:13 UTC 2019

On 06/03/2019 14:24, Chris Wilson wrote:
> Introduce a mutex to start locking the HW contexts independently of
> struct_mutex, with a view to reducing the coarse struct_mutex. The
> intel_context.pin_mutex is used to guard the transition to and from being
> pinned on the gpu, and so is required before starting to build any
> request. The intel_context will then remain pinned until the request
> completes, but the mutex can be released immediately unpin completion of
> pinning the context.
> 
> A slight variant of the above is used by per-context sseu that wants to
> inspect the pinned status of the context, and requires that it remains
> stable (either !pinned or pinned) across its operation. By using the
> pin_mutex to serialise operations while pin_count==0, we can take that
> pin_mutex for stabilise the boolean pin status.

Change log!

> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c               |  2 +-
>   drivers/gpu/drm/i915/i915_gem_context.c       | 76 +++++++------------
>   drivers/gpu/drm/i915/intel_context.c          | 56 ++++++++++++--
>   drivers/gpu/drm/i915/intel_context.h          | 38 ++++++----
>   drivers/gpu/drm/i915/intel_context_types.h    |  8 +-
>   drivers/gpu/drm/i915/intel_lrc.c              |  4 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.c       |  4 +-
>   .../gpu/drm/i915/selftests/i915_gem_context.c |  2 +-
>   drivers/gpu/drm/i915/selftests/mock_engine.c  |  2 +-
>   9 files changed, 110 insertions(+), 82 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 71d8ce38f42c..8863dd414cde 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4662,7 +4662,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>   		if (!state)
>   			continue;
>   
> -		GEM_BUG_ON(ce->pin_count);
> +		GEM_BUG_ON(intel_context_is_pinned(ce));
>   
>   		/*
>   		 * As we will hold a reference to the logical state, it will
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index f7a075efccad..35541f99b37d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1098,23 +1098,27 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
>   }
>   
>   static int
> -gen8_modify_rpcs_gpu(struct intel_context *ce,
> -		     struct intel_engine_cs *engine,
> -		     struct intel_sseu sseu)
> +gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>   {
> -	struct drm_i915_private *i915 = engine->i915;
> +	struct drm_i915_private *i915 = ce->engine->i915;
>   	struct i915_request *rq, *prev;
>   	intel_wakeref_t wakeref;
>   	int ret;
>   
> -	GEM_BUG_ON(!ce->pin_count);
> +	lockdep_assert_held(&ce->pin_mutex);
>   
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> +	/*
> +	 * If context is not idle we have to submit an ordered request to modify
> +	 * its context image via the kernel context. Pristine and idle contexts
> +	 * will be configured on pinning.
> +	 */
> +	if (!intel_context_is_pinned(ce))
> +		return 0;
>   
>   	/* Submitting requests etc needs the hw awake. */
>   	wakeref = intel_runtime_pm_get(i915);
>   
> -	rq = i915_request_alloc(engine, i915->kernel_context);
> +	rq = i915_request_alloc(ce->engine, i915->kernel_context);
>   	if (IS_ERR(rq)) {
>   		ret = PTR_ERR(rq);
>   		goto out_put;
> @@ -1158,53 +1162,30 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
>   }
>   
>   static int
> -__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> -				    struct intel_engine_cs *engine,
> -				    struct intel_sseu sseu)
> +i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> +				  struct intel_engine_cs *engine,
> +				  struct intel_sseu sseu)
>   {
>   	struct intel_context *ce;
>   	int ret = 0;
>   
> -	ce = intel_context_instance(ctx, engine);
> -	if (IS_ERR(ce))
> -		return PTR_ERR(ce);
> -
>   	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
>   	GEM_BUG_ON(engine->id != RCS0);
>   
> +	ce = intel_context_pin_lock(ctx, engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
>   	/* Nothing to do if unmodified. */
>   	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
> -		return 0;
> -
> -	/*
> -	 * If context is not idle we have to submit an ordered request to modify
> -	 * its context image via the kernel context. Pristine and idle contexts
> -	 * will be configured on pinning.
> -	 */
> -	if (ce->pin_count)
> -		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
> +		goto unlock;
>   
> +	ret = gen8_modify_rpcs(ce, sseu);
>   	if (!ret)
>   		ce->sseu = sseu;
>   
> -	return ret;
> -}
> -
> -static int
> -i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> -				  struct intel_engine_cs *engine,
> -				  struct intel_sseu sseu)
> -{
> -	int ret;
> -
> -	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> -	if (ret)
> -		return ret;
> -
> -	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> -
> -	mutex_unlock(&ctx->i915->drm.struct_mutex);
> -
> +unlock:
> +	intel_context_pin_unlock(ce);
>   	return ret;
>   }
>   
> @@ -1622,11 +1603,12 @@ static int clone_sseu(struct i915_gem_context *dst,
>   		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
>   			continue;
>   
> -		ce = intel_context_instance(dst, engine);
> +		ce = intel_context_pin_lock(dst, engine);
>   		if (IS_ERR(ce))
>   			return PTR_ERR(ce);
>   
>   		ce->sseu = sseu;
> +		intel_context_pin_unlock(ce);
>   	}
>   
>   	return 0;
> @@ -1806,7 +1788,6 @@ static int get_sseu(struct i915_gem_context *ctx,
>   	struct intel_engine_cs *engine;
>   	struct intel_context *ce;
>   	unsigned long lookup;
> -	int ret;
>   
>   	if (args->size == 0)
>   		goto out;
> @@ -1833,21 +1814,16 @@ static int get_sseu(struct i915_gem_context *ctx,
>   	if (!engine)
>   		return -EINVAL;
>   
> -	ce = intel_context_instance(ctx, engine);
> +	ce = intel_context_pin_lock(ctx, engine); /* serialise with set_sseu */
>   	if (IS_ERR(ce))
>   		return PTR_ERR(ce);
>   
> -	/* Only use for mutex here is to serialize get_param and set_param. */
> -	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> -	if (ret)
> -		return ret;
> -
>   	user_sseu.slice_mask = ce->sseu.slice_mask;
>   	user_sseu.subslice_mask = ce->sseu.subslice_mask;
>   	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
>   	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
>   
> -	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +	intel_context_pin_unlock(ce);
>   
>   	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
>   			 sizeof(user_sseu)))
> diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
> index 4c1dacb69f51..8bf0d38d3a08 100644
> --- a/drivers/gpu/drm/i915/intel_context.c
> +++ b/drivers/gpu/drm/i915/intel_context.c
> @@ -102,7 +102,7 @@ void __intel_context_remove(struct intel_context *ce)
>   	spin_unlock(&ctx->hw_contexts_lock);
>   }
>   
> -struct intel_context *
> +static struct intel_context *
>   intel_context_instance(struct i915_gem_context *ctx,
>   		       struct intel_engine_cs *engine)
>   {
> @@ -126,6 +126,21 @@ intel_context_instance(struct i915_gem_context *ctx,
>   	return pos;
>   }
>   
> +struct intel_context *
> +intel_context_pin_lock(struct i915_gem_context *ctx,
> +		       struct intel_engine_cs *engine)
> +	__acquires(ce->pin_mutex)
> +{
> +	struct intel_context *ce;
> +
> +	ce = intel_context_instance(ctx, engine);
> +	if (IS_ERR(ce))
> +		return ce;
> +
> +	mutex_lock(&ce->pin_mutex);
> +	return ce;
> +}
> +
>   struct intel_context *
>   intel_context_pin(struct i915_gem_context *ctx,
>   		  struct intel_engine_cs *engine)
> @@ -133,16 +148,20 @@ intel_context_pin(struct i915_gem_context *ctx,
>   	struct intel_context *ce;
>   	int err;
>   
> -	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
> -
>   	ce = intel_context_instance(ctx, engine);
>   	if (IS_ERR(ce))
>   		return ce;
>   
> -	if (unlikely(!ce->pin_count++)) {
> +	if (likely(atomic_inc_not_zero(&ce->pin_count)))
> +		return ce;
> +
> +	if (mutex_lock_interruptible(&ce->pin_mutex))
> +		return ERR_PTR(-EINTR);
> +
> +	if (likely(!atomic_read(&ce->pin_count))) {
>   		err = ce->ops->pin(ce);
>   		if (err)
> -			goto err_unpin;
> +			goto err;
>   
>   		mutex_lock(&ctx->mutex);
>   		list_add(&ce->active_link, &ctx->active_engines);
> @@ -150,16 +169,35 @@ intel_context_pin(struct i915_gem_context *ctx,
>   
>   		i915_gem_context_get(ctx);
>   		GEM_BUG_ON(ce->gem_context != ctx);
> +
> +		smp_mb__before_atomic();
>   	}
> -	GEM_BUG_ON(!ce->pin_count); /* no overflow! */
>   
> +	atomic_inc(&ce->pin_count);
> +	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
> +
> +	mutex_unlock(&ce->pin_mutex);
>   	return ce;
>   
> -err_unpin:
> -	ce->pin_count = 0;
> +err:
> +	mutex_unlock(&ce->pin_mutex);
>   	return ERR_PTR(err);
>   }
>   
> +void intel_context_unpin(struct intel_context *ce)
> +{
> +	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
> +		return;
> +
> +	/* We may be called from inside intel_context_pin() to evict another */
> +	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
> +
> +	if (likely(atomic_dec_and_test(&ce->pin_count)))
> +		ce->ops->unpin(ce);
> +
> +	mutex_unlock(&ce->pin_mutex);
> +}
> +
>   static void intel_context_retire(struct i915_active_request *active,
>   				 struct i915_request *rq)
>   {
> @@ -181,6 +219,8 @@ intel_context_init(struct intel_context *ce,
>   	INIT_LIST_HEAD(&ce->signal_link);
>   	INIT_LIST_HEAD(&ce->signals);
>   
> +	mutex_init(&ce->pin_mutex);
> +
>   	/* Use the whole device by default */
>   	ce->sseu = intel_device_default_sseu(ctx->i915);
>   
> diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
> index ea21473891f7..9546d932406a 100644
> --- a/drivers/gpu/drm/i915/intel_context.h
> +++ b/drivers/gpu/drm/i915/intel_context.h
> @@ -7,6 +7,8 @@
>   #ifndef __INTEL_CONTEXT_H__
>   #define __INTEL_CONTEXT_H__
>   
> +#include <linux/lockdep.h>
> +
>   #include "intel_context_types.h"
>   #include "intel_engine_types.h"
>   
> @@ -29,18 +31,30 @@ intel_context_lookup(struct i915_gem_context *ctx,
>   		     struct intel_engine_cs *engine);
>   
>   /**
> - * intel_context_instance - Lookup or allocate the HW context for (ctx, engine)
> + * intel_context_pin_lock - Stablises the 'pinned' status of the HW context
>    * @ctx - the parent GEM context
>    * @engine - the target HW engine
>    *
> - * Returns the existing HW context for this pair of (GEM context, engine), or
> - * allocates and initialises a fresh context. Once allocated, the HW context
> - * remains resident until the GEM context is destroyed.
> + * Acquire a lock on the pinned status of the HW context, such that the context
> + * can neither be bound to the GPU or unbound whilst the lock is held, i.e.
> + * intel_context_is_pinned() remains stable.
>    */
>   struct intel_context *
> -intel_context_instance(struct i915_gem_context *ctx,
> +intel_context_pin_lock(struct i915_gem_context *ctx,
>   		       struct intel_engine_cs *engine);
>   
> +static inline bool
> +intel_context_is_pinned(struct intel_context *ce)
> +{
> +	return atomic_read(&ce->pin_count);
> +}
> +
> +static inline void intel_context_pin_unlock(struct intel_context *ce)
> +__releases(ce->pin_mutex)
> +{
> +	mutex_unlock(&ce->pin_mutex);
> +}
> +
>   struct intel_context *
>   __intel_context_insert(struct i915_gem_context *ctx,
>   		       struct intel_engine_cs *engine,
> @@ -53,18 +67,10 @@ intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
>   
>   static inline void __intel_context_pin(struct intel_context *ce)
>   {
> -	GEM_BUG_ON(!ce->pin_count);
> -	ce->pin_count++;
> +	GEM_BUG_ON(!intel_context_is_pinned(ce));
> +	atomic_inc(&ce->pin_count);
>   }
>   
> -static inline void intel_context_unpin(struct intel_context *ce)
> -{
> -	GEM_BUG_ON(!ce->pin_count);
> -	if (--ce->pin_count)
> -		return;
> -
> -	GEM_BUG_ON(!ce->ops);
> -	ce->ops->unpin(ce);
> -}
> +void intel_context_unpin(struct intel_context *ce);
>   
>   #endif /* __INTEL_CONTEXT_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
> index 804fa93de853..6dc9b4b9067b 100644
> --- a/drivers/gpu/drm/i915/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/intel_context_types.h
> @@ -8,6 +8,7 @@
>   #define __INTEL_CONTEXT_TYPES__
>   
>   #include <linux/list.h>
> +#include <linux/mutex.h>
>   #include <linux/rbtree.h>
>   #include <linux/types.h>
>   
> @@ -38,14 +39,19 @@ struct intel_context {
>   	struct i915_gem_context *gem_context;
>   	struct intel_engine_cs *engine;
>   	struct intel_engine_cs *active;
> +
>   	struct list_head active_link;
>   	struct list_head signal_link;
>   	struct list_head signals;
> +
>   	struct i915_vma *state;
>   	struct intel_ring *ring;
> +
>   	u32 *lrc_reg_state;
>   	u64 lrc_desc;
> -	int pin_count;
> +
> +	atomic_t pin_count;
> +	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
>   
>   	/**
>   	 * active_tracker: Active tracker for the external rq activity
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f7134d650187..7b938eaff9c5 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1244,7 +1244,7 @@ static void __execlists_context_fini(struct intel_context *ce)
>   
>   static void execlists_context_destroy(struct intel_context *ce)
>   {
> -	GEM_BUG_ON(ce->pin_count);
> +	GEM_BUG_ON(intel_context_is_pinned(ce));
>   
>   	if (ce->state)
>   		__execlists_context_fini(ce);
> @@ -1481,7 +1481,7 @@ static int execlists_request_alloc(struct i915_request *request)
>   {
>   	int ret;
>   
> -	GEM_BUG_ON(!request->hw_context->pin_count);
> +	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
>   
>   	/*
>   	 * Flush enough space to reduce the likelihood of waiting after
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 03656021d04c..cc4fcd89b845 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1357,7 +1357,7 @@ static void __ring_context_fini(struct intel_context *ce)
>   
>   static void ring_context_destroy(struct intel_context *ce)
>   {
> -	GEM_BUG_ON(ce->pin_count);
> +	GEM_BUG_ON(intel_context_is_pinned(ce));
>   
>   	if (ce->state)
>   		__ring_context_fini(ce);
> @@ -1918,7 +1918,7 @@ static int ring_request_alloc(struct i915_request *request)
>   {
>   	int ret;
>   
> -	GEM_BUG_ON(!request->hw_context->pin_count);
> +	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
>   	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index df1380498612..b6ed055abf5c 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1024,7 +1024,7 @@ __sseu_test(struct drm_i915_private *i915,
>   	if (ret)
>   		goto out_context;
>   
> -	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> +	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
>   	if (ret)
>   		goto out_spin;
>   
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 99d7463218d0..f6d120e05ee4 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -131,7 +131,7 @@ static void mock_context_unpin(struct intel_context *ce)
>   
>   static void mock_context_destroy(struct intel_context *ce)
>   {
> -	GEM_BUG_ON(ce->pin_count);
> +	GEM_BUG_ON(intel_context_is_pinned(ce));
>   
>   	if (ce->ring)
>   		mock_ring_free(ce->ring);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko