[Intel-gfx] [PATCH 32/38] drm/i915: Introduce intel_context.pin_mutex for pin management
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Mar 6 09:45:27 UTC 2019
On 01/03/2019 14:03, Chris Wilson wrote:
> Introduce a mutex to start locking the HW contexts independently of
> struct_mutex, with a view to reducing the coarse struct_mutex. The
> intel_context.pin_mutex is used to guard the transition to and from being
> pinned on the gpu, and so is required before starting to build any
> request, and released when the execution is completed.
execution is completed or construction/emission is completed?
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 2 +-
> drivers/gpu/drm/i915/i915_gem_context.c | 76 +++++++------------
> drivers/gpu/drm/i915/intel_context.c | 56 ++++++++++++--
> drivers/gpu/drm/i915/intel_context.h | 38 ++++++----
> drivers/gpu/drm/i915/intel_context_types.h | 8 +-
> drivers/gpu/drm/i915/intel_lrc.c | 4 +-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +-
> .../gpu/drm/i915/selftests/i915_gem_context.c | 2 +-
> drivers/gpu/drm/i915/selftests/mock_engine.c | 2 +-
> 9 files changed, 110 insertions(+), 82 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c6a25c3276ee..4babea524fc8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4706,7 +4706,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> if (!state)
> continue;
>
> - GEM_BUG_ON(ce->pin_count);
> + GEM_BUG_ON(intel_context_is_pinned(ce));
>
> /*
> * As we will hold a reference to the logical state, it will
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 60b17f6a727d..321b26e302e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1096,23 +1096,27 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
> }
>
> static int
> -gen8_modify_rpcs_gpu(struct intel_context *ce,
> - struct intel_engine_cs *engine,
> - struct intel_sseu sseu)
> +gen8_modify_rpcs_gpu(struct intel_context *ce, struct intel_sseu sseu)
> {
> - struct drm_i915_private *i915 = engine->i915;
> + struct drm_i915_private *i915 = ce->engine->i915;
> struct i915_request *rq, *prev;
> intel_wakeref_t wakeref;
> int ret;
>
> - GEM_BUG_ON(!ce->pin_count);
> + lockdep_assert_held(&ce->pin_mutex);
>
> - lockdep_assert_held(&i915->drm.struct_mutex);
> + /*
> + * If context is not idle we have to submit an ordered request to modify
> + * its context image via the kernel context. Pristine and idle contexts
> + * will be configured on pinning.
> + */
> + if (!intel_context_is_pinned(ce))
> + return 0;
>
> /* Submitting requests etc needs the hw awake. */
> wakeref = intel_runtime_pm_get(i915);
>
> - rq = i915_request_alloc(engine, i915->kernel_context);
> + rq = i915_request_alloc(ce->engine, i915->kernel_context);
> if (IS_ERR(rq)) {
> ret = PTR_ERR(rq);
> goto out_put;
> @@ -1156,53 +1160,30 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
> }
>
> static int
> -__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine,
> - struct intel_sseu sseu)
> +i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> + struct intel_engine_cs *engine,
> + struct intel_sseu sseu)
> {
> struct intel_context *ce;
> int ret = 0;
>
> - ce = intel_context_instance(ctx, engine);
> - if (IS_ERR(ce))
> - return PTR_ERR(ce);
> -
> GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
> GEM_BUG_ON(engine->id != RCS);
>
> + ce = intel_context_pin_lock(ctx, engine);
> + if (IS_ERR(ce))
> + return PTR_ERR(ce);
> +
> /* Nothing to do if unmodified. */
> if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
> - return 0;
> -
> - /*
> - * If context is not idle we have to submit an ordered request to modify
> - * its context image via the kernel context. Pristine and idle contexts
> - * will be configured on pinning.
> - */
> - if (ce->pin_count)
> - ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
> + goto unlock;
>
> + ret = gen8_modify_rpcs_gpu(ce, sseu);
The _gpu suffix is not true any more so I'd drop it.
> if (!ret)
> ce->sseu = sseu;
>
> - return ret;
> -}
> -
> -static int
> -i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine,
> - struct intel_sseu sseu)
> -{
> - int ret;
> -
> - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> - if (ret)
> - return ret;
> -
> - ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> -
> - mutex_unlock(&ctx->i915->drm.struct_mutex);
> -
> +unlock:
> + intel_context_pin_unlock(ce);
> return ret;
> }
>
> @@ -1620,11 +1601,12 @@ static int clone_sseu(struct i915_gem_context *dst,
> if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
> continue;
>
> - ce = intel_context_instance(dst, engine);
> + ce = intel_context_pin_lock(dst, engine);
> if (IS_ERR(ce))
> return PTR_ERR(ce);
>
> ce->sseu = sseu;
> + intel_context_pin_unlock(ce);
> }
>
> return 0;
> @@ -1790,7 +1772,6 @@ static int get_sseu(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine;
> struct intel_context *ce;
> unsigned long lookup;
> - int ret;
>
> if (args->size == 0)
> goto out;
> @@ -1817,21 +1798,16 @@ static int get_sseu(struct i915_gem_context *ctx,
> if (!engine)
> return -EINVAL;
>
> - ce = intel_context_instance(ctx, engine);
> + ce = intel_context_pin_lock(ctx, engine); /* serialise with set_sseu */
> if (IS_ERR(ce))
> return PTR_ERR(ce);
>
> - /* Only use for mutex here is to serialize get_param and set_param. */
> - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> - if (ret)
> - return ret;
> -
> user_sseu.slice_mask = ce->sseu.slice_mask;
> user_sseu.subslice_mask = ce->sseu.subslice_mask;
> user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
> user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
>
> - mutex_unlock(&ctx->i915->drm.struct_mutex);
> + intel_context_pin_unlock(ce);
>
> if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
> sizeof(user_sseu)))
> diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
> index 7de02be0b552..7c4171114e5b 100644
> --- a/drivers/gpu/drm/i915/intel_context.c
> +++ b/drivers/gpu/drm/i915/intel_context.c
> @@ -86,7 +86,7 @@ void __intel_context_remove(struct intel_context *ce)
> spin_unlock(&ctx->hw_contexts_lock);
> }
>
> -struct intel_context *
> +static struct intel_context *
> intel_context_instance(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine)
> {
> @@ -110,6 +110,21 @@ intel_context_instance(struct i915_gem_context *ctx,
> return pos;
> }
>
> +struct intel_context *
> +intel_context_pin_lock(struct i915_gem_context *ctx,
> + struct intel_engine_cs *engine)
> + __acquires(ce->pin_mutex)
> +{
> + struct intel_context *ce;
> +
> + ce = intel_context_instance(ctx, engine);
> + if (IS_ERR(ce))
> + return ce;
> +
> + mutex_lock(&ce->pin_mutex);
> + return ce;
> +}
> +
> struct intel_context *
> intel_context_pin(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine)
> @@ -117,16 +132,20 @@ intel_context_pin(struct i915_gem_context *ctx,
> struct intel_context *ce;
> int err;
>
> - lockdep_assert_held(&ctx->i915->drm.struct_mutex);
> -
> ce = intel_context_instance(ctx, engine);
> if (IS_ERR(ce))
> return ce;
>
> - if (unlikely(!ce->pin_count++)) {
> + if (likely(atomic_inc_not_zero(&ce->pin_count)))
> + return ce;
> +
> + if (mutex_lock_interruptible(&ce->pin_mutex))
> + return ERR_PTR(-EINTR);
> +
> + if (likely(!atomic_read(&ce->pin_count))) {
> err = ce->ops->pin(ce);
> if (err)
> - goto err_unpin;
> + goto err;
>
> mutex_lock(&ctx->mutex);
> list_add(&ce->active_link, &ctx->active_engines);
> @@ -134,16 +153,35 @@ intel_context_pin(struct i915_gem_context *ctx,
>
> i915_gem_context_get(ctx);
> GEM_BUG_ON(ce->gem_context != ctx);
> +
> + smp_mb__before_atomic();
Why is this needed?
> }
> - GEM_BUG_ON(!ce->pin_count); /* no overflow! */
>
> + atomic_inc(&ce->pin_count);
> + GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
> +
> + mutex_unlock(&ce->pin_mutex);
> return ce;
>
> -err_unpin:
> - ce->pin_count = 0;
> +err:
> + mutex_unlock(&ce->pin_mutex);
> return ERR_PTR(err);
> }
>
> +void intel_context_unpin(struct intel_context *ce)
> +{
> + if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
> + return;
> +
> + /* We may be called from inside intel_context_pin() to evict another */
> + mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
> +
> + if (likely(atomic_dec_and_test(&ce->pin_count)))
> + ce->ops->unpin(ce);
> +
> + mutex_unlock(&ce->pin_mutex);
> +}
> +
> static void intel_context_retire(struct i915_active_request *active,
> struct i915_request *rq)
> {
> @@ -165,6 +203,8 @@ intel_context_init(struct intel_context *ce,
> INIT_LIST_HEAD(&ce->signal_link);
> INIT_LIST_HEAD(&ce->signals);
>
> + mutex_init(&ce->pin_mutex);
> +
> /* Use the whole device by default */
> ce->sseu = intel_device_default_sseu(ctx->i915);
>
> diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
> index aa34311a472e..3657f3a34f8e 100644
> --- a/drivers/gpu/drm/i915/intel_context.h
> +++ b/drivers/gpu/drm/i915/intel_context.h
> @@ -7,6 +7,8 @@
> #ifndef __INTEL_CONTEXT_H__
> #define __INTEL_CONTEXT_H__
>
> +#include <linux/lockdep.h>
> +
> #include "intel_context_types.h"
> #include "intel_engine_types.h"
>
> @@ -26,18 +28,30 @@ intel_context_lookup(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine);
>
> /**
> - * intel_context_instance - Lookup or allocate the HW context for (ctx, engine)
> + * intel_context_pin_lock - Stablises the 'pinned' status of the HW context
> * @ctx - the parent GEM context
> * @engine - the target HW engine
> *
> - * Returns the existing HW context for this pair of (GEM context, engine), or
> - * allocates and initialises a fresh context. Once allocated, the HW context
> - * remains resident until the GEM context is destroyed.
> + * Acquire a lock on the pinned status of the HW context, such that the context
> + * can neither be bound to the GPU or unbound whilst the lock is held, i.e.
> + * intel_context_is_pinned() remains stable.
> */
> struct intel_context *
> -intel_context_instance(struct i915_gem_context *ctx,
> +intel_context_pin_lock(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine);
>
> +static inline bool
> +intel_context_is_pinned(struct intel_context *ce)
> +{
> + return atomic_read(&ce->pin_count);
> +}
> +
> +static inline void intel_context_pin_unlock(struct intel_context *ce)
> +__releases(ce->pin_mutex)
> +{
> + mutex_unlock(&ce->pin_mutex);
> +}
> +
> struct intel_context *
> __intel_context_insert(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine,
> @@ -50,18 +64,10 @@ intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
>
> static inline void __intel_context_pin(struct intel_context *ce)
> {
> - GEM_BUG_ON(!ce->pin_count);
> - ce->pin_count++;
> + GEM_BUG_ON(!intel_context_is_pinned(ce));
> + atomic_inc(&ce->pin_count);
This can be called without the mutex held but only if not zero, right?
BUG_ON(!atomic_inc_not_zero(&ce->pin_count)) ?
> }
>
> -static inline void intel_context_unpin(struct intel_context *ce)
> -{
> - GEM_BUG_ON(!ce->pin_count);
> - if (--ce->pin_count)
> - return;
> -
> - GEM_BUG_ON(!ce->ops);
> - ce->ops->unpin(ce);
> -}
> +void intel_context_unpin(struct intel_context *ce);
>
> #endif /* __INTEL_CONTEXT_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
> index 804fa93de853..6dc9b4b9067b 100644
> --- a/drivers/gpu/drm/i915/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/intel_context_types.h
> @@ -8,6 +8,7 @@
> #define __INTEL_CONTEXT_TYPES__
>
> #include <linux/list.h>
> +#include <linux/mutex.h>
> #include <linux/rbtree.h>
> #include <linux/types.h>
>
> @@ -38,14 +39,19 @@ struct intel_context {
> struct i915_gem_context *gem_context;
> struct intel_engine_cs *engine;
> struct intel_engine_cs *active;
> +
> struct list_head active_link;
> struct list_head signal_link;
> struct list_head signals;
> +
> struct i915_vma *state;
> struct intel_ring *ring;
> +
> u32 *lrc_reg_state;
> u64 lrc_desc;
> - int pin_count;
> +
> + atomic_t pin_count;
> + struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
Not pinning but transition from pinned to unpinned state AFAIU.
>
> /**
> * active_tracker: Active tracker for the external rq activity
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 473cfea4c503..e69b52c4d3ce 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1238,7 +1238,7 @@ static void __execlists_context_fini(struct intel_context *ce)
>
> static void execlists_context_destroy(struct intel_context *ce)
> {
> - GEM_BUG_ON(ce->pin_count);
> + GEM_BUG_ON(intel_context_is_pinned(ce));
>
> if (ce->state)
> __execlists_context_fini(ce);
> @@ -1476,7 +1476,7 @@ static int execlists_request_alloc(struct i915_request *request)
> {
> int ret;
>
> - GEM_BUG_ON(!request->hw_context->pin_count);
> + GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
>
> /*
> * Flush enough space to reduce the likelihood of waiting after
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 4f137d0a5316..a2bdf188f3f6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1356,7 +1356,7 @@ static void __ring_context_fini(struct intel_context *ce)
>
> static void ring_context_destroy(struct intel_context *ce)
> {
> - GEM_BUG_ON(ce->pin_count);
> + GEM_BUG_ON(intel_context_is_pinned(ce));
>
> if (ce->state)
> __ring_context_fini(ce);
> @@ -1917,7 +1917,7 @@ static int ring_request_alloc(struct i915_request *request)
> {
> int ret;
>
> - GEM_BUG_ON(!request->hw_context->pin_count);
> + GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
> GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
>
> /*
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 15f016ca8e0d..c6c14d1b2ce8 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1029,7 +1029,7 @@ __sseu_test(struct drm_i915_private *i915,
> if (ret)
> goto out_context;
>
> - ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> + ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> if (ret)
> goto out_spin;
>
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 6434e968ecb0..c032c0cf20e6 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -131,7 +131,7 @@ static void mock_context_unpin(struct intel_context *ce)
>
> static void mock_context_destroy(struct intel_context *ce)
> {
> - GEM_BUG_ON(ce->pin_count);
> + GEM_BUG_ON(intel_context_is_pinned(ce));
>
> if (ce->ring)
> mock_ring_free(ce->ring);
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list