[Intel-gfx] [PATCH v2] drm/i915: Reduce context HW ID lifetime
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Sep 5 09:49:02 UTC 2018
On 04/09/2018 16:31, Chris Wilson wrote:
> Future gen reduce the number of bits we will have available to
> differentiate between contexts, so reduce the lifetime of the ID
> assignment from that of the context to its current active cycle (i.e.
> only while it is pinned for use by the HW, will it have a constant ID).
> This means that instead of a max of 2k allocated contexts (worst case
> before fun with bit twiddling), we instead have a limit of 2k in flight
> contexts (minus a few that have been pinned by the kernel or by perf).
>
> To reduce the number of context ids we require, we allocate a context id
> on first use and mark it as pinned for as long as the GEM context itself is,
> that is we keep it pinned while active on each engine. If we exhaust
> our context id space, then we try to reclaim an id from an idle context.
> In the extreme case where all context ids are pinned by active contexts,
> we force the system to idle in order to recover ids.
>
> We cannot reduce the scope of an HW-ID to an engine (allowing the same
> gem_context to have different ids on each engine) as in the future we
> will need to preassign an id before we know which engine the
> context is being executed on.
>
> v2: Improved commentary (Tvrtko) [I tried at least]
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=107788
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 5 +-
> drivers/gpu/drm/i915/i915_drv.h | 2 +
> drivers/gpu/drm/i915/i915_gem_context.c | 222 +++++++++++++-----
> drivers/gpu/drm/i915/i915_gem_context.h | 23 ++
> drivers/gpu/drm/i915/intel_lrc.c | 8 +
> drivers/gpu/drm/i915/selftests/mock_context.c | 11 +-
> 6 files changed, 201 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 4ad0e2ed8610..1f7051e97afb 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1953,7 +1953,10 @@ static int i915_context_status(struct seq_file *m, void *unused)
> return ret;
>
> list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> - seq_printf(m, "HW context %u ", ctx->hw_id);
> + seq_puts(m, "HW context ");
> + if (!list_empty(&ctx->hw_id_link))
> + seq_printf(m, "%x [pin %u]", ctx->hw_id,
> + atomic_read(&ctx->hw_id_pin_count));
Do you want to put some marker for the unallocated case here?
> if (ctx->pid) {
> struct task_struct *task;
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5a4da5b723fd..767615ecdea5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1861,6 +1861,7 @@ struct drm_i915_private {
> struct mutex av_mutex;
>
> struct {
> + struct mutex mutex;
> struct list_head list;
> struct llist_head free_list;
> struct work_struct free_work;
> @@ -1873,6 +1874,7 @@ struct drm_i915_private {
> #define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
> #define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
> #define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
> + struct list_head hw_id_list;
> } contexts;
>
> u32 fdi_rx_config;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index f15a039772db..747b8170a15a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -115,6 +115,95 @@ static void lut_close(struct i915_gem_context *ctx)
> rcu_read_unlock();
> }
>
> +static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
> +{
> + unsigned int max;
> +
> + lockdep_assert_held(&i915->contexts.mutex);
> +
> + if (INTEL_GEN(i915) >= 11)
> + max = GEN11_MAX_CONTEXT_HW_ID;
> + else if (USES_GUC_SUBMISSION(i915))
> + /*
> + * When using GuC in proxy submission, GuC consumes the
> + * highest bit in the context id to indicate proxy submission.
> + */
> + max = MAX_GUC_CONTEXT_HW_ID;
> + else
> + max = MAX_CONTEXT_HW_ID;
> +
> + return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
> +}
> +
> +static int steal_hw_id(struct drm_i915_private *i915)
> +{
> + struct i915_gem_context *ctx, *cn;
> + LIST_HEAD(pinned);
> + int id = -ENOSPC;
> +
> + lockdep_assert_held(&i915->contexts.mutex);
> +
> + list_for_each_entry_safe(ctx, cn,
> + &i915->contexts.hw_id_list, hw_id_link) {
> + if (atomic_read(&ctx->hw_id_pin_count)) {
> + list_move_tail(&ctx->hw_id_link, &pinned);
> + continue;
> + }
> +
> + GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
> + list_del_init(&ctx->hw_id_link);
> + id = ctx->hw_id;
> + break;
> + }
> +
> + /*
> + * Remember how far we got up on the last repossession scan, so the
> + * list is kept in a "least recently scanned" order.
> + */
> + list_splice_tail(&pinned, &i915->contexts.hw_id_list);
> + return id;
> +}
> +
> +static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
> +{
> + int ret;
> +
> + lockdep_assert_held(&i915->contexts.mutex);
> +
> + /*
> + * We prefer to steal/stall ourselves and our users over that of the
> + * entire system. That may be a little unfair to our users, and
> + * even hurt high priority clients. The choice is whether to oomkill
> + * something else, or steal a context id.
> + */
> + ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
> + if (unlikely(ret < 0)) {
> + ret = steal_hw_id(i915);
> + if (ret < 0) /* once again for the correct errno code */
> + ret = new_hw_id(i915, GFP_KERNEL);
> + if (ret < 0)
> + return ret;
> + }
> +
> + *out = ret;
> + return 0;
> +}
> +
> +static void release_hw_id(struct i915_gem_context *ctx)
> +{
> + struct drm_i915_private *i915 = ctx->i915;
> +
> + if (list_empty(&ctx->hw_id_link))
> + return;
> +
> + mutex_lock(&i915->contexts.mutex);
> + if (!list_empty(&ctx->hw_id_link)) {
> + ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
> + list_del_init(&ctx->hw_id_link);
> + }
> + mutex_unlock(&i915->contexts.mutex);
> +}
> +
> static void i915_gem_context_free(struct i915_gem_context *ctx)
> {
> unsigned int n;
> @@ -122,6 +211,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> lockdep_assert_held(&ctx->i915->drm.struct_mutex);
> GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>
> + release_hw_id(ctx);
> i915_ppgtt_put(ctx->ppgtt);
>
> for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
> @@ -136,7 +226,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>
> list_del(&ctx->link);
>
> - ida_simple_remove(&ctx->i915->contexts.hw_ida, ctx->hw_id);
> kfree_rcu(ctx, rcu);
> }
>
> @@ -190,6 +279,12 @@ static void context_close(struct i915_gem_context *ctx)
> {
> i915_gem_context_set_closed(ctx);
>
> + /*
> + * This context will never again be assigned to HW, so we can
> + * reuse its ID for the next context.
> + */
> + release_hw_id(ctx);
> +
> /*
> * The LUT uses the VMA as a backpointer to unref the object,
> * so we need to clear the LUT before we close all the VMA (inside
> @@ -203,43 +298,6 @@ static void context_close(struct i915_gem_context *ctx)
> i915_gem_context_put(ctx);
> }
>
> -static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
> -{
> - int ret;
> - unsigned int max;
> -
> - if (INTEL_GEN(dev_priv) >= 11) {
> - max = GEN11_MAX_CONTEXT_HW_ID;
> - } else {
> - /*
> - * When using GuC in proxy submission, GuC consumes the
> - * highest bit in the context id to indicate proxy submission.
> - */
> - if (USES_GUC_SUBMISSION(dev_priv))
> - max = MAX_GUC_CONTEXT_HW_ID;
> - else
> - max = MAX_CONTEXT_HW_ID;
> - }
> -
> -
> - ret = ida_simple_get(&dev_priv->contexts.hw_ida,
> - 0, max, GFP_KERNEL);
> - if (ret < 0) {
> - /* Contexts are only released when no longer active.
> - * Flush any pending retires to hopefully release some
> - * stale contexts and try again.
> - */
> - i915_retire_requests(dev_priv);
> - ret = ida_simple_get(&dev_priv->contexts.hw_ida,
> - 0, max, GFP_KERNEL);
> - if (ret < 0)
> - return ret;
> - }
> -
> - *out = ret;
> - return 0;
> -}
> -
> static u32 default_desc_template(const struct drm_i915_private *i915,
> const struct i915_hw_ppgtt *ppgtt)
> {
> @@ -276,12 +334,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
> if (ctx == NULL)
> return ERR_PTR(-ENOMEM);
>
> - ret = assign_hw_id(dev_priv, &ctx->hw_id);
> - if (ret) {
> - kfree(ctx);
> - return ERR_PTR(ret);
> - }
> -
> kref_init(&ctx->ref);
> list_add_tail(&ctx->link, &dev_priv->contexts.list);
> ctx->i915 = dev_priv;
> @@ -295,6 +347,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>
> INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> INIT_LIST_HEAD(&ctx->handles_list);
> + INIT_LIST_HEAD(&ctx->hw_id_link);
>
> /* Default context will never have a file_priv */
> ret = DEFAULT_CONTEXT_HANDLE;
> @@ -421,15 +474,35 @@ i915_gem_context_create_gvt(struct drm_device *dev)
> return ctx;
> }
>
> +static void
> +destroy_kernel_context(struct i915_gem_context **ctxp)
> +{
> + struct i915_gem_context *ctx;
> +
> + /* Keep the context ref so that we can free it immediately ourselves */
> + ctx = i915_gem_context_get(fetch_and_zero(ctxp));
> + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
> +
> + context_close(ctx);
> + i915_gem_context_free(ctx);
> +}
> +
> struct i915_gem_context *
> i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
> {
> struct i915_gem_context *ctx;
> + int err;
>
> ctx = i915_gem_create_context(i915, NULL);
> if (IS_ERR(ctx))
> return ctx;
>
> + err = i915_gem_context_pin_hw_id(ctx);
> + if (err) {
> + destroy_kernel_context(&ctx);
> + return ERR_PTR(err);
> + }
> +
> i915_gem_context_clear_bannable(ctx);
> ctx->sched.priority = prio;
> ctx->ring_size = PAGE_SIZE;
> @@ -439,17 +512,19 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
> return ctx;
> }
>
> -static void
> -destroy_kernel_context(struct i915_gem_context **ctxp)
> +static void init_contexts(struct drm_i915_private *i915)
> {
> - struct i915_gem_context *ctx;
> + mutex_init(&i915->contexts.mutex);
> + INIT_LIST_HEAD(&i915->contexts.list);
>
> - /* Keep the context ref so that we can free it immediately ourselves */
> - ctx = i915_gem_context_get(fetch_and_zero(ctxp));
> - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
> + /* Using the simple ida interface, the max is limited by sizeof(int) */
> + BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
> + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
> + ida_init(&i915->contexts.hw_ida);
> + INIT_LIST_HEAD(&i915->contexts.hw_id_list);
>
> - context_close(ctx);
> - i915_gem_context_free(ctx);
> + INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
> + init_llist_head(&i915->contexts.free_list);
> }
>
> static bool needs_preempt_context(struct drm_i915_private *i915)
> @@ -470,14 +545,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> if (ret)
> return ret;
>
> - INIT_LIST_HEAD(&dev_priv->contexts.list);
> - INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
> - init_llist_head(&dev_priv->contexts.free_list);
> -
> - /* Using the simple ida interface, the max is limited by sizeof(int) */
> - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
> - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
> - ida_init(&dev_priv->contexts.hw_ida);
> + init_contexts(dev_priv);
>
> /* lowest priority; idle task */
> ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
> @@ -487,9 +555,13 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> }
> /*
> * For easy recognisability, we want the kernel context to be 0 and then
> - * all user contexts will have non-zero hw_id.
> + * all user contexts will have non-zero hw_id. Kernel contexts are
> + * permanently pinned, so that we never suffer a stall and can
> + * use them from any allocation context (e.g. for evicting other
> + * contexts and from inside the shrinker).
> */
> GEM_BUG_ON(ctx->hw_id);
> + GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
> dev_priv->kernel_context = ctx;
>
> /* highest priority; preempting task */
> @@ -527,6 +599,7 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
> destroy_kernel_context(&i915->kernel_context);
>
> /* Must free all deferred contexts (via flush_workqueue) first */
> + GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
> ida_destroy(&i915->contexts.hw_ida);
> }
>
> @@ -932,6 +1005,33 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
> return ret;
> }
>
> +int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
> +{
> + struct drm_i915_private *i915 = ctx->i915;
> + int err = 0;
> +
> + mutex_lock(&i915->contexts.mutex);
> +
> + GEM_BUG_ON(i915_gem_context_is_closed(ctx));
> +
> + if (list_empty(&ctx->hw_id_link)) {
> + GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
> +
> + err = assign_hw_id(i915, &ctx->hw_id);
> + if (err)
> + goto out_unlock;
> +
> + list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
> + }
> +
> + GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
> + atomic_inc(&ctx->hw_id_pin_count);
> +
> +out_unlock:
> + mutex_unlock(&i915->contexts.mutex);
> + return err;
> +}
> +
> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> #include "selftests/mock_context.c"
> #include "selftests/i915_gem_context.c"
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 851dad6decd7..e09673ca731d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -134,8 +134,16 @@ struct i915_gem_context {
> * functions like fault reporting, PASID, scheduling. The
> * &drm_i915_private.context_hw_ida is used to assign a unique
> * id for the lifetime of the context.
> + *
> + * @hw_id_pin_count: - number of times this context had been pinned
> + * for use (should be, at most, once per engine).
> + *
> + * @hw_id_link: - all contexts with an assigned id are tracked
> + * for possible repossession.
> */
> unsigned int hw_id;
> + atomic_t hw_id_pin_count;
> + struct list_head hw_id_link;
>
> /**
> * @user_handle: userspace identifier
> @@ -254,6 +262,21 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
> __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
> }
>
> +int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
> +static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
> +{
> + if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
> + return 0;
> +
> + return __i915_gem_context_pin_hw_id(ctx);
> +}
> +
> +static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
> +{
> + GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
> + atomic_dec(&ctx->hw_id_pin_count);
> +}
> +
> static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
> {
> return c->user_handle == DEFAULT_CONTEXT_HANDLE;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index def467c2451b..9b1f0e5211a0 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1272,6 +1272,8 @@ static void execlists_context_destroy(struct intel_context *ce)
>
> static void execlists_context_unpin(struct intel_context *ce)
> {
> + i915_gem_context_unpin_hw_id(ce->gem_context);
> +
> intel_ring_unpin(ce->ring);
>
> ce->state->obj->pin_global--;
> @@ -1330,6 +1332,10 @@ __execlists_context_pin(struct intel_engine_cs *engine,
> if (ret)
> goto unpin_map;
>
> + ret = i915_gem_context_pin_hw_id(ctx);
> + if (ret)
> + goto unpin_ring;
> +
> intel_lr_context_descriptor_update(ctx, engine, ce);
>
> ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
> @@ -1342,6 +1348,8 @@ __execlists_context_pin(struct intel_engine_cs *engine,
> i915_gem_context_get(ctx);
> return ce;
>
> +unpin_ring:
> + intel_ring_unpin(ce->ring);
> unpin_map:
> i915_gem_object_unpin_map(ce->state->obj);
> unpin_vma:
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 8904f1ce64e3..d937bdff26f9 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -43,6 +43,7 @@ mock_context(struct drm_i915_private *i915,
>
> INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> INIT_LIST_HEAD(&ctx->handles_list);
> + INIT_LIST_HEAD(&ctx->hw_id_link);
>
> for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
> struct intel_context *ce = &ctx->__engine[n];
> @@ -50,11 +51,9 @@ mock_context(struct drm_i915_private *i915,
> ce->gem_context = ctx;
> }
>
> - ret = ida_simple_get(&i915->contexts.hw_ida,
> - 0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
> + ret = i915_gem_context_pin_hw_id(ctx);
> if (ret < 0)
> goto err_handles;
> - ctx->hw_id = ret;
>
> if (name) {
> ctx->name = kstrdup(name, GFP_KERNEL);
> @@ -85,11 +84,7 @@ void mock_context_close(struct i915_gem_context *ctx)
>
> void mock_init_contexts(struct drm_i915_private *i915)
> {
> - INIT_LIST_HEAD(&i915->contexts.list);
> - ida_init(&i915->contexts.hw_ida);
> -
> - INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
> - init_llist_head(&i915->contexts.free_list);
> + init_contexts(i915);
> }
>
> struct i915_gem_context *
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list