[Intel-gfx] [PATCH] drm/i915: Reduce context HW ID lifetime

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Sep 3 09:59:01 UTC 2018


On 31/08/2018 13:36, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-08-30 17:23:43)
>>
>> On 30/08/2018 11:24, Chris Wilson wrote:
>>> +static int steal_hw_id(struct drm_i915_private *i915)
>>> +{
>>> +     struct i915_gem_context *ctx, *cn;
>>> +     LIST_HEAD(pinned);
>>> +     int id = -ENOSPC;
>>> +
>>> +     lockdep_assert_held(&i915->contexts.mutex);
>>> +
>>> +     list_for_each_entry_safe(ctx, cn,
>>> +                              &i915->contexts.hw_id_list, hw_id_link) {
>>> +             if (atomic_read(&ctx->pin_hw_id)) {
>>> +                     list_move_tail(&ctx->hw_id_link, &pinned);
>>> +                     continue;
>>> +             }
>>> +
>>> +             GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
>>> +             list_del_init(&ctx->hw_id_link);
>>> +             id = ctx->hw_id;
>>> +             break;
>>> +     }
>>> +
>>> +     list_splice_tail(&pinned, &i915->contexts.hw_id_list);
>>
>> Please put a comment explaining what this code is doing. Is it trying to
>> create some sort of LRU order?
> 
> LRSearched. Same as the shrinker, and eviction code if you would also
> review that ;)

Two things are infinite, the universe and your stream of patches! :)
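
Still, a one or two line comment would help the next reader. Something
along these lines perhaps (my wording, not from the patch):

	/*
	 * Contexts we could not steal from (still pinned) were moved to
	 * a private list and are now spliced back at the tail, so the
	 * next steal_hw_id() scans the least-recently-pinned contexts
	 * first - an approximate LRU search order, same idea as the
	 * shrinker and eviction code.
	 */
	list_splice_tail(&pinned, &i915->contexts.hw_id_list);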

>>
>>> +     return id;
>>> +}
>>> +
>>> +static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
>>> +{
>>> +     int ret;
>>> +
>>> +     lockdep_assert_held(&i915->contexts.mutex);
>>> +
>>> +     ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
>>> +     if (unlikely(ret < 0)) {
>>> +             ret = steal_hw_id(i915);
>>> +             if (ret < 0) /* once again for the correct erro code */
>>
>> errno
>>
>>> +                     ret = new_hw_id(i915, GFP_KERNEL);
>>
>> Hmm.. shouldn't you try GFP_KERNEL before attempting to steal? Actually
>> I think you should branch based on -ENOSPC (steal) vs -ENOMEM (retry
>> with GFP_KERNEL). Which would actually mean something like:
> 
> I was applying the same strategy as we use elsewhere. Penalise any
> driver cache before hitting reclaim.
> 
> I think that is fair from an application of soft backpressure point of
> view. (Lack of backpressure is probably a sore point for many.)

My concern was the lack of a phase which avoids hw id stealing for loads 
with few contexts but heavy memory pressure. It sounded like a thing worth 
"robustifying" against - you don't think so?

> 
>>> -     ret = ida_simple_get(&dev_priv->contexts.hw_ida,
>>> -                          0, max, GFP_KERNEL);
>>
>> Although now that I see this I am struggling not to say the change to
>> try a lighter weight allocation strategy first (gfp may fail) needs to
>> be split out to a separate patch.
> 
> Pardon? I appear to suddenly be hard of hearing.
> 
> The patch was all about the steal_hw_id().

Yes, but you could have kept the GFP_KERNEL ida_simple_get and only 
then fallen back to stealing. Or, as I said, GFP_MAYFAIL, then GFP_KERNEL, 
then steal.
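
Spelled out (again just a sketch):

	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (ret < 0)
		ret = new_hw_id(i915, GFP_KERNEL);
	if (ret < 0)
		ret = steal_hw_id(i915);

That way light memory pressure never causes an id to be stolen while
there are still free ids available.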

> 
>>> -     if (ret < 0) {
>>> -             /* Contexts are only released when no longer active.
>>> -              * Flush any pending retires to hopefully release some
>>> -              * stale contexts and try again.
>>> -              */
>>> -             i915_retire_requests(dev_priv);
>>> -             ret = ida_simple_get(&dev_priv->contexts.hw_ida,
>>> -                                  0, max, GFP_KERNEL);
>>> -             if (ret < 0)
>>> -                     return ret;
>>> -     }
>>> -
>>> -     *out = ret;
>>> -     return 0;
>>> -}
>>> -
>>>    static u32 default_desc_template(const struct drm_i915_private *i915,
>>>                                 const struct i915_hw_ppgtt *ppgtt)
>>>    {
>>> @@ -276,12 +324,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>>>        if (ctx == NULL)
>>>                return ERR_PTR(-ENOMEM);
>>>    
>>> -     ret = assign_hw_id(dev_priv, &ctx->hw_id);
>>> -     if (ret) {
>>> -             kfree(ctx);
>>> -             return ERR_PTR(ret);
>>> -     }
>>> -
>>>        kref_init(&ctx->ref);
>>>        list_add_tail(&ctx->link, &dev_priv->contexts.list);
>>>        ctx->i915 = dev_priv;
>>> @@ -295,6 +337,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>>>    
>>>        INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>>>        INIT_LIST_HEAD(&ctx->handles_list);
>>> +     INIT_LIST_HEAD(&ctx->hw_id_link);
>>>    
>>>        /* Default context will never have a file_priv */
>>>        ret = DEFAULT_CONTEXT_HANDLE;
>>> @@ -421,15 +464,35 @@ i915_gem_context_create_gvt(struct drm_device *dev)
>>>        return ctx;
>>>    }
>>>    
>>> +static void
>>> +destroy_kernel_context(struct i915_gem_context **ctxp)
>>> +{
>>> +     struct i915_gem_context *ctx;
>>> +
>>> +     /* Keep the context ref so that we can free it immediately ourselves */
>>> +     ctx = i915_gem_context_get(fetch_and_zero(ctxp));
>>> +     GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
>>> +
>>> +     context_close(ctx);
>>> +     i915_gem_context_free(ctx);
>>> +}
>>> +
>>>    struct i915_gem_context *
>>>    i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>>>    {
>>>        struct i915_gem_context *ctx;
>>> +     int err;
>>>    
>>>        ctx = i915_gem_create_context(i915, NULL);
>>>        if (IS_ERR(ctx))
>>>                return ctx;
>>>    
>>> +     err = i915_gem_context_pin_hw_id(ctx);
>>> +     if (err) {
>>> +             destroy_kernel_context(&ctx);
>>> +             return ERR_PTR(err);
>>> +     }
>>> +
>>>        i915_gem_context_clear_bannable(ctx);
>>>        ctx->sched.priority = prio;
>>>        ctx->ring_size = PAGE_SIZE;
>>> @@ -439,17 +502,19 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>>>        return ctx;
>>>    }
>>>    
>>> -static void
>>> -destroy_kernel_context(struct i915_gem_context **ctxp)
>>> +static void init_contexts(struct drm_i915_private *i915)
>>>    {
>>> -     struct i915_gem_context *ctx;
>>> +     mutex_init(&i915->contexts.mutex);
>>> +     INIT_LIST_HEAD(&i915->contexts.list);
>>>    
>>> -     /* Keep the context ref so that we can free it immediately ourselves */
>>> -     ctx = i915_gem_context_get(fetch_and_zero(ctxp));
>>> -     GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
>>> +     /* Using the simple ida interface, the max is limited by sizeof(int) */
>>> +     BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
>>> +     BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
>>> +     ida_init(&i915->contexts.hw_ida);
>>> +     INIT_LIST_HEAD(&i915->contexts.hw_id_list);
>>>    
>>> -     context_close(ctx);
>>> -     i915_gem_context_free(ctx);
>>> +     INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
>>> +     init_llist_head(&i915->contexts.free_list);
>>
>> Ugh, diff.. :) It looks like pure movement from the perspective of
>> destroy_kernel_context.
>>
>>>    }
>>>    
>>>    static bool needs_preempt_context(struct drm_i915_private *i915)
>>> @@ -470,14 +535,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>>>        if (ret)
>>>                return ret;
>>>    
>>> -     INIT_LIST_HEAD(&dev_priv->contexts.list);
>>> -     INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
>>> -     init_llist_head(&dev_priv->contexts.free_list);
>>> -
>>> -     /* Using the simple ida interface, the max is limited by sizeof(int) */
>>> -     BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
>>> -     BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
>>> -     ida_init(&dev_priv->contexts.hw_ida);
>>> +     init_contexts(dev_priv);
>>>    
>>>        /* lowest priority; idle task */
>>>        ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
>>> @@ -490,6 +548,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>>>         * all user contexts will have non-zero hw_id.
>>>         */
>>>        GEM_BUG_ON(ctx->hw_id);
>>> +     GEM_BUG_ON(!atomic_read(&ctx->pin_hw_id));
>>
>> /* Kernel context is perma-pinned */
>>
>>>        dev_priv->kernel_context = ctx;
>>>    
>>>        /* highest priority; preempting task */
>>> @@ -527,6 +586,7 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
>>>        destroy_kernel_context(&i915->kernel_context);
>>>    
>>>        /* Must free all deferred contexts (via flush_workqueue) first */
>>> +     GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
>>>        ida_destroy(&i915->contexts.hw_ida);
>>>    }
>>>    
>>> @@ -932,6 +992,33 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
>>>        return ret;
>>>    }
>>>    
>>> +int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
>>> +{
>>> +     struct drm_i915_private *i915 = ctx->i915;
>>> +     int err = 0;
>>> +
>>> +     mutex_lock(&i915->contexts.mutex);
>>> +
>>> +     GEM_BUG_ON(i915_gem_context_is_closed(ctx));
>>> +
>>> +     if (list_empty(&ctx->hw_id_link)) {
>>> +             GEM_BUG_ON(atomic_read(&ctx->pin_hw_id));
>>> +
>>> +             err = assign_hw_id(i915, &ctx->hw_id);
>>> +             if (err)
>>> +                     goto out_unlock;
>>> +
>>> +             list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
>>> +     }
>>> +
>>> +     GEM_BUG_ON(atomic_read(&ctx->pin_hw_id) == ~0u);
>>> +     atomic_inc(&ctx->pin_hw_id);
>>> +
>>> +out_unlock:
>>> +     mutex_unlock(&i915->contexts.mutex);
>>> +     return err;
>>> +}
>>> +
>>>    #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>>>    #include "selftests/mock_context.c"
>>>    #include "selftests/i915_gem_context.c"
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
>>> index 851dad6decd7..c73ac614f58c 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
>>> @@ -136,6 +136,8 @@ struct i915_gem_context {
>>>         * id for the lifetime of the context.
>>>         */
>>>        unsigned int hw_id;
>>> +     atomic_t pin_hw_id;
>>
>> I think now we need short comments describing the difference between the
>> two.
> 
> One is 32bits unsigned, unserialised. The other is 32bits signed, and
> very loosely serialised :)

And pin_hw_id is really a hw_id_pin_count, no? :)
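
Short comments on the new fields would then more or less write
themselves, e.g. (my wording, adjust as you see fit):

	/*
	 * @hw_id: hardware context id; only stable while @pin_hw_id is
	 * non-zero, may be recycled (stolen) between pins.
	 * @pin_hw_id: number of pins keeping @hw_id assigned - in effect
	 * a hw_id_pin_count.
	 * @hw_id_link: position on i915->contexts.hw_id_list while an id
	 * is assigned; empty when the context holds no id.
	 */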

> 
>>> +     struct list_head hw_id_link;
>>
>> And for this one.
> 
> [snip]
> 
>> So in essence there will be a little bit more cost when pinning in the
>> normal case, or a bit more in the stealing/pathological case, but as
>> long as we stay below over-subscription the cost is only on first pin.
>> No complaints there. Debug also won't be confusing in the normal case
>> since numbers will be stable.
> 
> Yup. Nice addition to the changelog, thanks.
>   
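
(For completeness: I assume the fast path inline, not shown in these
hunks, is just an atomic_inc_not_zero() which skips the mutex once an
id has been assigned - roughly:

	static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
	{
		if (atomic_inc_not_zero(&ctx->pin_hw_id))
			return 0; /* already holds an id; lockless */

		return __i915_gem_context_pin_hw_id(ctx); /* slow path */
	}

- hence the cost only on first pin.)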
>> Does it have any negative connotations in the world of OA? That is the
>> question for Lionel.
> 
> Lionel kept promising me this was ok, that he/gputop was quite ready for
> shorter-lived ctx ids and reuse.

Cool.

Regards,

Tvrtko

