[Intel-gfx] [RFC PATCH v2 2/2] drm/i915: Introduce a i915_gem_do_ww(){} utility

Thomas Hellström (Intel) thomas_os at shipmail.org
Tue Sep 22 13:31:31 UTC 2020


On 9/22/20 11:12 AM, Tvrtko Ursulin wrote:
>
> On 17/09/2020 19:59, Thomas Hellström (Intel) wrote:
>> From: Thomas Hellström <thomas.hellstrom at intel.com>
>>
>> With the huge number of sites where multiple-object locking is
>> needed in the driver, it becomes difficult to avoid recursive
>> ww_acquire_ctx initialization, and the function prototypes become
>> bloated passing the ww_acquire_ctx around. Furthermore it's not
>> always easy to get the -EDEADLK handling correct and to follow it.
>>
>> Introduce an i915_gem_do_ww() utility that tries to remedy all these
>> problems by enclosing parts of a ww transaction in the following way:
>>
>> my_function() {
>>     struct i915_gem_ww_ctx *ww, template;
>>     int err;
>>     bool interruptible = true;
>>
>>     i915_gem_do_ww(ww, &template, err, interruptible) {
>>         err = ww_transaction_part(ww);
>>     }
>>     return err;
>> }
>>
>> The utility will automatically look up an active ww_acquire_ctx if one
>> was initialized previously in the call chain, and if one is found it will
>> forward the -EDEADLK instead of handling it, which takes care of the
>> recursive initialization. Using the utility also discourages nested ww
>> unlocking / relocking, which is both very fragile and hard to follow.
>>
>> To look up and register an active ww_acquire_ctx, use a
>> driver-wide hash table for now. But noting that a task can only have
>> a single active ww_acquire_ctx per ww_class, the active ctx is really
>> task state, and a generic version of this utility in the ww_mutex code
>> could thus probably use a quick lookup from a list in the
>> struct task_struct.
>
> Maybe a stupid question, but is it safe to assume process context is 
> the only entry point to a ww transaction? I guess I was thinking about 
> things like background scrub/migrate threads, but yes, those would be 
> threads so would work. Other than those I have no idea who else could 
> need to lock multiple objects, so from this aspect it looks okay.
>
> But it is kind of neat to avoid changing deep hierarchies of function 
> prototypes.
>
> My concern is whether the approach isn't too "magicky"? I mean too 
> hidden and too stateful, and that some unwanted surprises could come 
> up when using this model. But it is a very vague feeling at this 
> point so I don't know.
>
> I also worry that if the graphics subsystem started thinking it is 
> so special that it needs dedicated handling in task_struct, it might 
> make the subsystem sound a bit pretentious. I had a quick browse 
> through struct task_struct and couldn't spot any precedent for such 
> things, so I don't know what the core kernel folks would say.

Thanks for taking a look, Tvrtko.

Yes, I've discussed this with Maarten and we'll favour changing the 
prototypes for now, although for code clarity and simplicity we could 
still use the i915_gem_do_ww() notation.

For any upstream move we would have to move the utility to locking / 
ww_mutex (with driver-specific adaptation still possible, so any 
additional task_struct fields would belong to locking rather than to 
graphics), so I'll post an RFC for a generic utility on dri-devel, 
time permitting, to see if there is any interest.
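For reference, the control flow the macro implements can be modeled in 
plain userspace C. This is only a sketch of the retry/nesting semantics: 
struct ww_model, model_do_ww() and fake_lock_all() are illustrative names, 
not the i915 API, and the context lookup and the actual ww backoff are 
stubbed out.

```c
#include <assert.h>
#include <errno.h>
#include <stdbool.h>

/*
 * Userspace model of the i915_gem_do_ww() loop semantics. Hypothetical
 * names throughout; the real code also looks up the task's active
 * ww_acquire_ctx and performs a real backoff on contention.
 */
struct ww_model {
	int call_depth;	/* > 0 when nested inside an outer transaction */
	bool loop;	/* keep re-running the loop body? */
	int backoffs;	/* how many -EDEADLK retries have happened */
};

static struct ww_model *model_init(struct ww_model *ww, bool nested)
{
	if (nested) {
		ww->call_depth++;	/* reuse the outer context */
	} else {
		ww->call_depth = 0;
		ww->backoffs = 0;
	}
	ww->loop = true;
	return ww;
}

static int model_fini(struct ww_model *ww, int err)
{
	ww->loop = false;
	if (ww->call_depth) {
		ww->call_depth--;	/* nested: forward -EDEADLK outwards */
		return err;
	}
	if (err == -EDEADLK) {
		ww->backoffs++;		/* outermost: back off, then retry */
		ww->loop = true;
	}
	return err;
}

/* Expands to a for-loop that re-runs its body until no -EDEADLK is seen. */
#define model_do_ww(_ww, _nested, _err) \
	for (model_init(_ww, _nested); (_ww)->loop; \
	     (_err) = model_fini(_ww, _err))

/* Pretend multi-object locking: contended once, then succeeds. */
static int fake_lock_all(struct ww_model *ww)
{
	return ww->backoffs ? 0 : -EDEADLK;
}
```

On the first pass fake_lock_all() reports contention, model_fini() sees 
-EDEADLK at depth zero and arms another pass, and the second pass 
succeeds — the same shape as the macro in the quoted patch.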

/Thomas

>
> Regards,
>
> Tvrtko
>
>>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_ww.c  | 74 ++++++++++++++++++++++++++++-
>>   drivers/gpu/drm/i915/i915_gem_ww.h  | 56 +++++++++++++++++++++-
>>   drivers/gpu/drm/i915/i915_globals.c |  1 +
>>   drivers/gpu/drm/i915/i915_globals.h |  1 +
>>   4 files changed, 130 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_ww.c 
>> b/drivers/gpu/drm/i915/i915_gem_ww.c
>> index 3490b72cf613..6247af1dba87 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_ww.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_ww.c
>> @@ -1,10 +1,12 @@
>>   // SPDX-License-Identifier: MIT
>>   /*
>> - * Copyright © 2020 Intel Corporation
>> + * Copyright © 2019 Intel Corporation
>>    */
>> +#include <linux/rhashtable.h>
>>   #include <linux/dma-resv.h>
>>   #include <linux/stacktrace.h>
>>   #include "i915_gem_ww.h"
>> +#include "i915_globals.h"
>>   #include "gem/i915_gem_object.h"
>>     void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
>> @@ -70,3 +72,73 @@ int __must_check i915_gem_ww_ctx_backoff(struct 
>> i915_gem_ww_ctx *ww)
>>         return ret;
>>   }
>> +
>> +static struct rhashtable ww_ht;
>> +static const struct rhashtable_params ww_params = {
>> +    .key_len = sizeof(struct task_struct *),
>> +    .key_offset = offsetof(struct i915_gem_ww_ctx, ctx.task),
>> +    .head_offset = offsetof(struct i915_gem_ww_ctx, head),
>> +    .min_size = 128,
>> +};
>> +
>> +static void i915_ww_item_free(void *ptr, void *arg)
>> +{
>> +    WARN_ON_ONCE(1);
>> +}
>> +
>> +static void i915_global_ww_exit(void)
>> +{
>> +    rhashtable_free_and_destroy(&ww_ht, i915_ww_item_free, NULL);
>> +}
>> +
>> +static void i915_global_ww_shrink(void)
>> +{
>> +}
>> +
>> +static struct i915_global global = {
>> +    .shrink = i915_global_ww_shrink,
>> +    .exit = i915_global_ww_exit,
>> +};
>> +
>> +int __init i915_global_ww_init(void)
>> +{
>> +    int ret = rhashtable_init(&ww_ht, &ww_params);
>> +
>> +    if (ret)
>> +        return ret;
>> +
>> +    i915_global_register(&global);
>> +
>> +    return 0;
>> +}
>> +
>> +void __i915_gem_ww_mark_unused(struct i915_gem_ww_ctx *ww)
>> +{
>> +    GEM_WARN_ON(rhashtable_remove_fast(&ww_ht, &ww->head, ww_params));
>> +}
>> +
>> +/**
>> + * __i915_gem_ww_locate_or_use - return the task's i915_gem_ww_ctx 
>> context
>> + * to use.
>> + *
>> + * @template: The context to use if there was none initialized 
>> previously
>> + * in the call chain.
>> + *
>> + * RETURN: The task's i915_gem_ww_ctx context.
>> + */
>> +struct i915_gem_ww_ctx *
>> +__i915_gem_ww_locate_or_use(struct i915_gem_ww_ctx *template)
>> +{
>> +    struct i915_gem_ww_ctx *tmp;
>> +
>> +    /* ctx.task is the hash key, so set it first. */
>> +    template->ctx.task = current;
>> +
>> +    /*
>> +     * Ideally we'd just hook the active context to the
>> +     * struct task_struct. But for now use a hash table.
>> +     */
>> +    tmp = rhashtable_lookup_get_insert_fast(&ww_ht, &template->head,
>> +                        ww_params);
>> +    return tmp;
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h 
>> b/drivers/gpu/drm/i915/i915_gem_ww.h
>> index 94fdf8c5f89b..b844596067c7 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_ww.h
>> +++ b/drivers/gpu/drm/i915/i915_gem_ww.h
>> @@ -6,18 +6,72 @@
>>   #define __I915_GEM_WW_H__
>>     #include <linux/stackdepot.h>
>> +#include <linux/rhashtable-types.h>
>>   #include <drm/drm_drv.h>
>>     struct i915_gem_ww_ctx {
>>       struct ww_acquire_ctx ctx;
>> +    struct rhash_head head;
>>       struct list_head obj_list;
>>       struct drm_i915_gem_object *contended;
>>       depot_stack_handle_t contended_bt;
>> -    bool intr;
>> +    u32 call_depth;
>> +    unsigned short intr;
>> +    unsigned short loop;
>>   };
>>     void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
>>   void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
>>   int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
>>   void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);
>> +
>> +/* Internal functions used by the inlines! Don't use. */
>> +void __i915_gem_ww_mark_unused(struct i915_gem_ww_ctx *ww);
>> +struct i915_gem_ww_ctx *
>> +__i915_gem_ww_locate_or_use(struct i915_gem_ww_ctx *template);
>> +
>> +static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int 
>> err)
>> +{
>> +    ww->loop = 0;
>> +    if (ww->call_depth) {
>> +        ww->call_depth--;
>> +        return err;
>> +    }
>> +
>> +    if (err == -EDEADLK) {
>> +        err = i915_gem_ww_ctx_backoff(ww);
>> +        if (!err)
>> +            ww->loop = 1;
>> +    }
>> +
>> +    if (!ww->loop) {
>> +        i915_gem_ww_ctx_fini(ww);
>> +        __i915_gem_ww_mark_unused(ww);
>> +    }
>> +
>> +    return err;
>> +}
>> +
>> +static inline struct i915_gem_ww_ctx *
>> +__i915_gem_ww_init(struct i915_gem_ww_ctx *template, bool intr)
>> +{
>> +    struct i915_gem_ww_ctx *ww = __i915_gem_ww_locate_or_use(template);
>> +
>> +    if (!ww) {
>> +        ww = template;
>> +        ww->call_depth = 0;
>> +        i915_gem_ww_ctx_init(ww, intr);
>> +    } else {
>> +        ww->call_depth++;
>> +    }
>> +
>> +    ww->loop = 1;
>> +
>> +    return ww;
>> +}
>> +
>> +#define i915_gem_do_ww(_ww, _template, _err, _intr) \
>> +    for ((_ww) = __i915_gem_ww_init(_template, _intr); (_ww)->loop; \
>> +         _err = __i915_gem_ww_fini(_ww, _err))
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/i915/i915_globals.c 
>> b/drivers/gpu/drm/i915/i915_globals.c
>> index 3aa213684293..9087cc8c2ee3 100644
>> --- a/drivers/gpu/drm/i915/i915_globals.c
>> +++ b/drivers/gpu/drm/i915/i915_globals.c
>> @@ -94,6 +94,7 @@ static __initconst int (* const initfn[])(void) = {
>>       i915_global_request_init,
>>       i915_global_scheduler_init,
>>       i915_global_vma_init,
>> +    i915_global_ww_init,
>>   };
>>     int __init i915_globals_init(void)
>> diff --git a/drivers/gpu/drm/i915/i915_globals.h 
>> b/drivers/gpu/drm/i915/i915_globals.h
>> index b2f5cd9b9b1a..5976b460ee39 100644
>> --- a/drivers/gpu/drm/i915/i915_globals.h
>> +++ b/drivers/gpu/drm/i915/i915_globals.h
>> @@ -34,5 +34,6 @@ int i915_global_objects_init(void);
>>   int i915_global_request_init(void);
>>   int i915_global_scheduler_init(void);
>>   int i915_global_vma_init(void);
>> +int i915_global_ww_init(void);
>>     #endif /* _I915_GLOBALS_H_ */
>>
