[Intel-gfx] [PATCH 08/13] drm/i915: Allow a context to define its set of engines

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Mar 11 09:23:44 UTC 2019


On 08/03/2019 16:47, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
>>
>> On 08/03/2019 14:12, Chris Wilson wrote:
>>> Over the last few years, we have debated how to extend the user API to
>>> support an increase in the number of engines, that may be sparse and
>>> even be heterogeneous within a class (not all video decoders created
>>> equal). We settled on using (class, instance) tuples to identify a
>>> specific engine, with an API for the user to construct a map of engines
>>> to capabilities. Into this picture, we then add a challenge of virtual
>>> engines; one user engine that maps behind the scenes to any number of
>>> physical engines. To keep it general, we want the user to have full
>>> control over that mapping. To that end, we allow the user to constrain a
>>> context to define the set of engines that it can access, order fully
>>> controlled by the user via (class, instance). With such precise control
>>> in context setup, we can continue to use the existing execbuf uABI of
>>> specifying a single index; only now it doesn't automagically map onto
>>> the engines, it uses the user defined engine map from the context.
>>>
>>> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
>>> execbuf. It's use will be revealed in the next patch.
>>>
>>> v2: Fixup freeing of local on success of get_engines()
>>>
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_context.c       | 204 +++++++++++++++++-
>>>    drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
>>>    drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  22 +-
>>>    include/uapi/drm/i915_drm.h                   |  42 +++-
>>>    4 files changed, 259 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 2cfc68b66944..86d9bea6f275 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -101,6 +101,21 @@ static struct i915_global_gem_context {
>>>        struct kmem_cache *slab_luts;
>>>    } global;
>>>    
>>> +static struct intel_engine_cs *
>>> +lookup_user_engine(struct i915_gem_context *ctx,
>>> +                unsigned long flags, u16 class, u16 instance)
>>> +#define LOOKUP_USER_INDEX BIT(0)
>>> +{
>>> +     if (flags & LOOKUP_USER_INDEX) {
>>> +             if (instance >= ctx->nengine)
>>> +                     return NULL;
>>> +
>>> +             return ctx->engines[instance];
>>> +     }
>>> +
>>> +     return intel_engine_lookup_user(ctx->i915, class, instance);
>>> +}
>>> +
>>>    struct i915_lut_handle *i915_lut_handle_alloc(void)
>>>    {
>>>        return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
>>> @@ -234,6 +249,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>>>        release_hw_id(ctx);
>>>        i915_ppgtt_put(ctx->ppgtt);
>>>    
>>> +     kfree(ctx->engines);
>>> +
>>>        rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
>>>                it->ops->destroy(it);
>>>    
>>> @@ -1311,9 +1328,9 @@ static int set_sseu(struct i915_gem_context *ctx,
>>>        if (user_sseu.flags || user_sseu.rsvd)
>>>                return -EINVAL;
>>>    
>>> -     engine = intel_engine_lookup_user(i915,
>>> -                                       user_sseu.engine_class,
>>> -                                       user_sseu.engine_instance);
>>> +     engine = lookup_user_engine(ctx, 0,
>>> +                                 user_sseu.engine_class,
>>> +                                 user_sseu.engine_instance);
>>>        if (!engine)
>>>                return -EINVAL;
>>>    
>>> @@ -1331,9 +1348,154 @@ static int set_sseu(struct i915_gem_context *ctx,
>>>    
>>>        args->size = sizeof(user_sseu);
>>>    
>>> +     return 0;
>>> +};
>>> +
>>> +struct set_engines {
>>> +     struct i915_gem_context *ctx;
>>> +     struct intel_engine_cs **engines;
>>> +     unsigned int nengine;
>>> +};
>>> +
>>> +static const i915_user_extension_fn set_engines__extensions[] = {
>>> +};
>>> +
>>> +static int
>>> +set_engines(struct i915_gem_context *ctx,
>>> +         const struct drm_i915_gem_context_param *args)
>>> +{
>>> +     struct i915_context_param_engines __user *user;
>>> +     struct set_engines set = { .ctx = ctx };
>>> +     u64 size, extensions;
>>> +     unsigned int n;
>>> +     int err;
>>> +
>>> +     user = u64_to_user_ptr(args->value);
>>> +     size = args->size;
>>> +     if (!size)
>>> +             goto out;
>>
>> This prevents a hypothetical extension with empty map data.
> 
> No... This is required for resetting and I think that's covered in what
> little docs there are. It's the set.nengine==0 test later
> that you mean to object to. But we can't do that as that's how we
> differentiate between modes at the moment.
> 
> We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.

size == sizeof(struct i915_context_param_engines) could mean reset - 
meaning no map array provided.

Meaning one could reset the map and still pass in extensions.

> 
>>> +     BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
>>> +     if (size < sizeof(*user) || size % sizeof(*user->class_instance))
>>
>> IS_ALIGNED for the second condition for consistency with the BUILD_BUG_ON?
>>
>>> +             return -EINVAL;
>>> +
>>> +     set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
>>> +     if (set.nengine == 0 || set.nengine > I915_EXEC_RING_MASK + 1)
>>
>> I would prefer we drop the size restriction since it doesn't apply to
>> the engine map per se.
> 
> u64 is a limit that will be non-trivial to lift. Marking the limits of
> the kernel doesn't restrict it being lifted later.

My thinking is that u64 limit applies to the load balancing extension, 
and the 64 engine limit applies to execbuf. Engine map itself is not 
limited. But I guess it is a theoretical/pointless discussion at this point.

> 
>>> +             return -EINVAL;
>>> +
>>> +     set.engines = kmalloc_array(set.nengine,
>>> +                                 sizeof(*set.engines),
>>> +                                 GFP_KERNEL);
>>> +     if (!set.engines)
>>> +             return -ENOMEM;
>>> +
>>> +     for (n = 0; n < set.nengine; n++) {
>>> +             u16 class, inst;
>>> +
>>> +             if (get_user(class, &user->class_instance[n].engine_class) ||
>>> +                 get_user(inst, &user->class_instance[n].engine_instance)) {
>>> +                     kfree(set.engines);
>>> +                     return -EFAULT;
>>> +             }
>>> +
>>> +             if (class == (u16)I915_ENGINE_CLASS_INVALID &&
>>> +                 inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
>>> +                     set.engines[n] = NULL;
>>> +                     continue;
>>> +             }
>>> +
>>> +             set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
>>> +             if (!set.engines[n]) {
>>> +                     kfree(set.engines);
>>> +                     return -ENOENT;
>>> +             }
>>> +     }
>>> +
>>> +     err = -EFAULT;
>>> +     if (!get_user(extensions, &user->extensions))
>>> +             err = i915_user_extensions(u64_to_user_ptr(extensions),
>>> +                                        set_engines__extensions,
>>> +                                        ARRAY_SIZE(set_engines__extensions),
>>> +                                        &set);
>>> +     if (err) {
>>> +             kfree(set.engines);
>>> +             return err;
>>> +     }
>>> +
>>> +out:
>>> +     mutex_lock(&ctx->i915->drm.struct_mutex);
>>> +     kfree(ctx->engines);
>>> +     ctx->engines = set.engines;
>>> +     ctx->nengine = set.nengine;
>>> +     mutex_unlock(&ctx->i915->drm.struct_mutex);
>>> +
>>>        return 0;
>>>    }
>>>    
>>> +static int
>>> +get_engines(struct i915_gem_context *ctx,
>>> +         struct drm_i915_gem_context_param *args)
>>> +{
>>> +     struct i915_context_param_engines *local;
>>> +     unsigned int n, count, size;
>>> +     int err = 0;
>>> +
>>> +restart:
>>> +     count = READ_ONCE(ctx->nengine);
>>> +     if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
>>> +             return -ENOMEM; /* unrepresentable! */
>>
>> Probably overly paranoid since we can't end up with this state set.
> 
> And I thought you wanted many engines! Paranoia around kmalloc/user
> oveflows is always useful, because you know someone will send a patch
> later (and smatch doesn't really care as it only checks the limits of
> types and local constraints).

Put a comment on what it is checking then. Why INT_MAX and not U32_MAX btw?

Regards,

Tvrtko


More information about the Intel-gfx mailing list