[Intel-gfx] [PATCH 22/50] drm/i915: Allow a context to define its set of engines

Mon Apr 15 12:31:40 UTC 2019

Quoting Tvrtko Ursulin (2019-04-15 13:19:45)
> > +     set.engines->i915 = ctx->i915;
> > +     for (n = 0; n < num_engines; n++) {
> > +             struct i915_engine_class_instance ci;
> > +             struct intel_engine_cs *engine;
> > +
> > +             if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
> > +                     __free_engines(set.engines, n);
> > +                     return -EFAULT;
> > +             }
> > +
> > +             if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID &&
> > +                 ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
> > +                     set.engines->engines[n] = NULL;
> > +                     continue;
> > +             }
> > +
> > +             engine = intel_engine_lookup_user(ctx->i915,
> > +                                               ci.engine_class,
> > +                                               ci.engine_instance);
> > +             if (!engine) {
> > +                     DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n",
> 
> Standardize early on "%u:%u" for class:instance? Although in this 
> specific case I am not the debug is so useful. (There are many other 
> ways the ioctl can fail.) Same goes for the earlier one.

I'm quietly adopting yaml inside messages :)

It's the -EINVAL that are a nightmare. -EFAULT, -ENOMEM, other
specialised errno are easy to recognise, but -EINVAL is anything and
usually what devs hit in practice.

I can take out the DRM_DEBUG again -- I really don't like using dmesg
for user error messages :)

> > +                               n, ci.engine_class, ci.engine_instance);
> > +                     __free_engines(set.engines, n);
> > +                     return -ENOENT;
> > +             }
> > +
> > +             set.engines->engines[n] = intel_context_create(ctx, engine);
> > +             if (!set.engines->engines[n]) {
> > +                     __free_engines(set.engines, n);
> > +                     return -ENOMEM;
> > +             }
> > +     }
> > +     set.engines->num_engines = num_engines;
> > +
> > +     err = -EFAULT;
> > +     if (!get_user(extensions, &user->extensions))
> > +             err = i915_user_extensions(u64_to_user_ptr(extensions),
> > +                                        set_engines__extensions,
> > +                                        ARRAY_SIZE(set_engines__extensions),
> > +                                        &set);
> > +     if (err) {
> > +             free_engines(set.engines);
> > +             return err;
> > +     }
> > +
> > +replace:
> > +     mutex_lock(&ctx->i915->drm.struct_mutex);
> > +     if (args->size)
> > +             i915_gem_context_set_user_engines(ctx);
> > +     else
> > +             i915_gem_context_clear_user_engines(ctx);
> > +     rcu_swap_protected(ctx->engines, set.engines, 1);
> > +     mutex_unlock(&ctx->i915->drm.struct_mutex);
> 
> Hm so what is the new engine list mutex for?

I missed replacing this with engine_mutex. It used to be required for
execbuf serialisation, however that is fine now with the RCU lookup so
we only need mutex around the assignment and prolonged access.

> > +     INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu);
> > +     queue_rcu_work(system_wq, &set.engines->rcu);
> > +
> > +     return 0;
> > +}
> > +
> > +static int
> > +get_engines(struct i915_gem_context *ctx,
> > +         struct drm_i915_gem_context_param *args)
> > +{
> > +     struct i915_context_param_engines __user *user;
> > +     struct i915_gem_engines *e;
> > +     size_t n, count, size;
> > +     int err = 0;
> > +
> > +     err = mutex_lock_interruptible(&ctx->engines_mutex);
> > +     if (err)
> > +             return err;
> > +
> > +     if (!i915_gem_context_user_engines(ctx)) {
> > +             args->size = 0;
> > +             goto unlock;
> > +     }
> > +
> > +     e = i915_gem_context_engine_list(ctx);
> > +     count = e->num_engines;
> > +
> > +     /* Be paranoid in case we have an impedance mismatch */
> > +     if (!check_struct_size(user, engines, count, &size)) {
> > +             err = -ENOMEM;
> > +             goto unlock;
> > +     }
> > +     if (unlikely(overflows_type(size, args->size))) {
> 
> I wouldn't bother with unlikely, none of the other failure modes does.

check_struct_size is unlikely, we can just push it into overflows_type I
guess.

> 
> > +             err = -ENOMEM;
> > +             goto unlock;
> 
> Or abuse a bit -E2BIG for the two?

No, the other choice here is -EINVAL.

> > +     }
> > +
> > +     if (!args->size) {
> > +             args->size = size;
> > +             goto unlock;
> > +     }
> > +
> > +     if (args->size < size) {
> > +             err = -EINVAL;
> > +             goto unlock;
> > +     }
> > +
> > +     user = u64_to_user_ptr(args->value);
> > +     if (!access_ok(user, size)) {
> > +             err = -EFAULT;
> > +             goto unlock;
> > +     }
> > +
> > +     if (put_user(0, &user->extensions)) {
> > +             err = -EFAULT;
> 
> Hm why?

Why an error for failing to write into the user struct? Or why are there
no extensions?

> > +             goto unlock;
> > +     }
> > +
> > +     for (n = 0; n < count; n++) {
> > +             struct i915_engine_class_instance ci = {
> > +                     .engine_class = I915_ENGINE_CLASS_INVALID,
> > +                     .engine_instance = I915_ENGINE_CLASS_INVALID_NONE,
> > +             };
> > +
> > +             if (e->engines[n]) {
> > +                     ci.engine_class = e->engines[n]->engine->uabi_class;
> > +                     ci.engine_instance = e->engines[n]->engine->instance;
> > +             }
> 
> I would be tempted to put the invalid initializers into the else block.

I'd be tempted too, but I liked the look of the 3 stanzas :)

> > +
> > +             if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
> 
> __copy_to_user after access_ok passed? Or there were some changes in 
> this area?

Just not expecting a hot ioctl, so I wasn't worried about a bit of
redundancy so chose not to argue why the "unsafe" variants are ok.

> > diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> > index 33e5a0e36e75..5284b50a4751 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> > @@ -139,6 +139,14 @@ struct i915_gem_context {
> >   #define CONTEXT_BANNED                      0
> >   #define CONTEXT_CLOSED                      1
> >   #define CONTEXT_FORCE_SINGLE_SUBMISSION     2
> > +#define CONTEXT_USER_ENGINES         3
> > +
> > +     /**
> > +      * @num_engines: Number of user defined engines for this context
> > +      *
> > +      * See @engines for the elements.
> > +      */
> > +     unsigned int num_engines;
> 
> Where is this set or used?

Tasty leftovers.
-Chris