[Intel-gfx] [PATCH 12/39] drm/i915: Allow a context to define its set of engines

Chris Wilson chris at chris-wilson.co.uk
Thu Mar 14 17:15:30 UTC 2019


Quoting Tvrtko Ursulin (2019-03-14 16:47:05)
> 
> On 13/03/2019 14:43, Chris Wilson wrote:
> > Over the last few years, we have debated how to extend the user API to
> > support an increase in the number of engines, that may be sparse and
> > even be heterogeneous within a class (not all video decoders created
> > equal). We settled on using (class, instance) tuples to identify a
> > specific engine, with an API for the user to construct a map of engines
> > to capabilities. Into this picture, we then add a challenge of virtual
> > engines; one user engine that maps behind the scenes to any number of
> > physical engines. To keep it general, we want the user to have full
> > control over that mapping. To that end, we allow the user to constrain a
> > context to define the set of engines that it can access, order fully
> > controlled by the user via (class, instance). With such precise control
> > in context setup, we can continue to use the existing execbuf uABI of
> > specifying a single index; only now it doesn't automagically map onto
> > the engines, it uses the user defined engine map from the context.
> > 
> > The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> > execbuf. Its use will be revealed in the next patch.
> > 
> > v2: Fixup freeing of local on success of get_engines()
> > v3: Allow empty engines[]
> > 
> > Testcase: igt/gem_ctx_engines
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.c       | 223 +++++++++++++++++-
> >   drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
> >   drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  19 +-
> >   drivers/gpu/drm/i915/i915_utils.h             |  23 ++
> >   include/uapi/drm/i915_drm.h                   |  42 +++-
> >   5 files changed, 298 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index bac548584091..07377b75b563 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -86,7 +86,9 @@
> >    */
> >   
> >   #include <linux/log2.h>
> > +
> >   #include <drm/i915_drm.h>
> > +
> >   #include "i915_drv.h"
> >   #include "i915_globals.h"
> >   #include "i915_trace.h"
> > @@ -101,6 +103,21 @@ static struct i915_global_gem_context {
> >       struct kmem_cache *slab_luts;
> >   } global;
> >   
> > +static struct intel_engine_cs *
> > +lookup_user_engine(struct i915_gem_context *ctx,
> > +                unsigned long flags, u16 class, u16 instance)
> 
> You didn't like reducing flags to 32-bit for now?
> 
> > +#define LOOKUP_USER_INDEX BIT(0)

No, it's just a register and ^ implies unsigned long.

> > +{
> > +     if (flags & LOOKUP_USER_INDEX) {
> > +             if (instance >= ctx->nengine)
> > +                     return NULL;
> > +
> > +             return ctx->engines[instance];
> > +     }
> > +
> > +     return intel_engine_lookup_user(ctx->i915, class, instance);
> > +}
> > +
> >   struct i915_lut_handle *i915_lut_handle_alloc(void)
> >   {
> >       return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
> > @@ -235,6 +252,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> >       release_hw_id(ctx);
> >       i915_ppgtt_put(ctx->ppgtt);
> >   
> > +     kfree(ctx->engines);
> > +
> >       rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
> >               intel_context_put(it);
> >   
> > @@ -1371,9 +1390,9 @@ static int set_sseu(struct i915_gem_context *ctx,
> >       if (user_sseu.flags || user_sseu.rsvd)
> >               return -EINVAL;
> >   
> > -     engine = intel_engine_lookup_user(i915,
> > -                                       user_sseu.engine_class,
> > -                                       user_sseu.engine_instance);
> > +     engine = lookup_user_engine(ctx, 0,
> > +                                 user_sseu.engine_class,
> > +                                 user_sseu.engine_instance);
> >       if (!engine)
> >               return -EINVAL;
> >   
> > @@ -1391,9 +1410,163 @@ static int set_sseu(struct i915_gem_context *ctx,
> >   
> >       args->size = sizeof(user_sseu);
> >   
> > +     return 0;
> > +};
> > +
> > +struct set_engines {
> > +     struct i915_gem_context *ctx;
> > +     struct intel_engine_cs **engines;
> > +     unsigned int nengine;
> > +};
> > +
> > +static const i915_user_extension_fn set_engines__extensions[] = {
> > +};
> > +
> > +static int
> > +set_engines(struct i915_gem_context *ctx,
> > +         const struct drm_i915_gem_context_param *args)
> > +{
> > +     struct i915_context_param_engines __user *user;
> > +     struct set_engines set = { .ctx = ctx };
> > +     u64 size, extensions;
> > +     unsigned int n;
> > +     int err;
> > +
> > +     user = u64_to_user_ptr(args->value);
> > +     size = args->size;
> > +     if (!size)
> > +             goto out;
> > +
> > +     BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
> > +     if (size < sizeof(*user) ||
> > +         !IS_ALIGNED(size, sizeof(*user->class_instance)))
> > +             return -EINVAL;
> > +
> > +     set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
> > +     if (set.nengine > I915_EXEC_RING_MASK + 1)
> > +             return -EINVAL;
> > +
> > +     set.engines = kmalloc_array(set.nengine,
> > +                                 sizeof(*set.engines),
> > +                                 GFP_KERNEL);
> > +     if (!set.engines)
> > +             return -ENOMEM;
> > +
> > +     for (n = 0; n < set.nengine; n++) {
> > +             u16 class, inst;
> > +
> > +             if (get_user(class, &user->class_instance[n].engine_class) ||
> > +                 get_user(inst, &user->class_instance[n].engine_instance)) {
> > +                     kfree(set.engines);
> > +                     return -EFAULT;
> > +             }
> > +
> > +             if (class == (u16)I915_ENGINE_CLASS_INVALID &&
> > +                 inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
> > +                     set.engines[n] = NULL;
> > +                     continue;
> > +             }
> > +
> > +             set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
> > +             if (!set.engines[n]) {
> > +                     kfree(set.engines);
> > +                     return -ENOENT;
> > +             }
> > +     }
> > +
> > +     err = -EFAULT;
> > +     if (!get_user(extensions, &user->extensions))
> > +             err = i915_user_extensions(u64_to_user_ptr(extensions),
> > +                                        set_engines__extensions,
> > +                                        ARRAY_SIZE(set_engines__extensions),
> > +                                        &set);
> > +     if (err) {
> > +             kfree(set.engines);
> > +             return err;
> > +     }
> > +
> > +out:
> > +     mutex_lock(&ctx->i915->drm.struct_mutex);
> > +     kfree(ctx->engines);
> > +     ctx->engines = set.engines;
> > +     ctx->nengine = set.nengine;
> > +     mutex_unlock(&ctx->i915->drm.struct_mutex);
> > +
> >       return 0;
> >   }
> >   
> > +static int
> > +get_engines(struct i915_gem_context *ctx,
> > +         struct drm_i915_gem_context_param *args)
> > +{
> > +     struct i915_context_param_engines *local;
> > +     size_t n, count, size;
> > +     int err = 0;
> > +
> > +restart:
> > +     if (!READ_ONCE(ctx->engines)) {
> 
> ZERO_OR_NULL_PTR?

NULL => default, size = 0;

ZERO_SIZE_PTR => nengines == 0, size = sizeof(base)

> > +             args->size = 0;
> > +             return 0;
> > +     }
> > +
> > +     count = READ_ONCE(ctx->nengine);
> > +
> > +     /* Be paranoid in case we have an impedance mismatch */
> > +     if (!check_struct_size(local, class_instance, count, &size))
> > +             return -ENOMEM;
> > +     if (unlikely(overflows_type(size, args->size)))
> > +             return -ENOMEM;
> > +
> > +     if (!args->size) {
> > +             args->size = size;
> > +             return 0;
> > +     }
> > +     if (args->size < size)
> > +             return -EINVAL;
> 
> else if would read nicer if no blank line.

Blank line it is.

> > +
> > +     local = kmalloc(size, GFP_KERNEL);
> > +     if (!local)
> > +             return -ENOMEM;
> > +
> > +     if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
> > +             err = -EINTR;
> > +             goto out;
> > +     }
> > +
> > +     if (!ctx->engines || ctx->nengine != count) {
> 
> Count check would be sufficient AFAICT.

Not quite. We may have gone from no engines back to default config
before taking the lock, so count would be the same, but args->size
should report differently.

> > +             mutex_unlock(&ctx->i915->drm.struct_mutex);
> > +             kfree(local);
> > +             goto restart;
> > +     }
> > +
> > +     local->extensions = 0;
> > +     for (n = 0; n < count; n++) {
> > +             if (ctx->engines[n]) {
> > +                     local->class_instance[n].engine_class =
> > +                             ctx->engines[n]->uabi_class;
> > +                     local->class_instance[n].engine_instance =
> > +                             ctx->engines[n]->instance;
> > +             } else {
> > +                     local->class_instance[n].engine_class =
> > +                             I915_ENGINE_CLASS_INVALID;
> > +                     local->class_instance[n].engine_instance =
> > +                             I915_ENGINE_CLASS_INVALID_NONE;
> > +             }
> > +     }
> > +
> > +     mutex_unlock(&ctx->i915->drm.struct_mutex);
> > +
> > +     if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
> > +             err = -EFAULT;
> > +             goto out;
> > +     }
> > +     args->size = size;
> > +
> > +out:
> > +     kfree(local);
> > +     return err;
> > +}
> > +
> >   static int ctx_setparam(struct i915_gem_context *ctx,
> >                       struct drm_i915_gem_context_param *args)
> >   {
> > @@ -1466,6 +1639,10 @@ static int ctx_setparam(struct i915_gem_context *ctx,
> >               ret = set_ppgtt(ctx, args);
> >               break;
> >   
> > +     case I915_CONTEXT_PARAM_ENGINES:
> > +             ret = set_engines(ctx, args);
> > +             break;
> > +
> >       case I915_CONTEXT_PARAM_BAN_PERIOD:
> >       default:
> >               ret = -EINVAL;
> > @@ -1519,6 +1696,30 @@ static int clone_sseu(struct i915_gem_context *dst,
> >       return 0;
> >   }
> >   
> > +static int clone_engines(struct i915_gem_context *dst,
> > +                      struct i915_gem_context *src)
> > +{
> > +     struct intel_engine_cs **engines;
> > +     unsigned int nengine;
> > +
> > +     mutex_lock(&src->i915->drm.struct_mutex); /* serialise src->engine[] */
> > +     nengine = src->nengine;
> > +     if (!ZERO_OR_NULL_PTR(src->engines))
> 
> A comment taking note of the reason for the ZERO_PTR trick.

I must have deleted /* XXX why kmemdup, why? */

> > +             engines = kmemdup(src->engines,
> > +                               sizeof(*src->engines) * nengine,
> > +                               GFP_KERNEL);
> > +     else
> > +             engines = src->engines;
> > +     mutex_unlock(&src->i915->drm.struct_mutex);
> > +     if (!engines && nengine)
> > +             return -ENOMEM;
> > +
> > +     kfree(dst->engines);
> > +     dst->engines = engines;
> > +     dst->nengine = nengine;
> > +     return 0;
> > +}
> > +
> >   static int create_clone(struct i915_user_extension __user *ext, void *data)
> >   {
> >       struct drm_i915_gem_context_create_ext_clone local;
> > @@ -1587,6 +1788,12 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
> >               }
> >       }
> >   
> > +     if (local.flags & I915_CONTEXT_CLONE_ENGINES) {
> > +             err = clone_engines(dst, src);
> > +             if (err)
> > +                     return err;
> > +     }
> > +
> >       return 0;
> >   }
> >   
> > @@ -1705,9 +1912,9 @@ static int get_sseu(struct i915_gem_context *ctx,
> >       if (user_sseu.flags || user_sseu.rsvd)
> >               return -EINVAL;
> >   
> > -     engine = intel_engine_lookup_user(ctx->i915,
> > -                                       user_sseu.engine_class,
> > -                                       user_sseu.engine_instance);
> > +     engine = lookup_user_engine(ctx, 0,
> > +                                 user_sseu.engine_class,
> > +                                 user_sseu.engine_instance);
> 
> Belongs to a later patch but OK.

Hmm, you'd rather have it later. I felt introducing the concept that we
have ctx->engines[] implied we should also be aware when looking up
engines in a context related fashion. I felt they were connected enough
that the interface deserved to be in this patch with use through the
uAPI later.

> > @@ -110,6 +112,8 @@ struct i915_gem_context {
> >   #define CONTEXT_CLOSED                      1
> >   #define CONTEXT_FORCE_SINGLE_SUBMISSION     2
> >   
> > +     unsigned int nengine;
> 
> Can I tempt you to pluralise this? I at least find it more obvious in 
> plural.

Bah, historical precedent is
int   nengine;
void *engines;

> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index ee6d301a9627..70a26f0a9f1e 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -2090,13 +2090,20 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
> >   };
> >   
> >   static struct intel_engine_cs *
> > -eb_select_engine(struct drm_i915_private *dev_priv,
> > +eb_select_engine(struct i915_execbuffer *eb,
> >                struct drm_file *file,
> >                struct drm_i915_gem_execbuffer2 *args)
> >   {
> >       unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
> >       struct intel_engine_cs *engine;
> >   
> > +     if (eb->ctx->engines) {
> > +             if (user_ring_id >= eb->ctx->nengine)
> 
> So empty engine array blocks off submission. It's probably okay, just 
> need to remember throughout the code not to check "if (ctx->engines)" 
> only. I guess someone will get burnt on that sooner or later, but I 
> don't think it's a big deal.

But as you say, it should only matter at the uAPI boundary. After this
point, we have our intel_engine_cs assigned; and hopefully we end up
with intel_context being our primary source of truth internally.

The gem_context should only be used in the GEM boundary, so expect some
more cleanup!

> > +static inline bool
> > +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
> > +{
> > +     size_t sz;
> > +
> > +     if (check_mul_overflow(count, arr, &sz))
> > +             return false;
> > +
> > +     if (check_add_overflow(sz, base, &sz))
> > +             return false;
> > +
> > +     *size = sz;
> > +     return true;
> > +}
> > +
> > +#define check_struct_size(T, A, C, SZ) \
> > +     likely(__check_struct_size(sizeof(*(T)), \
> > +                                sizeof(*(T)->A) + __must_be_array((T)->A), \
> > +                                C, SZ))
> 
> A comment explaining usage would be nice.

/* see struct_size() */
-Chris


More information about the Intel-gfx mailing list