[Intel-gfx] [PATCH 08/13] drm/i915: Allow a context to define its set of engines
Chris Wilson
chris at chris-wilson.co.uk
Fri Mar 8 16:47:38 UTC 2019
Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
>
> On 08/03/2019 14:12, Chris Wilson wrote:
> > Over the last few years, we have debated how to extend the user API to
> > support an increase in the number of engines, that may be sparse and
> > even be heterogeneous within a class (not all video decoders created
> > equal). We settled on using (class, instance) tuples to identify a
> > specific engine, with an API for the user to construct a map of engines
> > to capabilities. Into this picture, we then add a challenge of virtual
> > engines; one user engine that maps behind the scenes to any number of
> > physical engines. To keep it general, we want the user to have full
> > control over that mapping. To that end, we allow the user to constrain a
> > context by defining the set of engines that it can access, with the order
> > fully controlled by the user via (class, instance). With such precise control
> > in context setup, we can continue to use the existing execbuf uABI of
> > specifying a single index; only now it doesn't automagically map onto
> > the engines, it uses the user defined engine map from the context.
> >
> > The I915_EXEC_DEFAULT slot is left empty, and is invalid for use by
> > execbuf. Its use will be revealed in the next patch.
> >
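For illustration, here is a rough userspace sketch of the intended flow. The
struct and field names follow the uapi additions below; the context param name
(I915_CONTEXT_PARAM_ENGINES) and the exact execbuf indexing are assumptions on
my part rather than something the quoted hunks spell out:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    /* Mirrors the layout of struct i915_context_param_engines plus three
     * entries; packed so sizeof() carries no tail padding behind the array. */
    struct engine_map {
            __u64 extensions;
            struct {
                    __u16 engine_class;
                    __u16 engine_instance;
            } class_instance[3];
    } __attribute__((packed));

    static void define_engines(int fd, uint32_t ctx_id)
    {
            struct engine_map map = {
                    .extensions = 0,
                    .class_instance = {
                            /* slot 0 == I915_EXEC_DEFAULT, left empty */
                            { I915_ENGINE_CLASS_INVALID,
                              I915_ENGINE_CLASS_INVALID_NONE },
                            { I915_ENGINE_CLASS_RENDER, 0 },
                            { I915_ENGINE_CLASS_VIDEO, 1 },
                    },
            };
            struct drm_i915_gem_context_param param = {
                    .ctx_id = ctx_id,
                    .param = I915_CONTEXT_PARAM_ENGINES, /* assumed name */
                    .size = sizeof(map),
                    .value = (uintptr_t)&map,
            };

            ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &param);
            /* The execbuf ring selector now indexes class_instance[]:
             * e.g. an index of 2 would pick vcs1 in this map. */
    }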
> > v2: Fixup freeing of local on success of get_engines()
> >
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_gem_context.c | 204 +++++++++++++++++-
> > drivers/gpu/drm/i915/i915_gem_context_types.h | 4 +
> > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 +-
> > include/uapi/drm/i915_drm.h | 42 +++-
> > 4 files changed, 259 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 2cfc68b66944..86d9bea6f275 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -101,6 +101,21 @@ static struct i915_global_gem_context {
> > struct kmem_cache *slab_luts;
> > } global;
> >
> > +static struct intel_engine_cs *
> > +lookup_user_engine(struct i915_gem_context *ctx,
> > + unsigned long flags, u16 class, u16 instance)
> > +#define LOOKUP_USER_INDEX BIT(0)
> > +{
> > + if (flags & LOOKUP_USER_INDEX) {
> > + if (instance >= ctx->nengine)
> > + return NULL;
> > +
> > + return ctx->engines[instance];
> > + }
> > +
> > + return intel_engine_lookup_user(ctx->i915, class, instance);
> > +}
> > +
> > struct i915_lut_handle *i915_lut_handle_alloc(void)
> > {
> > return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
> > @@ -234,6 +249,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> > release_hw_id(ctx);
> > i915_ppgtt_put(ctx->ppgtt);
> >
> > + kfree(ctx->engines);
> > +
> > rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
> > it->ops->destroy(it);
> >
> > @@ -1311,9 +1328,9 @@ static int set_sseu(struct i915_gem_context *ctx,
> > if (user_sseu.flags || user_sseu.rsvd)
> > return -EINVAL;
> >
> > - engine = intel_engine_lookup_user(i915,
> > - user_sseu.engine_class,
> > - user_sseu.engine_instance);
> > + engine = lookup_user_engine(ctx, 0,
> > + user_sseu.engine_class,
> > + user_sseu.engine_instance);
> > if (!engine)
> > return -EINVAL;
> >
> > @@ -1331,9 +1348,154 @@ static int set_sseu(struct i915_gem_context *ctx,
> >
> > args->size = sizeof(user_sseu);
> >
> > + return 0;
> > +};
> > +
> > +struct set_engines {
> > + struct i915_gem_context *ctx;
> > + struct intel_engine_cs **engines;
> > + unsigned int nengine;
> > +};
> > +
> > +static const i915_user_extension_fn set_engines__extensions[] = {
> > +};
> > +
> > +static int
> > +set_engines(struct i915_gem_context *ctx,
> > + const struct drm_i915_gem_context_param *args)
> > +{
> > + struct i915_context_param_engines __user *user;
> > + struct set_engines set = { .ctx = ctx };
> > + u64 size, extensions;
> > + unsigned int n;
> > + int err;
> > +
> > + user = u64_to_user_ptr(args->value);
> > + size = args->size;
> > + if (!size)
> > + goto out;
>
> This prevents a hypothetical extension with empty map data.
No... This is required for resetting and I think that's covered in what
little docs there are. It's the set.nengine==0 test later
that you mean to object to. But we can't do that as that's how we
differentiate between modes at the moment.
We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.
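A minimal sketch of what I mean, taking ZERO_PTR to be ZERO_SIZE_PTR (none of
this is in the patch as posted):

    /* Hypothetical: accept a map with zero entries instead of -EINVAL. */
    set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
    if (set.nengine == 0) {
            set.engines = ZERO_SIZE_PTR; /* a map is defined, it is just empty */
    } else {
            set.engines = kmalloc_array(set.nengine,
                                        sizeof(*set.engines),
                                        GFP_KERNEL);
            if (!set.engines)
                    return -ENOMEM;
    }

ctx->engines == NULL would keep meaning "no map set, legacy behaviour",
ctx->engines == ZERO_SIZE_PTR "a map was set but has no entries", and kfree()
copes with either value.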
> > + BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
> > + if (size < sizeof(*user) || size % sizeof(*user->class_instance))
>
> IS_ALIGNED for the second condition for consistency with the BUILD_BUG_ON?
>
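For reference, the IS_ALIGNED spelling would be roughly the following (fine
here since sizeof(*user->class_instance) is a power of two):

    if (size < sizeof(*user) ||
        !IS_ALIGNED(size, sizeof(*user->class_instance)))
            return -EINVAL;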
> > + return -EINVAL;
> > +
> > + set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
> > + if (set.nengine == 0 || set.nengine > I915_EXEC_RING_MASK + 1)
>
> I would prefer we drop the size restriction since it doesn't apply to
> the engine map per se.
The u64 is a limit that will be non-trivial to lift. Marking the kernel's
current limit doesn't prevent it from being lifted later.
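For context, the bound mirrors how execbuf encodes its engine selector in the
low bits of the u64 flags word:

    /* i915_gem_execbuffer.c: engine selection today */
    unsigned int idx = args->flags & I915_EXEC_RING_MASK;

so a map with more than I915_EXEC_RING_MASK + 1 entries would contain slots
that execbuf could never address.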
> > + return -EINVAL;
> > +
> > + set.engines = kmalloc_array(set.nengine,
> > + sizeof(*set.engines),
> > + GFP_KERNEL);
> > + if (!set.engines)
> > + return -ENOMEM;
> > +
> > + for (n = 0; n < set.nengine; n++) {
> > + u16 class, inst;
> > +
> > + if (get_user(class, &user->class_instance[n].engine_class) ||
> > + get_user(inst, &user->class_instance[n].engine_instance)) {
> > + kfree(set.engines);
> > + return -EFAULT;
> > + }
> > +
> > + if (class == (u16)I915_ENGINE_CLASS_INVALID &&
> > + inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
> > + set.engines[n] = NULL;
> > + continue;
> > + }
> > +
> > + set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
> > + if (!set.engines[n]) {
> > + kfree(set.engines);
> > + return -ENOENT;
> > + }
> > + }
> > +
> > + err = -EFAULT;
> > + if (!get_user(extensions, &user->extensions))
> > + err = i915_user_extensions(u64_to_user_ptr(extensions),
> > + set_engines__extensions,
> > + ARRAY_SIZE(set_engines__extensions),
> > + &set);
> > + if (err) {
> > + kfree(set.engines);
> > + return err;
> > + }
> > +
> > +out:
> > + mutex_lock(&ctx->i915->drm.struct_mutex);
> > + kfree(ctx->engines);
> > + ctx->engines = set.engines;
> > + ctx->nengine = set.nengine;
> > + mutex_unlock(&ctx->i915->drm.struct_mutex);
> > +
> > return 0;
> > }
> >
> > +static int
> > +get_engines(struct i915_gem_context *ctx,
> > + struct drm_i915_gem_context_param *args)
> > +{
> > + struct i915_context_param_engines *local;
> > + unsigned int n, count, size;
> > + int err = 0;
> > +
> > +restart:
> > + count = READ_ONCE(ctx->nengine);
> > + if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
> > + return -ENOMEM; /* unrepresentable! */
>
> Probably overly paranoid since we can't end up with this state set.
And I thought you wanted many engines! Paranoia around kmalloc/user
overflows is always useful, because you know someone will send a patch
later (and smatch doesn't really care, as it only checks the limits of
types and local constraints).
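To make the concern concrete, the bound protects the size computation that
follows from wrapping; the same shape as the quoted hunk, with the reasoning
spelled out:

    count = READ_ONCE(ctx->nengine);

    /*
     * size = sizeof(*local) + count * sizeof(*local->class_instance)
     * must not overflow; bounding count against INT_MAX keeps the
     * kmalloc() below (and the later copy back to userspace) sane even
     * if the limit on nengine is ever raised.
     */
    if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
            return -ENOMEM; /* unrepresentable! */

    size = sizeof(*local) + count * sizeof(*local->class_instance);
    local = kmalloc(size, GFP_KERNEL);
    if (!local)
            return -ENOMEM;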
-Chris