[Intel-gfx] [PATCH 13/18] drm/i915: Allow a context to define its set of engines
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Mar 20 13:20:25 UTC 2019
On 19/03/2019 11:57, Chris Wilson wrote:
> Over the last few years, we have debated how to extend the user API to
> support an increase in the number of engines, that may be sparse and
> even be heterogeneous within a class (not all video decoders created
> equal). We settled on using (class, instance) tuples to identify a
> specific engine, with an API for the user to construct a map of engines
> to capabilities. Into this picture, we then add a challenge of virtual
> engines; one user engine that maps behind the scenes to any number of
> physical engines. To keep it general, we want the user to have full
> control over that mapping. To that end, we allow the user to constrain a
> context to define the set of engines that it can access, order fully
> controlled by the user via (class, instance). With such precise control
> in context setup, we can continue to use the existing execbuf uABI of
> specifying a single index; only now it doesn't automagically map onto
> the engines, it uses the user defined engine map from the context.
>
> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> execbuf. Its use will be revealed in the next patch.
>
> v2: Fixup freeing of local on success of get_engines()
> v3: Allow empty engines[]
> v4: s/nengine/num_engines/
>
> Testcase: igt/gem_ctx_engines
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_context.c | 226 +++++++++++++++++-
> drivers/gpu/drm/i915/i915_gem_context_types.h | 21 ++
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +-
> drivers/gpu/drm/i915/i915_utils.h | 36 +++
> include/uapi/drm/i915_drm.h | 42 +++-
> 5 files changed, 331 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index f36648329074..f038c15e73d8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -86,7 +86,9 @@
> */
>
> #include <linux/log2.h>
> +
> #include <drm/i915_drm.h>
> +
> #include "i915_drv.h"
> #include "i915_globals.h"
> #include "i915_trace.h"
> @@ -101,6 +103,21 @@ static struct i915_global_gem_context {
> struct kmem_cache *slab_luts;
> } global;
>
> +static struct intel_engine_cs *
> +lookup_user_engine(struct i915_gem_context *ctx,
> + unsigned long flags, u16 class, u16 instance)
> +#define LOOKUP_USER_INDEX BIT(0)
> +{
> + if (flags & LOOKUP_USER_INDEX) {
> + if (instance >= ctx->num_engines)
> + return NULL;
> +
> + return ctx->engines[instance];
> + }
> +
> + return intel_engine_lookup_user(ctx->i915, class, instance);
> +}
> +
> struct i915_lut_handle *i915_lut_handle_alloc(void)
> {
> return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
> @@ -235,6 +252,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> release_hw_id(ctx);
> i915_ppgtt_put(ctx->ppgtt);
>
> + kfree(ctx->engines);
> +
> rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
> intel_context_put(it);
>
> @@ -1377,9 +1396,9 @@ static int set_sseu(struct i915_gem_context *ctx,
> if (user_sseu.flags || user_sseu.rsvd)
> return -EINVAL;
>
> - engine = intel_engine_lookup_user(i915,
> - user_sseu.engine_class,
> - user_sseu.engine_instance);
> + engine = lookup_user_engine(ctx, 0,
> + user_sseu.engine_class,
> + user_sseu.engine_instance);
> if (!engine)
> return -EINVAL;
>
> @@ -1397,9 +1416,166 @@ static int set_sseu(struct i915_gem_context *ctx,
>
> args->size = sizeof(user_sseu);
>
> + return 0;
> +};
> +
> +struct set_engines {
> + struct i915_gem_context *ctx;
> + struct intel_engine_cs **engines;
> + unsigned int num_engines;
> +};
> +
> +static const i915_user_extension_fn set_engines__extensions[] = {
> +};
> +
> +static int
> +set_engines(struct i915_gem_context *ctx,
> + const struct drm_i915_gem_context_param *args)
> +{
> + struct i915_context_param_engines __user *user;
> + struct set_engines set = { .ctx = ctx };
> + u64 size, extensions;
> + unsigned int n;
> + int err;
> +
> + user = u64_to_user_ptr(args->value);
> + size = args->size;
> + if (!size)
> + goto out;
> +
> + BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
> + if (size < sizeof(*user) ||
> + !IS_ALIGNED(size, sizeof(*user->class_instance)))
> + return -EINVAL;
> +
> + /* Internal limitation of u64 bitmaps + a few bits of u64 in the uABI */
> + set.num_engines =
> + (size - sizeof(*user)) / sizeof(*user->class_instance);
> + if (set.num_engines > I915_EXEC_RING_MASK + 1)
> + return -EINVAL;
> +
> + set.engines = kmalloc_array(set.num_engines,
> + sizeof(*set.engines),
> + GFP_KERNEL);
> + if (!set.engines)
> + return -ENOMEM;
> +
> + for (n = 0; n < set.num_engines; n++) {
> + u16 class, inst;
> +
> + if (get_user(class, &user->class_instance[n].engine_class) ||
> + get_user(inst, &user->class_instance[n].engine_instance)) {
> + kfree(set.engines);
> + return -EFAULT;
> + }
> +
> + if (class == (u16)I915_ENGINE_CLASS_INVALID &&
> + inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
> + set.engines[n] = NULL;
> + continue;
> + }
> +
> + set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
> + if (!set.engines[n]) {
> + kfree(set.engines);
> + return -ENOENT;
> + }
> + }
> +
> + err = -EFAULT;
> + if (!get_user(extensions, &user->extensions))
> + err = i915_user_extensions(u64_to_user_ptr(extensions),
> + set_engines__extensions,
> + ARRAY_SIZE(set_engines__extensions),
> + &set);
> + if (err) {
> + kfree(set.engines);
> + return err;
> + }
> +
> +out:
> + mutex_lock(&ctx->i915->drm.struct_mutex);
> + kfree(ctx->engines);
> + ctx->engines = set.engines;
> + ctx->num_engines = set.num_engines;
> + mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> return 0;
> }
>
> +static int
> +get_engines(struct i915_gem_context *ctx,
> + struct drm_i915_gem_context_param *args)
> +{
> + struct i915_context_param_engines *local;
> + size_t n, count, size;
> + int err = 0;
> +
> +restart:
> + if (!READ_ONCE(ctx->engines)) {
> + args->size = 0;
> + return 0;
> + }
> +
> + count = READ_ONCE(ctx->num_engines);
> +
> + /* Be paranoid in case we have an impedance mismatch */
> + if (!check_struct_size(local, class_instance, count, &size))
> + return -ENOMEM;
> + if (unlikely(overflows_type(size, args->size)))
> + return -ENOMEM;
> +
> + if (!args->size) {
> + args->size = size;
> + return 0;
> + }
> +
> + if (args->size < size)
> + return -EINVAL;
> +
> + local = kmalloc(size, GFP_KERNEL);
> + if (!local)
> + return -ENOMEM;
> +
> + if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
> + err = -EINTR;
> + goto out;
> + }
> +
> + if (!ctx->engines || ctx->num_engines != count) {
> + mutex_unlock(&ctx->i915->drm.struct_mutex);
> + kfree(local);
> + goto restart;
> + }
> +
> + local->extensions = 0;
> + for (n = 0; n < count; n++) {
> + if (ctx->engines[n]) {
> + local->class_instance[n].engine_class =
> + ctx->engines[n]->uabi_class;
> + local->class_instance[n].engine_instance =
> + ctx->engines[n]->instance;
> + } else {
> + local->class_instance[n].engine_class =
> + I915_ENGINE_CLASS_INVALID;
> + local->class_instance[n].engine_instance =
> + I915_ENGINE_CLASS_INVALID_NONE;
> + }
> + }
> +
> + mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> + if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
> + err = -EFAULT;
> + goto out;
> + }
> + args->size = size;
> +
> +out:
> + kfree(local);
> + return err;
> +}
> +
> static int ctx_setparam(struct i915_gem_context *ctx,
> struct drm_i915_gem_context_param *args)
> {
> @@ -1472,6 +1648,10 @@ static int ctx_setparam(struct i915_gem_context *ctx,
> ret = set_ppgtt(ctx, args);
> break;
>
> + case I915_CONTEXT_PARAM_ENGINES:
> + ret = set_engines(ctx, args);
> + break;
> +
> case I915_CONTEXT_PARAM_BAN_PERIOD:
> default:
> ret = -EINVAL;
> @@ -1500,6 +1680,35 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
> return ctx_setparam(arg->ctx, &local.param);
> }
>
> +static int clone_engines(struct i915_gem_context *dst,
> + struct i915_gem_context *src)
> +{
> + struct intel_engine_cs **engines;
> + unsigned int num_engines;
> +
> + mutex_lock(&src->i915->drm.struct_mutex); /* serialise src->engine[] */
> +
> + /* handle ZERO_SIZE_PTR on behalf of kmemdup */
> + num_engines = src->num_engines;
> + engines = src->engines;
> + if (!ZERO_OR_NULL_PTR(engines)) {
> + engines = kmemdup(engines,
> + sizeof(*engines) * num_engines,
> + GFP_KERNEL);
> + if (!engines) {
> + mutex_unlock(&src->i915->drm.struct_mutex);
> + return -ENOMEM;
> + }
> + }
> +
> + mutex_unlock(&src->i915->drm.struct_mutex);
> +
> + kfree(dst->engines);
> + dst->engines = engines;
> + dst->num_engines = num_engines;
> + return 0;
> +}
> +
> static int clone_flags(struct i915_gem_context *dst,
> struct i915_gem_context *src)
> {
> @@ -1608,6 +1817,7 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
> static int (* const fn[])(struct i915_gem_context *dst,
> struct i915_gem_context *src) = {
> #define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
> + MAP(ENGINES, clone_engines),
> MAP(FLAGS, clone_flags),
> MAP(SCHEDATTR, clone_schedattr),
> MAP(SSEU, clone_sseu),
> @@ -1770,9 +1980,9 @@ static int get_sseu(struct i915_gem_context *ctx,
> if (user_sseu.flags || user_sseu.rsvd)
> return -EINVAL;
>
> - engine = intel_engine_lookup_user(ctx->i915,
> - user_sseu.engine_class,
> - user_sseu.engine_instance);
> + engine = lookup_user_engine(ctx, 0,
> + user_sseu.engine_class,
> + user_sseu.engine_instance);
> if (!engine)
> return -EINVAL;
>
> @@ -1853,6 +2063,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> ret = get_ppgtt(ctx, args);
> break;
>
> + case I915_CONTEXT_PARAM_ENGINES:
> + ret = get_engines(ctx, args);
> + break;
> +
> case I915_CONTEXT_PARAM_BAN_PERIOD:
> default:
> ret = -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index e2ec58b10fb2..46b6080b2240 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -41,6 +41,20 @@ struct i915_gem_context {
> /** file_priv: owning file descriptor */
> struct drm_i915_file_private *file_priv;
>
> + /**
> + * @engines: User defined engines for this context
> + *
> + * NULL means to use legacy definitions (including random meaning of
> + * I915_EXEC_BSD with I915_EXEC_BSD_SELECTOR overrides).
> + *
> + * If defined, execbuf uses the I915_EXEC_RING_MASK as an index into
> + * this array, and various other uAPI offer the ability to look up an
> + * index from this array to select an engine to operate on.
> + *
> + * User defined by I915_CONTEXT_PARAM_ENGINES.
> + */
> + struct intel_engine_cs **engines;
> +
> struct i915_timeline *timeline;
>
> /**
> @@ -110,6 +124,13 @@ struct i915_gem_context {
> #define CONTEXT_CLOSED 1
> #define CONTEXT_FORCE_SINGLE_SUBMISSION 2
>
> + /**
> + * @num_engines: Number of user defined engines for this context
> + *
> + * See @engines for the elements.
> + */
> + unsigned int num_engines;
> +
> /**
> * @hw_id: - unique identifier for the context
> *
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3d672c9edb94..66b3921cc8bd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2089,13 +2089,20 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
> };
>
> static struct intel_engine_cs *
> -eb_select_engine(struct drm_i915_private *dev_priv,
> +eb_select_engine(struct i915_execbuffer *eb,
> struct drm_file *file,
> struct drm_i915_gem_execbuffer2 *args)
> {
> unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
> struct intel_engine_cs *engine;
>
> + if (eb->ctx->engines) {
> + if (user_ring_id >= eb->ctx->num_engines)
> + return NULL;
> +
> + return eb->ctx->engines[user_ring_id];
> + }
> +
> if (user_ring_id > I915_USER_RINGS) {
> DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
> return NULL;
> @@ -2108,11 +2115,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
> return NULL;
> }
>
> - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
> + if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
> unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>
> if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> - bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
> + bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
> } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
> bsd_idx <= I915_EXEC_BSD_RING2) {
> bsd_idx >>= I915_EXEC_BSD_SHIFT;
> @@ -2123,9 +2130,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
> return NULL;
> }
>
> - engine = dev_priv->engine[_VCS(bsd_idx)];
> + engine = eb->i915->engine[_VCS(bsd_idx)];
> } else {
> - engine = dev_priv->engine[user_ring_map[user_ring_id]];
> + engine = eb->i915->engine[user_ring_map[user_ring_id]];
> }
>
> if (!engine) {
> @@ -2335,7 +2342,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> if (unlikely(err))
> goto err_destroy;
>
> - eb.engine = eb_select_engine(eb.i915, file, args);
> + eb.engine = eb_select_engine(&eb, file, args);
> if (!eb.engine) {
> err = -EINVAL;
> goto err_engine;
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index 2dbe8933b50a..1436fe2fb5f8 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -25,6 +25,9 @@
> #ifndef __I915_UTILS_H
> #define __I915_UTILS_H
>
> +#include <linux/kernel.h>
> +#include <linux/overflow.h>
> +
> #undef WARN_ON
> /* Many gcc seem to no see through this and fall over :( */
> #if 0
> @@ -73,6 +76,39 @@
> #define overflows_type(x, T) \
> (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
>
> +static inline bool
> +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
> +{
> + size_t sz;
> +
> + if (check_mul_overflow(count, arr, &sz))
> + return false;
> +
> + if (check_add_overflow(sz, base, &sz))
> + return false;
> +
> + *size = sz;
> + return true;
> +}
> +
> +/**
> + * check_struct_size() - Calculate size of structure with trailing array.
> + * @p: Pointer to the structure.
> + * @member: Name of the array member.
> + * @n: Number of elements in the array.
> + * @sz: Total size of structure and array
> + *
> + * Calculates size of memory needed for structure @p followed by an
> + * array of @n @member elements, like struct_size() but reports
> + * whether it overflowed, and the resultant size in @sz
> + *
> + * Return: false if the calculation overflowed.
> + */
> +#define check_struct_size(p, member, n, sz) \
> + likely(__check_struct_size(sizeof(*(p)), \
> + sizeof(*(p)->member) + __must_be_array((p)->member), \
> + n, sz))
> +
> #define ptr_mask_bits(ptr, n) ({ \
> unsigned long __v = (unsigned long)(ptr); \
> (typeof(ptr))(__v & -BIT(n)); \
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index a5bdb86858f6..4e67c2395b46 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -126,6 +126,8 @@ enum drm_i915_gem_engine_class {
> I915_ENGINE_CLASS_INVALID = -1
> };
>
> +#define I915_ENGINE_CLASS_INVALID_NONE -1
> +
> /**
> * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
> *
> @@ -1511,6 +1513,26 @@ struct drm_i915_gem_context_param {
> * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
> */
> #define I915_CONTEXT_PARAM_VM 0x9
> +
> +/*
> + * I915_CONTEXT_PARAM_ENGINES:
> + *
> + * Bind this context to operate on this subset of available engines. Henceforth,
> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> + * and upwards. Slots 0...N are filled in using the specified (class, instance).
> + * Use
> + * engine_class: I915_ENGINE_CLASS_INVALID,
> + * engine_instance: I915_ENGINE_CLASS_INVALID_NONE
> + * to specify a gap in the array that can be filled in later, e.g. by a
> + * virtual engine used for load balancing.
> + *
> + * Setting the number of engines bound to the context to 0, by passing a zero
> + * sized argument, will revert back to default settings.
> + *
> + * See struct i915_context_param_engines.
> + */
> +#define I915_CONTEXT_PARAM_ENGINES 0xa
> /* Must be kept compact -- no holes and well documented */
>
> __u64 value;
> @@ -1575,6 +1597,23 @@ struct drm_i915_gem_context_param_sseu {
> __u32 rsvd;
> };
>
> +struct i915_context_param_engines {
> + __u64 extensions; /* linked chain of extension blocks, 0 terminates */
> +
> + struct {
> + __u16 engine_class; /* see enum drm_i915_gem_engine_class */
> + __u16 engine_instance;
> + } class_instance[0];
> +} __attribute__((packed));
> +
> +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
> + __u64 extensions; \
> + struct { \
> + __u16 engine_class; \
> + __u16 engine_instance; \
> + } class_instance[N__]; \
> +} __attribute__((packed)) name__
> +
> struct drm_i915_gem_context_create_ext_setparam {
> #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
> struct i915_user_extension base;
> @@ -1591,7 +1630,8 @@ struct drm_i915_gem_context_create_ext_clone {
> #define I915_CONTEXT_CLONE_SSEU (1u << 2)
> #define I915_CONTEXT_CLONE_TIMELINE (1u << 3)
> #define I915_CONTEXT_CLONE_VM (1u << 4)
> -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
> +#define I915_CONTEXT_CLONE_ENGINES (1u << 5)
> +#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_ENGINES << 1)
> __u64 rsvd;
> };
>
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list