[Intel-gfx] [PATCH 37/40] drm/i915: Allow a context to define its set of engines

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Sep 27 11:28:47 UTC 2018


On 19/09/2018 20:55, Chris Wilson wrote:
> Over the last few years, we have debated how to extend the user API to
> support an increase in the number of engines, that may be sparse and
> even be heterogeneous within a class (not all video decoders created
> equal). We settled on using (class, instance) tuples to identify a
> specific engine, with an API for the user to construct a map of engines
> to capabilities. Into this picture, we then add a challenge of virtual
> engines; one user engine that maps behind the scenes to any number of
> physical engines. To keep it general, we want the user to have full
> control over that mapping. To that end, we allow the user to constrain a
> context to define the set of engines that it can access, order fully
> controlled by the user via (class, instance). With such precise control
> in context setup, we can continue to use the existing execbuf uABI of
> specifying a single index; only now it doesn't automagically map onto
> the engines, it uses the user defined engine map from the context.
> 
> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> execbuf. It's use will be revealed in the next patch.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
>   include/uapi/drm/i915_drm.h                | 27 +++++++
>   4 files changed, 135 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index a8570a07b3b7..313471253f51 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -90,6 +90,7 @@
>   #include <drm/i915_drm.h>
>   #include "i915_drv.h"
>   #include "i915_trace.h"
> +#include "i915_user_extensions.h"
>   #include "intel_workarounds.h"
>   
>   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> @@ -223,6 +224,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   			ce->ops->destroy(ce);
>   	}
>   
> +	kfree(ctx->engines);
> +
>   	if (ctx->timeline)
>   		i915_timeline_put(ctx->timeline);
>   
> @@ -948,6 +951,87 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	return ret;
>   }
>   
> +struct set_engines {
> +	struct i915_gem_context *ctx;
> +	struct intel_engine_cs **engines;
> +	unsigned int nengine;
> +};
> +
> +static const i915_user_extension_fn set_engines__extensions[] = {
> +};

This is OK unless someone one day gets the desire to make the extension 
namespace sparse. I was thinking on how to put some warnings in the code 
to warn about it, but I think that's for later. Namespace is also per 
parent ioctl, another thing which would perhaps need enforcing.

> +
> +static int set_engines(struct i915_gem_context *ctx,
> +		       struct drm_i915_gem_context_param *args)
> +{
> +	struct i915_context_param_engines __user *user;
> +	struct set_engines set = {
> +		.ctx = ctx,
> +		.engines = NULL,
> +		.nengine = -1,
> +	};
> +	unsigned int n;
> +	u64 size, extensions;

Size is u32 in the uAPI, so either that or unsigned int would do here.

> +	int err;
> +
> +	user = u64_to_user_ptr(args->value);
> +	size = args->size;
> +	if (!size)

args->sizes = sizeof(*user);

... if you want to allow size probing via set param, or we add a 
get_param (too much work for nothing?) and only allow probing from there?

> +		goto out;
> +
> +	if (size < sizeof(struct i915_context_param_engines))
> +		return -EINVAL;
> +
> +	size -= sizeof(struct i915_context_param_engines);
> +	if (size % sizeof(*user->class_instance))
> +		return -EINVAL;
> +
> +	set.nengine = size / sizeof(*user->class_instance);
> +	if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
> +		return -EINVAL;
> +
> +	set.engines = kmalloc_array(set.nengine + 1,
> +				    sizeof(*set.engines),
> +				    GFP_KERNEL);
> +	if (!set.engines)
> +		return -ENOMEM;
> +
> +	set.engines[0] = NULL;

/* Reserve the I915_EXEC_DEFAULT slot. */

> +	for (n = 0; n < set.nengine; n++) {
> +		u32 class, instance;

I will later recommend we use u16 for class/instance. I think we settled 
for that in the meantime.

> +
> +		if (get_user(class, &user->class_instance[n].class) ||
> +		    get_user(instance, &user->class_instance[n].instance)) {
> +			kfree(set.engines);
> +			return -EFAULT;
> +		}
> +
> +		set.engines[n + 1] =
> +			intel_engine_lookup_user(ctx->i915, class, instance);
> +		if (!set.engines[n + 1]) {
> +			kfree(set.engines);
> +			return -ENOENT;
> +		}
> +	}
> +
> +	err = -EFAULT;
> +	if (!get_user(extensions, &user->extensions))
> +		err = i915_user_extensions(u64_to_user_ptr(extensions),
> +					   set_engines__extensions,
> +					   ARRAY_SIZE(set_engines__extensions),
> +					   &set);
> +	if (err) {

Do we need an user extensions destructor table for a more 
compartmentalized design? This actually applies to the 
i915_user_extensions() implementation.. it would need to take two 
function tables I think and unwind until the one that failed.

> +		kfree(set.engines);
> +		return err;
> +	}
> +
> +out:
> +	kfree(ctx->engines);
> +	ctx->engines = set.engines;
> +	ctx->nengine = set.nengine + 1;
> +
> +	return 0;
> +}
> +
>   int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file)
>   {
> @@ -1011,6 +1095,10 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   		}
>   		break;
>   
> +	case I915_CONTEXT_PARAM_ENGINES:
> +		ret = set_engines(ctx, args);
> +		break;
> +
>   	default:
>   		ret = -EINVAL;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 770043449ba6..9f89119a6566 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -67,6 +67,8 @@ struct i915_gem_context {
>   	/** file_priv: owning file descriptor */
>   	struct drm_i915_file_private *file_priv;
>   
> +	struct intel_engine_cs **engines;
> +
>   	struct i915_timeline *timeline;
>   
>   	/**
> @@ -135,6 +137,8 @@ struct i915_gem_context {
>   #define CONTEXT_CLOSED			1
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
>   
> +	unsigned int nengine;

Why put related fields apart?

> +
>   	/**
>   	 * @hw_id: - unique identifier for the context
>   	 *
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 109486a6ef07..faedfdca875d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2014,13 +2014,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
>   };
>   
>   static struct intel_engine_cs *
> -eb_select_engine(struct drm_i915_private *dev_priv,
> +eb_select_engine(struct i915_execbuffer *eb,
>   		 struct drm_file *file,
>   		 struct drm_i915_gem_execbuffer2 *args)
>   {
>   	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>   	struct intel_engine_cs *engine;
>   
> +	if (eb->ctx->engines) {
> +		if (user_ring_id >= eb->ctx->nengine) {
> +			DRM_DEBUG("execbuf with unknown ring: %u\n",
> +				  user_ring_id);
> +			return NULL;
> +		}
> +
> +		return eb->ctx->engines[user_ring_id];
> +	}
> +
>   	if (user_ring_id > I915_USER_RINGS) {
>   		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>   		return NULL;
> @@ -2033,11 +2043,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   		return NULL;
>   	}
>   
> -	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
> +	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
>   		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>   
>   		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> -			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
> +			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
>   		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
>   			   bsd_idx <= I915_EXEC_BSD_RING2) {
>   			bsd_idx >>= I915_EXEC_BSD_SHIFT;
> @@ -2048,9 +2058,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   			return NULL;
>   		}
>   
> -		engine = dev_priv->engine[_VCS(bsd_idx)];
> +		engine = eb->i915->engine[_VCS(bsd_idx)];
>   	} else {
> -		engine = dev_priv->engine[user_ring_map[user_ring_id]];
> +		engine = eb->i915->engine[user_ring_map[user_ring_id]];
>   	}
>   
>   	if (!engine) {
> @@ -2260,7 +2270,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (unlikely(err))
>   		goto err_destroy;
>   
> -	eb.engine = eb_select_engine(eb.i915, file, args);
> +	eb.engine = eb_select_engine(&eb, file, args);
>   	if (!eb.engine) {
>   		err = -EINVAL;
>   		goto err_engine;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 65b0b84419f3..d41b4c673af4 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1508,9 +1508,36 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +
> +/*
> + * I915_CONTEXT_PARAM_ENGINES:
> + *
> + * Bind this context to operate on this subset of available engines. Henceforth,
> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> + * and upwards. The array created is offset by 1, such that by default
> + * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
> + * are then filled in using the specified (class, instance).

Should we allow specifying the default index? I am wondering if that 
would make life easier to any userspace.

> + *
> + * Setting the number of engines bound to the context will revert back to
> + * default settings.
> + *
> + * See struct i915_context_param_engines.
> + */
> +#define I915_CONTEXT_PARAM_ENGINES	0x7
> +
>   	__u64 value;
>   };
>   
> +struct i915_context_param_engines {
> +	__u64 extensions;
> +
> +	struct {
> +		__u32 class; /* see enum drm_i915_gem_engine_class */
> +		__u32 instance;

u16 here as mentioned earlier.

> +	} class_instance[0];
> +};
> +
>   enum drm_i915_oa_format {
>   	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
>   	I915_OA_FORMAT_A29,	    /* HSW only */
> 

Regards,

Tvrtko


More information about the Intel-gfx mailing list