[Intel-gfx] [PATCH 2/3] drm/i915: Allow userspace to specify ringsize on construction

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Mon Nov 18 11:14:12 UTC 2019


Hi Chris,

Only some minor comments from me, mostly out of my curiosity.

On Friday, November 15, 2019 5:05:45 PM CET Chris Wilson wrote:
> No good reason why we must always use a static ringsize, so let
> userspace select one during construction.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c | 139 +++++++++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_lrc.c         |   1 +
>  include/uapi/drm/i915_drm.h                 |  19 +++
>  3 files changed, 153 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 1284f47303fa..ee9a4634fed8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -597,23 +597,30 @@ __create_context(struct drm_i915_private *i915)
>  	return ERR_PTR(err);
>  }
>  
> -static void
> +static int
>  context_apply_all(struct i915_gem_context *ctx,
> -		  void (*fn)(struct intel_context *ce, void *data),
> +		  int (*fn)(struct intel_context *ce, void *data),
>  		  void *data)
>  {
>  	struct i915_gem_engines_iter it;
>  	struct intel_context *ce;
> +	int err = 0;
>  
> -	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
> -		fn(ce, data);
> +	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
> +		err = fn(ce, data);
> +		if (err)
> +			break;
> +	}
>  	i915_gem_context_unlock_engines(ctx);
> +
> +	return err;
>  }
>  
> -static void __apply_ppgtt(struct intel_context *ce, void *vm)
> +static int __apply_ppgtt(struct intel_context *ce, void *vm)
>  {
>  	i915_vm_put(ce->vm);
>  	ce->vm = i915_vm_get(vm);
> +	return 0;
>  }
>  
>  static struct i915_address_space *
> @@ -651,9 +658,10 @@ static void __set_timeline(struct intel_timeline **dst,
>  		intel_timeline_put(old);
>  }
>  
> -static void __apply_timeline(struct intel_context *ce, void *timeline)
> +static int __apply_timeline(struct intel_context *ce, void *timeline)
>  {
>  	__set_timeline(&ce->timeline, timeline);
> +	return 0;
>  }
>  
>  static void __assign_timeline(struct i915_gem_context *ctx,
> @@ -1223,6 +1231,104 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
>  	return err;
>  }
>  
> +static int __apply_ringsize(struct intel_context *ce, void *sz)
> +{
> +	int err;
> +
> +	err = i915_active_wait(&ce->active);
> +	if (err < 0)
> +		return err;
> +
> +	if (intel_context_lock_pinned(ce))
> +		return -EINTR;
> +
> +	if (intel_context_is_pinned(ce)) {
> +		err = -EBUSY; /* In active use, come back later! */
> +		goto unlock;
> +	}
> +
> +	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
> +		struct intel_ring *ring;
> +
> +		/* Replace the existing ringbuffer */
> +		ring = intel_engine_create_ring(ce->engine,
> +						(unsigned long)sz);
> +		if (IS_ERR(ring)) {
> +			err = PTR_ERR(ring);
> +			goto unlock;
> +		}
> +
> +		intel_ring_put(ce->ring);
> +		ce->ring = ring;
> +
> +		/* Context image will be updated on next pin */
> +	} else {
> +		ce->ring = sz;
> +	}
> +
> +unlock:
> +	intel_context_unlock_pinned(ce);
> +	return err;
> +}

I'm wondering if this function (and __get_ringsize() below as well), with its 
dependency on intel_context internals, especially on that dual meaning of 
ce->ring which depends on (ce->flags & CONTEXT_ALLOC_BIT), would better fit 
into drivers/gpu/drm/i915/gt/intel_context.c.

> +
> +static int set_ringsize(struct i915_gem_context *ctx,
> +			struct drm_i915_gem_context_param *args)
> +{
> +	if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
> +		return -ENODEV;
> +
> +	if (args->size)
> +		return -EINVAL;
> +
> +	if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE))
> +		return -EINVAL;
> +
> +	if (args->value < I915_GTT_PAGE_SIZE)
> +		return -EINVAL;
> +
> +	if (args->value > 128 * I915_GTT_PAGE_SIZE)
> +		return -EINVAL;
> +
> +	return context_apply_all(ctx,
> +				 __apply_ringsize,
> +				 __intel_context_ring_size(args->value));
> +}
> +
> +static int __get_ringsize(struct intel_context *ce, void *arg)
> +{
> +	int num_pages;
> +
> +	if (intel_context_lock_pinned(ce))
> +		return -EINTR;
> +
> +	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
> +		num_pages = ce->ring->size / I915_GTT_PAGE_SIZE;
> +	else
> +		num_pages = (uintptr_t)ce->ring / I915_GTT_PAGE_SIZE;
> +
> +	intel_context_unlock_pinned(ce);
> +	return num_pages; /* stop on first engine */

Location of this comment seems not perfect to me as it is not quite obvious 
how that works without examining how this function is used, but having spent a 
while looking around, I'm not able to suggest a better place.

> +}
> +
> +static int get_ringsize(struct i915_gem_context *ctx,
> +			struct drm_i915_gem_context_param *args)
> +{
> +	int num_pages;
> +
> +	if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
> +		return -ENODEV;
> +
> +	if (args->size)
> +		return -EINVAL;
> +
> +	num_pages = context_apply_all(ctx, __get_ringsize, NULL);
> +	if (num_pages < 0)
> +		return num_pages;
> +
> +	args->value = (u64)num_pages * I915_GTT_PAGE_SIZE;

Do you convert to num_pages inside __get_ringsize() then back to size in bytes 
to avoid an overflow?  Or any other reason?  Something that may be useful in 
the future?

> +	return 0;
> +}
> +
>  static int gen8_emit_rpcs_config(struct i915_request *rq,
>  				 struct intel_context *ce,
>  				 struct intel_sseu sseu)
> @@ -1935,6 +2041,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
>  		ret = set_persistence(ctx, args);
>  		break;
>  
> +	case I915_CONTEXT_PARAM_RINGSIZE:
> +		ret = set_ringsize(ctx, args);
> +		break;
> +
>  	case I915_CONTEXT_PARAM_BAN_PERIOD:
>  	default:
>  		ret = -EINVAL;
> @@ -2003,6 +2113,19 @@ static int clone_engines(struct i915_gem_context *dst,
>  			__free_engines(clone, n);
>  			goto err_unlock;
>  		}
> +
> +		/* Copy across the preferred ringsize */
> +		clone->engines[n]->ring = e->engines[n]->ring;
> +		if (test_bit(CONTEXT_ALLOC_BIT, &e->engines[n]->flags)) {
> +			if (intel_context_lock_pinned(e->engines[n])) {
> +				__free_engines(clone, n + 1);
> +				goto err_unlock;
> +			}
> +
> +			clone->engines[n]->ring =
> +				__intel_context_ring_size(e->engines[n]->ring->size);
> +			intel_context_unlock_pinned(e->engines[n]);
> +		}

Another candidate for a helper located in 
drivers/gpu/drm/i915/gt/intel_context.c?

>  	}
>  	clone->num_engines = n;
>  
> @@ -2366,6 +2489,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>  		args->value = i915_gem_context_is_persistent(ctx);
>  		break;
>  
> +	case I915_CONTEXT_PARAM_RINGSIZE:
> +		ret = get_ringsize(ctx, args);
> +		break;
> +
>  	case I915_CONTEXT_PARAM_BAN_PERIOD:
>  	default:
>  		ret = -EINVAL;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 33ce258d484f..2649690a951e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2417,6 +2417,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
>  	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
>  	regs[CTX_RING_HEAD] = ring->head;
>  	regs[CTX_RING_TAIL] = ring->tail;
> +	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
>  
>  	/* RPCS */
>  	if (engine->class == RENDER_CLASS) {
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 5400d7e057f1..ae7cd681b075 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1587,6 +1587,25 @@ struct drm_i915_gem_context_param {
>   * By default, new contexts allow persistence.
>   */
>  #define I915_CONTEXT_PARAM_PERSISTENCE	0xb
> +
> +/*
> + * I915_CONTEXT_PARAM_RINGSIZE:
> + *
> + * Sets the size of the CS ringbuffer to use for logical ring contexts. This
> + * applies a limit of how many batches can be queued to HW before the caller
> + * is blocked due to lack of space for more commands.
> + *
> + * Only reliably possible to be set prior to first use, i.e. during
> + * construction. At any later point, the current execution must be flushed as
> + * the ring can only be changed while the context is idle.
> + *
> + * Only applies to the current set of engine and lost when those engines
> + * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
> + *
> + * Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
> + * Default is 16 KiB.
> + */
> +#define I915_CONTEXT_PARAM_RINGSIZE	0xc

I know it looked like that already before, but having other documented flags 
separated by blank lines from each other, Is there any reason for not putting 
another blank line after the last one?

>  /* Must be kept compact -- no holes and well documented */
>  
>  	__u64 value;
> 

None of the above comments are essential so with or without them addressed:
Reviewed-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>

Thanks,
Janusz







More information about the Intel-gfx mailing list