[Intel-gfx] [PATCH v2] drm/i915: Sanitize engine context sizes

Wed Apr 26 13:16:09 UTC 2017

On 26/04/2017 13:20, Joonas Lahtinen wrote:
> Pre-calculate engine context size based on engine class and device
> generation and store it in the engine instance.
>
> v2:
> 	- Squash and get rid of hw_context_size (Chris)
>
> Signed-off-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Paulo Zanoni <paulo.r.zanoni at intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Oscar Mateo <oscar.mateo at intel.com>
> Cc: Zhenyu Wang <zhenyuw at linux.intel.com>
> Cc: intel-gvt-dev at lists.freedesktop.org
> ---
>  drivers/gpu/drm/i915/gvt/scheduler.c       |  6 +--
>  drivers/gpu/drm/i915/i915_drv.h            |  1 -
>  drivers/gpu/drm/i915/i915_gem_context.c    | 59 +++-------------------
>  drivers/gpu/drm/i915/i915_guc_submission.c |  3 +-
>  drivers/gpu/drm/i915/i915_reg.h            | 10 ----
>  drivers/gpu/drm/i915/intel_engine_cs.c     | 78 ++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c           | 54 +--------------------
>  drivers/gpu/drm/i915/intel_lrc.h           |  2 -
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |  7 +--
>  9 files changed, 93 insertions(+), 127 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index a77db23..ac538dc 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
>  	gvt_dbg_sched("ring id %d workload lrca %x", ring_id,
>  			workload->ctx_desc.lrca);
>
> -	context_page_num = intel_lr_context_size(
> -			gvt->dev_priv->engine[ring_id]);
> +	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
>
>  	context_page_num = context_page_num >> PAGE_SHIFT;
>
> @@ -333,8 +332,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
>  	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
>  			workload->ctx_desc.lrca);
>
> -	context_page_num = intel_lr_context_size(
> -			gvt->dev_priv->engine[ring_id]);
> +	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
>
>  	context_page_num = context_page_num >> PAGE_SHIFT;
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 357b6c6..4b54b92 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2359,7 +2359,6 @@ struct drm_i915_private {
>  	 */
>  	struct mutex av_mutex;
>
> -	uint32_t hw_context_size;
>  	struct list_head context_list;
>
>  	u32 fdi_rx_config;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 8bd0c49..b69a026 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -92,33 +92,6 @@
>
>  #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>
> -static int get_context_size(struct drm_i915_private *dev_priv)
> -{
> -	int ret;
> -	u32 reg;
> -
> -	switch (INTEL_GEN(dev_priv)) {
> -	case 6:
> -		reg = I915_READ(CXT_SIZE);
> -		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
> -		break;
> -	case 7:
> -		reg = I915_READ(GEN7_CXT_SIZE);
> -		if (IS_HASWELL(dev_priv))
> -			ret = HSW_CXT_TOTAL_SIZE;
> -		else
> -			ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
> -		break;
> -	case 8:
> -		ret = GEN8_CXT_TOTAL_SIZE;
> -		break;
> -	default:
> -		BUG();
> -	}
> -
> -	return ret;
> -}
> -
>  void i915_gem_context_free(struct kref *ctx_ref)
>  {
>  	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
> @@ -266,11 +239,12 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>  	list_add_tail(&ctx->link, &dev_priv->context_list);
>  	ctx->i915 = dev_priv;
>
> -	if (dev_priv->hw_context_size) {
> +	if (dev_priv->engine[RCS]->context_size) {
>  		struct drm_i915_gem_object *obj;
>  		struct i915_vma *vma;
>
> -		obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size);
> +		obj = alloc_context_obj(dev_priv,
> +					dev_priv->engine[RCS]->context_size);
>  		if (IS_ERR(obj)) {
>  			ret = PTR_ERR(obj);
>  			goto err_out;
> @@ -443,21 +417,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
>  	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
>  	ida_init(&dev_priv->context_hw_ida);
>
> -	if (i915.enable_execlists) {
> -		/* NB: intentionally left blank. We will allocate our own
> -		 * backing objects as we need them, thank you very much */
> -		dev_priv->hw_context_size = 0;
> -	} else if (HAS_HW_CONTEXTS(dev_priv)) {
> -		dev_priv->hw_context_size =
> -			round_up(get_context_size(dev_priv),
> -				 I915_GTT_PAGE_SIZE);

Is this rounding up lost when used from __create_hw_context?

> -		if (dev_priv->hw_context_size > (1<<20)) {
> -			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
> -					 dev_priv->hw_context_size);
> -			dev_priv->hw_context_size = 0;
> -		}
> -	}
> -
>  	ctx = i915_gem_create_context(dev_priv, NULL);
>  	if (IS_ERR(ctx)) {
>  		DRM_ERROR("Failed to create default global context (error %ld)\n",
> @@ -478,7 +437,7 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
>
>  	DRM_DEBUG_DRIVER("%s context support initialized\n",
>  			i915.enable_execlists ? "LR" :
> -			dev_priv->hw_context_size ? "HW" : "fake");
> +			dev_priv->engine[RCS]->context_size ? "HW" : "fake");
>  	return 0;
>  }
>
> @@ -941,11 +900,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>
> -static bool contexts_enabled(struct drm_device *dev)
> -{
> -	return i915.enable_execlists || to_i915(dev)->hw_context_size;
> -}
> -
>  static bool client_is_banned(struct drm_i915_file_private *file_priv)
>  {
>  	return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
> @@ -954,12 +908,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv)
>  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  				  struct drm_file *file)
>  {
> +	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_i915_gem_context_create *args = data;
>  	struct drm_i915_file_private *file_priv = file->driver_priv;
>  	struct i915_gem_context *ctx;
>  	int ret;
>
> -	if (!contexts_enabled(dev))
> +	if (!dev_priv->engine[RCS]->context_size)
>  		return -ENODEV;
>
>  	if (args->pad != 0)
> @@ -977,7 +932,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  	if (ret)
>  		return ret;
>
> -	ctx = i915_gem_create_context(to_i915(dev), file_priv);
> +	ctx = i915_gem_create_context(dev_priv, file_priv);
>  	mutex_unlock(&dev->struct_mutex);
>  	if (IS_ERR(ctx))
>  		return PTR_ERR(ctx);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index ab5140b..6c78637 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -1051,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc)
>  		dev_priv->engine[RCS]->status_page.ggtt_offset;
>
>  	for_each_engine(engine, dev_priv, id)
> -		blob->ads.eng_state_size[engine->guc_id] =
> -			intel_lr_context_size(engine);
> +		blob->ads.eng_state_size[engine->guc_id] = engine->context_size;
>
>  	base = guc_ggtt_offset(vma);
>  	blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 4c72ada..ee8170c 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -3370,16 +3370,6 @@ enum skl_disp_power_wells {
>  #define GEN7_CXT_VFSTATE_SIZE(ctx_reg)	(((ctx_reg) >> 0) & 0x3f)
>  #define GEN7_CXT_TOTAL_SIZE(ctx_reg)	(GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
>  					 GEN7_CXT_VFSTATE_SIZE(ctx_reg))
> -/* Haswell does have the CXT_SIZE register however it does not appear to be
> - * valid. Now, docs explain in dwords what is in the context object. The full
> - * size is 70720 bytes, however, the power context and execlist context will
> - * never be saved (power context is stored elsewhere, and execlists don't work
> - * on HSW) - so the final size, including the extra state required for the
> - * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
> - */
> -#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
> -/* Same as Haswell, but 72064 bytes now. */
> -#define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)
>
>  enum {
>  	INTEL_ADVANCED_CONTEXT = 0,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 82a274b..24bcd9a 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -26,6 +26,22 @@
>  #include "intel_ringbuffer.h"
>  #include "intel_lrc.h"
>
> +/* Haswell does have the CXT_SIZE register however it does not appear to be
> + * valid. Now, docs explain in dwords what is in the context object. The full
> + * size is 70720 bytes, however, the power context and execlist context will
> + * never be saved (power context is stored elsewhere, and execlists don't work
> + * on HSW) - so the final size, including the extra state required for the
> + * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
> + */
> +#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
> +/* Same as Haswell, but 72064 bytes now. */
> +#define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)
> +
> +#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
> +#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
> +
> +#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
> +
>  struct engine_class_info {
>  	const char *name;
>  	int (*init_legacy)(struct intel_engine_cs *engine);
> @@ -107,6 +123,64 @@ static const struct engine_info intel_engines[] = {
>  	},
>  };
>
> +/**
> + * ___intel_engine_context_size() - return the size of the context for an engine
> + * @dev_priv: i915 device private
> + * @class: engine class
> + *
> + * Each engine class may require a different amount of space for a context
> + * image.
> + *
> + * Return: size (in bytes) of an engine class specific context image
> + *
> + * Note: this size includes the HWSP, which is part of the context image
> + * in LRC mode, but does not include the "shared data page" used with
> + * GuC submission. The caller should account for this if using the GuC.
> + */
> +static u32
> +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)

Very minor, but Chris has been trying to establish i915 instead of 
dev_priv in new code.

> +{
> +	u32 reg;

Also very minor, I think we normally use val for register values and reg 
for the register itself. Quick grep suggests we are not 100% consistent 
already so feel free to ignore. :)

> +
> +	switch (class) {
> +	case RENDER_CLASS:
> +		switch (INTEL_GEN(dev_priv)) {
> +		default:
> +			MISSING_CASE(INTEL_GEN(dev_priv));
> +		case 9:
> +			return GEN9_LR_CONTEXT_RENDER_SIZE;
> +		case 8:
> +			return i915.enable_execlists ?
> +			       GEN8_LR_CONTEXT_RENDER_SIZE :
> +			       GEN8_CXT_TOTAL_SIZE;
> +		case 7:
> +			if (IS_HASWELL(dev_priv))
> +				return HSW_CXT_TOTAL_SIZE;
> +
> +			reg = I915_READ(GEN7_CXT_SIZE);
> +			return GEN7_CXT_TOTAL_SIZE(reg) * 64;
> +		case 6:
> +			reg = I915_READ(CXT_SIZE);
> +			return GEN6_CXT_TOTAL_SIZE(reg) * 64;
> +		case 5:
> +		case 4:
> +		case 3:
> +		case 2:
> +		/* For the special day when i810 gets merged. */
> +		case 1:
> +			return 0;
> +		}
> +		break;
> +	default:
> +		MISSING_CASE(class);
> +	case VIDEO_DECODE_CLASS:
> +	case VIDEO_ENHANCEMENT_CLASS:
> +	case COPY_ENGINE_CLASS:
> +		WARN_ON(INTEL_GEN(dev_priv) < 8);
> +		return GEN8_LR_CONTEXT_OTHER_SIZE;
> +	}
> +}
> +
>  static int
>  intel_engine_setup(struct drm_i915_private *dev_priv,
>  		   enum intel_engine_id id)
> @@ -134,6 +208,10 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>  	engine->irq_shift = info->irq_shift;
>  	engine->class = info->class;
>  	engine->instance = info->instance;
> +	engine->context_size = __intel_engine_context_size(dev_priv,
> +							   engine->class);
> +	if (WARN_ON(engine->context_size > BIT(20)))
> +		engine->context_size = 0;

Don't know the history to tell whether upgrade of DRM_DEBUG_DRIVER to a 
WARN_ON is ok.

>
>  	/* Nothing to do here, execute in order of dependencies */
>  	engine->schedule = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5ec064a..0909549 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -138,10 +138,6 @@
>  #include "i915_drv.h"
>  #include "intel_mocs.h"
>
> -#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
> -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
> -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
> -
>  #define RING_EXECLIST_QFULL		(1 << 0x2)
>  #define RING_EXECLIST1_VALID		(1 << 0x3)
>  #define RING_EXECLIST0_VALID		(1 << 0x4)
> @@ -1918,53 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx,
>  	return 0;
>  }
>
> -/**
> - * intel_lr_context_size() - return the size of the context for an engine
> - * @engine: which engine to find the context size for
> - *
> - * Each engine may require a different amount of space for a context image,
> - * so when allocating (or copying) an image, this function can be used to
> - * find the right size for the specific engine.
> - *
> - * Return: size (in bytes) of an engine-specific context image
> - *
> - * Note: this size includes the HWSP, which is part of the context image
> - * in LRC mode, but does not include the "shared data page" used with
> - * GuC submission. The caller should account for this if using the GuC.
> - */
> -uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	int ret;
> -
> -	WARN_ON(INTEL_GEN(dev_priv) < 8);
> -
> -	switch (engine->class) {
> -	case RENDER_CLASS:
> -		switch (INTEL_GEN(dev_priv)) {
> -		default:
> -			MISSING_CASE(INTEL_GEN(dev_priv));
> -		case 9:
> -			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
> -			break;
> -		case 8:
> -			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
> -			break;
> -		}
> -		break;
> -
> -	default:
> -		MISSING_CASE(engine->class);
> -	case VIDEO_DECODE_CLASS:
> -	case VIDEO_ENHANCEMENT_CLASS:
> -	case COPY_ENGINE_CLASS:
> -		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
> -		break;
> -	}
> -
> -	return ret;
> -}
> -
>  static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>  					    struct intel_engine_cs *engine)
>  {
> @@ -1977,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>
>  	WARN_ON(ce->state);
>
> -	context_size = round_up(intel_lr_context_size(engine),
> -				I915_GTT_PAGE_SIZE);
> +	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
>
>  	/* One extra page as the sharing data between driver and GuC */
>  	context_size += PAGE_SIZE * LRC_PPHWSP_PN;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index e8015e7..52b3a1f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
>  struct drm_i915_private;
>  struct i915_gem_context;
>
> -uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
> -
>  void intel_lr_context_resume(struct drm_i915_private *dev_priv);
>  uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
>  				     struct intel_engine_cs *engine);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 96710b6..a64d18c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -196,13 +196,14 @@ struct intel_engine_cs {
>  	enum intel_engine_id id;
>  	unsigned int uabi_id;
>  	unsigned int hw_id;
> +	unsigned int guc_id;
>
>  	u8 class;
>  	u8 instance;
> -
> -	unsigned int guc_id;
> -	u32		mmio_base;
> +	u32 context_size;
> +	u32 mmio_base;
>  	unsigned int irq_shift;
> +
>  	struct intel_ring *buffer;
>  	struct intel_timeline *timeline;
>
>

Looks good to me in principle, just unsure about those two items form above.

Acked-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko