[Intel-gfx] [PATCH 4/6] drm/i915: Record the default hw state after reset upon load

Mika Kuoppala mika.kuoppala at linux.intel.com
Thu Nov 2 13:56:50 UTC 2017


Chris Wilson <chris at chris-wilson.co.uk> writes:

> Take a copy of the HW state after a reset upon module loading by
> executing a context switch from a blank context to the kernel context,
> thus saving the default hw state over the blank context image.
> We can then use the default hw state to initialise any future context,
> ensuring that each starts with the default view of hw state.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gvt/scheduler.c    |  2 -
>  drivers/gpu/drm/i915/i915_debugfs.c     |  1 -
>  drivers/gpu/drm/i915/i915_gem.c         | 69 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_context.c | 50 +++---------------------
>  drivers/gpu/drm/i915/i915_gem_context.h |  4 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c  |  3 ++
>  drivers/gpu/drm/i915/intel_lrc.c        | 35 ++++++++++-------
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 47 +++++++++++++++-------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
>  9 files changed, 137 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index f6ded475bb2c..42cc61230ca7 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -723,8 +723,6 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu)
>  	if (IS_ERR(vgpu->shadow_ctx))
>  		return PTR_ERR(vgpu->shadow_ctx);
>  
> -	vgpu->shadow_ctx->engine[RCS].initialised = true;
> -
>  	bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 39883cd915db..cfcef1899da8 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1974,7 +1974,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
>  			struct intel_context *ce = &ctx->engine[engine->id];
>  
>  			seq_printf(m, "%s: ", engine->name);
> -			seq_putc(m, ce->initialised ? 'I' : 'i');
>  			if (ce->state)
>  				describe_obj(m, ce->state->obj);
>  			if (ce->ring)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e36a3a840552..ed4b1708fec5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4967,6 +4967,71 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
>  	return true;
>  }
>  
> +static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> +{
> +	struct i915_gem_context *ctx;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int err;
> +
> +	/*
> +	 * As we reset the gpu during very early sanitisation, the current
> +	 * register state on the GPU should reflect its defaults values.
> +	 * We load a context onto the hw (with restore-inhibit), then switch
> +	 * over to a second context to save that default register state. We
> +	 * can then prime every new context with that state so they all start
> +	 * from defaults.
> +	 */
> +
> +	ctx = i915_gem_context_create_kernel(i915, 0);
> +	if (IS_ERR(ctx))
> +		return PTR_ERR(ctx);
> +
> +	for_each_engine(engine, i915, id) {
> +		struct drm_i915_gem_request *rq;
> +
> +		rq = i915_gem_request_alloc(engine, ctx);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto out_ctx;
> +		}
> +
> +		err = i915_switch_context(rq);
> +		if (engine->init_context)
> +			err = engine->init_context(rq);
> +
> +		__i915_add_request(rq, true);
> +		if (err)
> +			goto out_ctx;
> +	}
> +
> +	err = i915_gem_switch_to_kernel_context(i915);
> +	if (err)
> +		goto out_ctx;
> +
> +	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
> +	if (err)
> +		goto out_ctx;
> +
> +	for_each_engine(engine, i915, id) {
> +		if (!ctx->engine[id].state)
> +			continue;
> +
> +		engine->default_state =
> +			i915_gem_object_get(ctx->engine[id].state->obj);
> +
> +		err = i915_gem_object_set_to_cpu_domain(engine->default_state,
> +							false);
> +		if (err)
> +			goto out_ctx;
> +	}
> +
> +out_ctx:
> +	i915_gem_context_set_closed(ctx);
> +	i915_gem_context_put(ctx);
> +	return err;
> +}
> +
>  int i915_gem_init(struct drm_i915_private *dev_priv)
>  {
>  	int ret;
> @@ -5022,6 +5087,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>  
>  	intel_init_gt_powersave(dev_priv);
>  
> +	ret = __intel_engines_record_defaults(dev_priv);
> +	if (ret)
> +		goto out_unlock;
> +
>  out_unlock:
>  	if (ret == -EIO) {
>  		/* Allow engine initialisation to fail by marking the GPU as
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 10affb35ac56..499cc0f25653 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -343,7 +343,7 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
>   * context state of the GPU for applications that don't utilize HW contexts, as
>   * well as an idle case.
>   */
> -static struct i915_gem_context *
> +struct i915_gem_context *
>  i915_gem_create_context(struct drm_i915_private *dev_priv,
>  			struct drm_i915_file_private *file_priv)
>  {
> @@ -418,8 +418,8 @@ i915_gem_context_create_gvt(struct drm_device *dev)
>  	return ctx;
>  }
>  
> -static struct i915_gem_context *
> -create_kernel_context(struct drm_i915_private *i915, int prio)
> +struct i915_gem_context *
> +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>  {
>  	struct i915_gem_context *ctx;
>  
> @@ -473,7 +473,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>  	ida_init(&dev_priv->contexts.hw_ida);
>  
>  	/* lowest priority; idle task */
> -	ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN);
> +	ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
>  	if (IS_ERR(ctx)) {
>  		DRM_ERROR("Failed to create default global context\n");
>  		err = PTR_ERR(ctx);
> @@ -487,7 +487,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>  	dev_priv->kernel_context = ctx;
>  
>  	/* highest priority; preempting task */
> -	ctx = create_kernel_context(dev_priv, INT_MAX);
> +	ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX);
>  	if (IS_ERR(ctx)) {
>  		DRM_ERROR("Failed to create default preempt context\n");
>  		err = PTR_ERR(ctx);
> @@ -522,28 +522,6 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
>  		engine->context_unpin(engine, engine->last_retired_context);
>  		engine->last_retired_context = NULL;
>  	}
> -
> -	/* Force the GPU state to be restored on enabling */
> -	if (!i915_modparams.enable_execlists) {
> -		struct i915_gem_context *ctx;
> -
> -		list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> -			if (!i915_gem_context_is_default(ctx))
> -				continue;
> -
> -			for_each_engine(engine, dev_priv, id)
> -				ctx->engine[engine->id].initialised = false;
> -
> -			ctx->remap_slice = ALL_L3_SLICES(dev_priv);
> -		}
> -
> -		for_each_engine(engine, dev_priv, id) {
> -			struct intel_context *kce =
> -				&dev_priv->kernel_context->engine[engine->id];
> -
> -			kce->initialised = true;
> -		}
> -	}
>  }
>  
>  void i915_gem_contexts_fini(struct drm_i915_private *i915)
> @@ -718,9 +696,6 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
>  	if (to->remap_slice)
>  		return false;
>  
> -	if (!to->engine[RCS].initialised)
> -		return false;
> -
>  	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
>  		return false;
>  
> @@ -795,11 +770,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  			return ret;
>  	}
>  
> -	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
> -		/* NB: If we inhibit the restore, the context is not allowed to
> -		 * die because future work may end up depending on valid address
> -		 * space. This means we must enforce that a page table load
> -		 * occur when this occurs. */
> +	if (i915_gem_context_is_kernel(to))
>  		hw_flags = MI_RESTORE_INHIBIT;
>  	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
>  		hw_flags = MI_FORCE_RESTORE;
> @@ -843,15 +814,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>  		to->remap_slice &= ~(1<<i);
>  	}
>  
> -	if (!to->engine[RCS].initialised) {
> -		if (engine->init_context) {
> -			ret = engine->init_context(req);
> -			if (ret)
> -				return ret;
> -		}
> -		to->engine[RCS].initialised = true;
> -	}
> -
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 44688e22a5c2..4bfb72f8e1cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -157,7 +157,6 @@ struct i915_gem_context {
>  		u32 *lrc_reg_state;
>  		u64 lrc_desc;
>  		int pin_count;
> -		bool initialised;
>  	} engine[I915_NUM_ENGINES];
>  
>  	/** ring_size: size for allocating the per-engine ring buffer */
> @@ -292,6 +291,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
>  				       struct drm_file *file);
>  
> +struct i915_gem_context *
> +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
> +
>  static inline struct i915_gem_context *
>  i915_gem_context_get(struct i915_gem_context *ctx)
>  {
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c15e288bed02..6b8519401d4d 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -679,6 +679,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>  	intel_engine_cleanup_cmd_parser(engine);
>  	i915_gem_batch_pool_fini(&engine->batch_pool);
>  
> +	if (engine->default_state)
> +		i915_gem_object_put(engine->default_state);
> +
>  	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
>  		engine->context_unpin(engine, engine->i915->preempt_context);
>  	engine->context_unpin(engine, engine->i915->kernel_context);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 6840ec8db037..297ce0327273 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1147,7 +1147,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
>  	struct intel_engine_cs *engine = request->engine;
>  	struct intel_context *ce = &request->ctx->engine[engine->id];
>  	u32 *cs;
> -	int ret;
>  
>  	GEM_BUG_ON(!ce->pin_count);
>  
> @@ -1161,14 +1160,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> -	if (!ce->initialised) {
> -		ret = engine->init_context(request);
> -		if (ret)
> -			return ret;
> -
> -		ce->initialised = true;
> -	}
> -
>  	/* Note that after this point, we have committed to using
>  	 * this request as it is being used to both track the
>  	 * state of engine initialisation and liveness of the
> @@ -2100,7 +2091,6 @@ static void execlists_init_reg_state(u32 *regs,
>  
>  	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
>  		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
> -				   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>  				   (HAS_RESOURCE_STREAMER(dev_priv) ?
>  				   CTX_CTRL_RS_CTX_ENABLE : 0)));
>  	CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> @@ -2177,6 +2167,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>  		    struct intel_ring *ring)
>  {
>  	void *vaddr;
> +	u32 *regs;
>  	int ret;
>  
>  	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
> @@ -2193,11 +2184,28 @@ populate_lr_context(struct i915_gem_context *ctx,
>  	}
>  	ctx_obj->mm.dirty = true;
>  
> +	if (engine->default_state) {
> +		void *defaults;
> +
> +		defaults = i915_gem_object_pin_map(engine->default_state,
> +						   I915_MAP_WB);
> +		if (IS_ERR(defaults))
> +			return PTR_ERR(defaults);
> +
> +		memcpy(vaddr + LRC_HEADER_PAGES * PAGE_SIZE,
> +		       defaults + LRC_HEADER_PAGES * PAGE_SIZE,
> +		       engine->context_size);
> +		i915_gem_object_unpin_map(engine->default_state);
> +	}
> +
>  	/* The second page of the context object contains some fields which must
>  	 * be set up prior to the first execution. */
> -
> -	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
> -				 ctx, engine, ring);
> +	regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
> +	execlists_init_reg_state(regs, ctx, engine, ring);
> +	if (!engine->default_state) {
> +		regs[CTX_CONTEXT_CONTROL+1] |=
> +			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
> +	}

With this change, the default state image we record (and later copy from) will
itself have the restore-inhibit bit set, and that bit will then be propagated
into every context cloned from it. Where do we clear it?
-Mika


>  
>  	i915_gem_object_unpin_map(ctx_obj);
>  
> @@ -2250,7 +2258,6 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>  
>  	ce->ring = ring;
>  	ce->state = vma;
> -	ce->initialised |= engine->init_context == NULL;
>  
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 47fadf8da84e..3e5c296cd8bc 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1368,7 +1368,7 @@ static int context_pin(struct i915_gem_context *ctx)
>  	 * on an active context (which by nature is already on the GPU).
>  	 */
>  	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
> -		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
> +		ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
>  		if (ret)
>  			return ret;
>  	}
> @@ -1383,11 +1383,34 @@ alloc_context_vma(struct intel_engine_cs *engine)
>  	struct drm_i915_private *i915 = engine->i915;
>  	struct drm_i915_gem_object *obj;
>  	struct i915_vma *vma;
> +	int err;
>  
>  	obj = i915_gem_object_create(i915, engine->context_size);
>  	if (IS_ERR(obj))
>  		return ERR_CAST(obj);
>  
> +	if (engine->default_state) {
> +		void *defaults, *vaddr;
> +
> +		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +		if (IS_ERR(vaddr)) {
> +			err = PTR_ERR(vaddr);
> +			goto err_obj;
> +		}
> +
> +		defaults = i915_gem_object_pin_map(engine->default_state,
> +						   I915_MAP_WB);
> +		if (IS_ERR(defaults)) {
> +			err = PTR_ERR(defaults);
> +			goto err_map;
> +		}
> +
> +		memcpy(vaddr, defaults, engine->context_size);
> +
> +		i915_gem_object_unpin_map(engine->default_state);
> +		i915_gem_object_unpin_map(obj);
> +	}
> +
>  	/*
>  	 * Try to make the context utilize L3 as well as LLC.
>  	 *
> @@ -1409,10 +1432,18 @@ alloc_context_vma(struct intel_engine_cs *engine)
>  	}
>  
>  	vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
> -	if (IS_ERR(vma))
> -		i915_gem_object_put(obj);
> +	if (IS_ERR(vma)) {
> +		err = PTR_ERR(vma);
> +		goto err_obj;
> +	}
>  
>  	return vma;
> +
> +err_map:
> +	i915_gem_object_unpin_map(obj);
> +err_obj:
> +	i915_gem_object_put(obj);
> +	return ERR_PTR(err);
>  }
>  
>  static struct intel_ring *
> @@ -1449,16 +1480,6 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
>  		ce->state->obj->pin_global++;
>  	}
>  
> -	/* The kernel context is only used as a placeholder for flushing the
> -	 * active context. It is never used for submitting user rendering and
> -	 * as such never requires the golden render context, and so we can skip
> -	 * emitting it when we switch to the kernel context. This is required
> -	 * as during eviction we cannot allocate and pin the renderstate in
> -	 * order to initialise the context.
> -	 */
> -	if (i915_gem_context_is_kernel(ctx))
> -		ce->initialised = true;
> -
>  	i915_gem_context_get(ctx);
>  
>  out:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 69ad875fd011..1d752b9a3065 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -303,6 +303,7 @@ struct intel_engine_cs {
>  	struct intel_ring *buffer;
>  	struct intel_timeline *timeline;
>  
> +	struct drm_i915_gem_object *default_state;
>  	struct intel_render_state *render_state;
>  
>  	atomic_t irq_count;
> -- 
> 2.15.0


More information about the Intel-gfx mailing list