[Intel-gfx] [PATCH 4/6] drm/i915: Record the default hw state after reset upon load
Mika Kuoppala
mika.kuoppala at linux.intel.com
Thu Nov 2 13:56:50 UTC 2017
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Take a copy of the HW state after a reset upon module loading by
> executing a context switch from a blank context to the kernel context,
> thus saving the default hw state over the blank context image.
> We can then use the default hw state to initialise any future context,
> ensuring that each starts with the default view of hw state.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gvt/scheduler.c | 2 -
> drivers/gpu/drm/i915/i915_debugfs.c | 1 -
> drivers/gpu/drm/i915/i915_gem.c | 69 +++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_gem_context.c | 50 +++---------------------
> drivers/gpu/drm/i915/i915_gem_context.h | 4 +-
> drivers/gpu/drm/i915/intel_engine_cs.c | 3 ++
> drivers/gpu/drm/i915/intel_lrc.c | 35 ++++++++++-------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 47 +++++++++++++++-------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 9 files changed, 137 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index f6ded475bb2c..42cc61230ca7 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -723,8 +723,6 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu)
> if (IS_ERR(vgpu->shadow_ctx))
> return PTR_ERR(vgpu->shadow_ctx);
>
> - vgpu->shadow_ctx->engine[RCS].initialised = true;
> -
> bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES);
>
> return 0;
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 39883cd915db..cfcef1899da8 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1974,7 +1974,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
> struct intel_context *ce = &ctx->engine[engine->id];
>
> seq_printf(m, "%s: ", engine->name);
> - seq_putc(m, ce->initialised ? 'I' : 'i');
> if (ce->state)
> describe_obj(m, ce->state->obj);
> if (ce->ring)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e36a3a840552..ed4b1708fec5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4967,6 +4967,71 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
> return true;
> }
>
> +static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> +{
> + struct i915_gem_context *ctx;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + int err;
> +
> + /*
> + * As we reset the gpu during very early sanitisation, the current
> + * register state on the GPU should reflect its defaults values.
> + * We load a context onto the hw (with restore-inhibit), then switch
> + * over to a second context to save that default register state. We
> + * can then prime every new context with that state so they all start
> + * from defaults.
> + */
> +
> + ctx = i915_gem_context_create_kernel(i915, 0);
> + if (IS_ERR(ctx))
> + return PTR_ERR(ctx);
> +
> + for_each_engine(engine, i915, id) {
> + struct drm_i915_gem_request *rq;
> +
> + rq = i915_gem_request_alloc(engine, ctx);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto out_ctx;
> + }
> +
> + err = i915_switch_context(rq);
> + if (engine->init_context)
> + err = engine->init_context(rq);
> +
> + __i915_add_request(rq, true);
> + if (err)
> + goto out_ctx;
> + }
> +
> + err = i915_gem_switch_to_kernel_context(i915);
> + if (err)
> + goto out_ctx;
> +
> + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
> + if (err)
> + goto out_ctx;
> +
> + for_each_engine(engine, i915, id) {
> + if (!ctx->engine[id].state)
> + continue;
> +
> + engine->default_state =
> + i915_gem_object_get(ctx->engine[id].state->obj);
> +
> + err = i915_gem_object_set_to_cpu_domain(engine->default_state,
> + false);
> + if (err)
> + goto out_ctx;
> + }
> +
> +out_ctx:
> + i915_gem_context_set_closed(ctx);
> + i915_gem_context_put(ctx);
> + return err;
> +}
> +
> int i915_gem_init(struct drm_i915_private *dev_priv)
> {
> int ret;
> @@ -5022,6 +5087,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>
> intel_init_gt_powersave(dev_priv);
>
> + ret = __intel_engines_record_defaults(dev_priv);
> + if (ret)
> + goto out_unlock;
> +
> out_unlock:
> if (ret == -EIO) {
> /* Allow engine initialisation to fail by marking the GPU as
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 10affb35ac56..499cc0f25653 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -343,7 +343,7 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
> * context state of the GPU for applications that don't utilize HW contexts, as
> * well as an idle case.
> */
> -static struct i915_gem_context *
> +struct i915_gem_context *
> i915_gem_create_context(struct drm_i915_private *dev_priv,
> struct drm_i915_file_private *file_priv)
> {
> @@ -418,8 +418,8 @@ i915_gem_context_create_gvt(struct drm_device *dev)
> return ctx;
> }
>
> -static struct i915_gem_context *
> -create_kernel_context(struct drm_i915_private *i915, int prio)
> +struct i915_gem_context *
> +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
> {
> struct i915_gem_context *ctx;
>
> @@ -473,7 +473,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> ida_init(&dev_priv->contexts.hw_ida);
>
> /* lowest priority; idle task */
> - ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN);
> + ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
> if (IS_ERR(ctx)) {
> DRM_ERROR("Failed to create default global context\n");
> err = PTR_ERR(ctx);
> @@ -487,7 +487,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> dev_priv->kernel_context = ctx;
>
> /* highest priority; preempting task */
> - ctx = create_kernel_context(dev_priv, INT_MAX);
> + ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX);
> if (IS_ERR(ctx)) {
> DRM_ERROR("Failed to create default preempt context\n");
> err = PTR_ERR(ctx);
> @@ -522,28 +522,6 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
> engine->context_unpin(engine, engine->last_retired_context);
> engine->last_retired_context = NULL;
> }
> -
> - /* Force the GPU state to be restored on enabling */
> - if (!i915_modparams.enable_execlists) {
> - struct i915_gem_context *ctx;
> -
> - list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> - if (!i915_gem_context_is_default(ctx))
> - continue;
> -
> - for_each_engine(engine, dev_priv, id)
> - ctx->engine[engine->id].initialised = false;
> -
> - ctx->remap_slice = ALL_L3_SLICES(dev_priv);
> - }
> -
> - for_each_engine(engine, dev_priv, id) {
> - struct intel_context *kce =
> - &dev_priv->kernel_context->engine[engine->id];
> -
> - kce->initialised = true;
> - }
> - }
> }
>
> void i915_gem_contexts_fini(struct drm_i915_private *i915)
> @@ -718,9 +696,6 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
> if (to->remap_slice)
> return false;
>
> - if (!to->engine[RCS].initialised)
> - return false;
> -
> if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
> return false;
>
> @@ -795,11 +770,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> return ret;
> }
>
> - if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
> - /* NB: If we inhibit the restore, the context is not allowed to
> - * die because future work may end up depending on valid address
> - * space. This means we must enforce that a page table load
> - * occur when this occurs. */
> + if (i915_gem_context_is_kernel(to))
> hw_flags = MI_RESTORE_INHIBIT;
> else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
> hw_flags = MI_FORCE_RESTORE;
> @@ -843,15 +814,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> to->remap_slice &= ~(1<<i);
> }
>
> - if (!to->engine[RCS].initialised) {
> - if (engine->init_context) {
> - ret = engine->init_context(req);
> - if (ret)
> - return ret;
> - }
> - to->engine[RCS].initialised = true;
> - }
> -
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 44688e22a5c2..4bfb72f8e1cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -157,7 +157,6 @@ struct i915_gem_context {
> u32 *lrc_reg_state;
> u64 lrc_desc;
> int pin_count;
> - bool initialised;
> } engine[I915_NUM_ENGINES];
>
> /** ring_size: size for allocating the per-engine ring buffer */
> @@ -292,6 +291,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
>
> +struct i915_gem_context *
> +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
> +
> static inline struct i915_gem_context *
> i915_gem_context_get(struct i915_gem_context *ctx)
> {
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c15e288bed02..6b8519401d4d 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -679,6 +679,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
> intel_engine_cleanup_cmd_parser(engine);
> i915_gem_batch_pool_fini(&engine->batch_pool);
>
> + if (engine->default_state)
> + i915_gem_object_put(engine->default_state);
> +
> if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> engine->context_unpin(engine, engine->i915->preempt_context);
> engine->context_unpin(engine, engine->i915->kernel_context);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 6840ec8db037..297ce0327273 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1147,7 +1147,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
> struct intel_engine_cs *engine = request->engine;
> struct intel_context *ce = &request->ctx->engine[engine->id];
> u32 *cs;
> - int ret;
>
> GEM_BUG_ON(!ce->pin_count);
>
> @@ -1161,14 +1160,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - if (!ce->initialised) {
> - ret = engine->init_context(request);
> - if (ret)
> - return ret;
> -
> - ce->initialised = true;
> - }
> -
> /* Note that after this point, we have committed to using
> * this request as it is being used to both track the
> * state of engine initialisation and liveness of the
> @@ -2100,7 +2091,6 @@ static void execlists_init_reg_state(u32 *regs,
>
> CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
> _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
> - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> (HAS_RESOURCE_STREAMER(dev_priv) ?
> CTX_CTRL_RS_CTX_ENABLE : 0)));
> CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> @@ -2177,6 +2167,7 @@ populate_lr_context(struct i915_gem_context *ctx,
> struct intel_ring *ring)
> {
> void *vaddr;
> + u32 *regs;
> int ret;
>
> ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
> @@ -2193,11 +2184,28 @@ populate_lr_context(struct i915_gem_context *ctx,
> }
> ctx_obj->mm.dirty = true;
>
> + if (engine->default_state) {
> + void *defaults;
> +
> + defaults = i915_gem_object_pin_map(engine->default_state,
> + I915_MAP_WB);
> + if (IS_ERR(defaults))
> + return PTR_ERR(defaults);
> +
> + memcpy(vaddr + LRC_HEADER_PAGES * PAGE_SIZE,
> + defaults + LRC_HEADER_PAGES * PAGE_SIZE,
> + engine->context_size);
> + i915_gem_object_unpin_map(engine->default_state);
> + }
> +
> /* The second page of the context object contains some fields which must
> * be set up prior to the first execution. */
> -
> - execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
> - ctx, engine, ring);
> + regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
> + execlists_init_reg_state(regs, ctx, engine, ring);
> + if (!engine->default_state) {
> + regs[CTX_CONTEXT_CONTROL+1] |=
> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
> + }
The default state we now copy from was initialised with restore inhibit set,
so every context copied from it will inherit that bit. Where do we clear it?
-Mika
>
> i915_gem_object_unpin_map(ctx_obj);
>
> @@ -2250,7 +2258,6 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>
> ce->ring = ring;
> ce->state = vma;
> - ce->initialised |= engine->init_context == NULL;
>
> return 0;
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 47fadf8da84e..3e5c296cd8bc 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1368,7 +1368,7 @@ static int context_pin(struct i915_gem_context *ctx)
> * on an active context (which by nature is already on the GPU).
> */
> if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
> - ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
> + ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
> if (ret)
> return ret;
> }
> @@ -1383,11 +1383,34 @@ alloc_context_vma(struct intel_engine_cs *engine)
> struct drm_i915_private *i915 = engine->i915;
> struct drm_i915_gem_object *obj;
> struct i915_vma *vma;
> + int err;
>
> obj = i915_gem_object_create(i915, engine->context_size);
> if (IS_ERR(obj))
> return ERR_CAST(obj);
>
> + if (engine->default_state) {
> + void *defaults, *vaddr;
> +
> + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> + if (IS_ERR(vaddr)) {
> + err = PTR_ERR(vaddr);
> + goto err_obj;
> + }
> +
> + defaults = i915_gem_object_pin_map(engine->default_state,
> + I915_MAP_WB);
> + if (IS_ERR(defaults)) {
> + err = PTR_ERR(defaults);
> + goto err_map;
> + }
> +
> + memcpy(vaddr, defaults, engine->context_size);
> +
> + i915_gem_object_unpin_map(engine->default_state);
> + i915_gem_object_unpin_map(obj);
> + }
> +
> /*
> * Try to make the context utilize L3 as well as LLC.
> *
> @@ -1409,10 +1432,18 @@ alloc_context_vma(struct intel_engine_cs *engine)
> }
>
> vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
> - if (IS_ERR(vma))
> - i915_gem_object_put(obj);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto err_obj;
> + }
>
> return vma;
> +
> +err_map:
> + i915_gem_object_unpin_map(obj);
> +err_obj:
> + i915_gem_object_put(obj);
> + return ERR_PTR(err);
> }
>
> static struct intel_ring *
> @@ -1449,16 +1480,6 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
> ce->state->obj->pin_global++;
> }
>
> - /* The kernel context is only used as a placeholder for flushing the
> - * active context. It is never used for submitting user rendering and
> - * as such never requires the golden render context, and so we can skip
> - * emitting it when we switch to the kernel context. This is required
> - * as during eviction we cannot allocate and pin the renderstate in
> - * order to initialise the context.
> - */
> - if (i915_gem_context_is_kernel(ctx))
> - ce->initialised = true;
> -
> i915_gem_context_get(ctx);
>
> out:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 69ad875fd011..1d752b9a3065 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -303,6 +303,7 @@ struct intel_engine_cs {
> struct intel_ring *buffer;
> struct intel_timeline *timeline;
>
> + struct drm_i915_gem_object *default_state;
> struct intel_render_state *render_state;
>
> atomic_t irq_count;
> --
> 2.15.0
More information about the Intel-gfx
mailing list