[Intel-gfx] [PATCH 5/6] drm/i915: Only recover active engines
Mika Kuoppala
mika.kuoppala at linux.intel.com
Wed Jun 26 14:44:56 UTC 2019
Chris Wilson <chris at chris-wilson.co.uk> writes:
> If we issue a reset to a currently idle engine, leave it idle
> afterwards. This is useful to excise a linkage between reset and the
> shrinker. When waking the engine, we need to pin the default context
default context, kernel context, golden context...
if we ever revisit the naming, I will advocate for proto context.
> image which we use for overwriting a guilty context -- if the engine is
> idle we do not need this pinned image! However, this pinning means that
> waking the engine acquires the FS_RECLAIM, and so may trigger the
> shrinker. The shrinker itself may need to wait upon the GPU to unbind
> an object and so may require services of reset; ergo we should avoid
> the engine wake up path.
>
> The danger in skipping the recovery for idle engines is that we leave the
> engine with no context defined, which may interfere with the operation of
> the power context on some older platforms. In practice, we should only
> be resetting an active GPU but it is something to look out for on Ironlake
> (if memory serves).
>
I will place my bet on bdw.
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/intel_reset.c | 37 ++++++++++++++----------
> drivers/gpu/drm/i915/gt/selftest_reset.c | 6 ++--
> 2 files changed, 26 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 8ce92c51564e..e7cbd9cf85c1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -678,7 +678,6 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
> * written to the powercontext is undefined and so we may lose
> * GPU state upon resume, i.e. fail to restart after a reset.
> */
> - intel_engine_pm_get(engine);
> intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
> engine->reset.prepare(engine);
> }
> @@ -709,16 +708,21 @@ static void revoke_mmaps(struct drm_i915_private *i915)
> }
> }
>
> -static void reset_prepare(struct drm_i915_private *i915)
> +static intel_engine_mask_t reset_prepare(struct drm_i915_private *i915)
> {
> struct intel_engine_cs *engine;
> + intel_engine_mask_t awake = 0;
> enum intel_engine_id id;
>
> - intel_gt_pm_get(&i915->gt);
> - for_each_engine(engine, i915, id)
> + for_each_engine(engine, i915, id) {
> + if (intel_engine_pm_get_if_awake(engine))
> + awake |= engine->mask;
> reset_prepare_engine(engine);
> + }
>
> intel_uc_reset_prepare(i915);
> +
> + return awake;
> }
>
> static void gt_revoke(struct drm_i915_private *i915)
> @@ -752,20 +756,22 @@ static int gt_reset(struct drm_i915_private *i915,
> static void reset_finish_engine(struct intel_engine_cs *engine)
> {
> engine->reset.finish(engine);
> - intel_engine_pm_put(engine);
> intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
> +
> + intel_engine_signal_breadcrumbs(engine);
> }
>
> -static void reset_finish(struct drm_i915_private *i915)
> +static void reset_finish(struct drm_i915_private *i915,
> + intel_engine_mask_t awake)
> {
> struct intel_engine_cs *engine;
> enum intel_engine_id id;
>
> for_each_engine(engine, i915, id) {
> reset_finish_engine(engine);
> - intel_engine_signal_breadcrumbs(engine);
> + if (awake & engine->mask)
> + intel_engine_pm_put(engine);
> }
> - intel_gt_pm_put(&i915->gt);
> }
>
> static void nop_submit_request(struct i915_request *request)
> @@ -789,6 +795,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
> {
> struct i915_gpu_error *error = &i915->gpu_error;
> struct intel_engine_cs *engine;
> + intel_engine_mask_t awake;
> enum intel_engine_id id;
>
> if (test_bit(I915_WEDGED, &error->flags))
> @@ -808,7 +815,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
> * rolling the global seqno forward (since this would complete requests
> * for which we haven't set the fence error to EIO yet).
> */
> - reset_prepare(i915);
> + awake = reset_prepare(i915);
>
> /* Even if the GPU reset fails, it should still stop the engines */
> if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
> @@ -832,7 +839,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
> for_each_engine(engine, i915, id)
> engine->cancel_requests(engine);
>
> - reset_finish(i915);
> + reset_finish(i915, awake);
>
> GEM_TRACE("end\n");
> }
> @@ -964,6 +971,7 @@ void i915_reset(struct drm_i915_private *i915,
> const char *reason)
> {
> struct i915_gpu_error *error = &i915->gpu_error;
> + intel_engine_mask_t awake;
> int ret;
>
> GEM_TRACE("flags=%lx\n", error->flags);
> @@ -980,7 +988,7 @@ void i915_reset(struct drm_i915_private *i915,
> dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
> error->reset_count++;
>
> - reset_prepare(i915);
> + awake = reset_prepare(i915);
>
> if (!intel_has_gpu_reset(i915)) {
> if (i915_modparams.reset)
> @@ -1021,7 +1029,7 @@ void i915_reset(struct drm_i915_private *i915,
> i915_queue_hangcheck(i915);
>
> finish:
> - reset_finish(i915);
> + reset_finish(i915, awake);
> unlock:
> mutex_unlock(&error->wedge_mutex);
> return;
> @@ -1072,7 +1080,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
> GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
> GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
>
> - if (!intel_engine_pm_is_awake(engine))
> + if (!intel_engine_pm_get_if_awake(engine))
> return 0;
>
> reset_prepare_engine(engine);
> @@ -1107,12 +1115,11 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
> * process to program RING_MODE, HWSP and re-enable submission.
> */
> ret = engine->resume(engine);
> - if (ret)
> - goto out;
>
> out:
> intel_engine_cancel_stop_cs(engine);
> reset_finish_engine(engine);
> + intel_engine_pm_put(engine);
> return ret;
> }
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
> index 641cf3aee8d5..672e32e1ef95 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_reset.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
> @@ -71,15 +71,17 @@ static int igt_atomic_reset(void *arg)
> goto unlock;
>
> for (p = igt_atomic_phases; p->name; p++) {
> + intel_engine_mask_t awake;
> +
> GEM_TRACE("intel_gpu_reset under %s\n", p->name);
>
> - reset_prepare(i915);
> + awake = reset_prepare(i915);
> p->critical_section_begin();
>
> err = intel_gpu_reset(i915, ALL_ENGINES);
>
> p->critical_section_end();
> - reset_finish(i915);
> + reset_finish(i915, awake);
>
> if (err) {
> pr_err("intel_gpu_reset failed under %s\n", p->name);
> --
> 2.20.1
More information about the Intel-gfx
mailing list