[Intel-gfx] [PATCH] drm/i915/selftests: Wait for idle between idle resets as well
Michał Winiarski
michal.winiarski at intel.com
Thu Apr 26 16:21:39 UTC 2018
On Wed, Apr 11, 2018 at 01:03:46PM +0100, Chris Wilson wrote:
> Even though we weren't injecting guilty requests to be reset, we could
> still fall over the issue of resetting the same request too fast -- where
> the GPU refuses to start again. (Although it is interesting to note that
> reloading the driver is sufficient, suggesting that we could recover if
> we delayed the setup after reset?) Continue to paper over the problem by
> adding a small delay by waiting for the engine to idle between tests,
> and ensure that the engines are idle before starting the idle tests.
>
> References: 028666793a02 ("drm/i915/selftests: Avoid repeatedly harming the same innocent context")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> .../gpu/drm/i915/selftests/intel_hangcheck.c | 48 ++++++++++++++++++-
> 1 file changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 24f913f26a7b..7e23e6a6719c 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -454,6 +454,11 @@ static int igt_global_reset(void *arg)
> return err;
> }
>
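How about adding "#define IGT_IDLE_TIMEOUT 50" instead of hard-coding the timeout in wait_for_idle() below?
The wait_for() call should still fit within a single line even with the named constant.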
With or without that:
Reviewed-by: Michał Winiarski <michal.winiarski at intel.com>
-Michał
> +static bool wait_for_idle(struct intel_engine_cs *engine)
> +{
> + return wait_for(intel_engine_is_idle(engine), 50) == 0;
> +}
> +
> static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
> {
> struct intel_engine_cs *engine;
> @@ -481,6 +486,13 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
> if (active && !intel_engine_can_store_dword(engine))
> continue;
>
> + if (!wait_for_idle(engine)) {
> + pr_err("%s failed to idle before reset\n",
> + engine->name);
> + err = -EIO;
> + break;
> + }
> +
> reset_count = i915_reset_count(&i915->gpu_error);
> reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
> engine);
> @@ -542,6 +554,19 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
> err = -EINVAL;
> break;
> }
> +
> + if (!wait_for_idle(engine)) {
> + struct drm_printer p =
> + drm_info_printer(i915->drm.dev);
> +
> + pr_err("%s failed to idle after reset\n",
> + engine->name);
> + intel_engine_dump(engine, &p,
> + "%s\n", engine->name);
> +
> + err = -EIO;
> + break;
> + }
> } while (time_before(jiffies, end_time));
> clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>
> @@ -696,6 +721,13 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
> !intel_engine_can_store_dword(engine))
> continue;
>
> + if (!wait_for_idle(engine)) {
> + pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n",
> + engine->name, test_name);
> + err = -EIO;
> + break;
> + }
> +
> memset(threads, 0, sizeof(threads));
> for_each_engine(other, i915, tmp) {
> struct task_struct *tsk;
> @@ -772,6 +804,20 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
> i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
> i915_request_put(rq);
> }
> +
> + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
> + struct drm_printer p =
> + drm_info_printer(i915->drm.dev);
> +
> + pr_err("i915_reset_engine(%s:%s):"
> + " failed to idle after reset\n",
> + engine->name, test_name);
> + intel_engine_dump(engine, &p,
> + "%s\n", engine->name);
> +
> + err = -EIO;
> + break;
> + }
> } while (time_before(jiffies, end_time));
> clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
> pr_info("i915_reset_engine(%s:%s): %lu resets\n",
> @@ -981,7 +1027,7 @@ static int wait_for_others(struct drm_i915_private *i915,
> if (engine == exclude)
> continue;
>
> - if (wait_for(intel_engine_is_idle(engine), 10))
> + if (!wait_for_idle(engine))
> return -EIO;
> }
>
> --
> 2.17.0
>
More information about the Intel-gfx mailing list