[Intel-gfx] [PATCH] drm/i915/selftests: Wait for idle between idle resets as well

Michał Winiarski michal.winiarski at intel.com
Thu Apr 26 16:21:39 UTC 2018


On Wed, Apr 11, 2018 at 01:03:46PM +0100, Chris Wilson wrote:
> Even though we weren't injecting guilty requests to be reset, we could
> still fall over the issue of resetting the same request too fast -- where
> the GPU refuses to start again. (Although it is interesting to note that
> reloading the driver is sufficient, suggesting that we could recover if
> we delayed the setup after reset?) Continue to paper over the problem by
> adding a small delay by waiting for the engine to idle between tests,
> and ensure that the engines are idle before starting the idle tests.
> 
> References: 028666793a02 ("drm/i915/selftests: Avoid repeatedly harming the same innocent context")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>  .../gpu/drm/i915/selftests/intel_hangcheck.c  | 48 ++++++++++++++++++-
>  1 file changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 24f913f26a7b..7e23e6a6719c 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -454,6 +454,11 @@ static int igt_global_reset(void *arg)
>  	return err;
>  }
>  

#define IGT_IDLE_TIMEOUT 50 ?
It should even fit within a line.

With or without that:

Reviewed-by: Michał Winiarski <michal.winiarski at intel.com>

-Michał

> +static bool wait_for_idle(struct intel_engine_cs *engine)
> +{
> +	return wait_for(intel_engine_is_idle(engine), 50) == 0;
> +}
> +
>  static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>  {
>  	struct intel_engine_cs *engine;
> @@ -481,6 +486,13 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>  		if (active && !intel_engine_can_store_dword(engine))
>  			continue;
>  
> +		if (!wait_for_idle(engine)) {
> +			pr_err("%s failed to idle before reset\n",
> +			       engine->name);
> +			err = -EIO;
> +			break;
> +		}
> +
>  		reset_count = i915_reset_count(&i915->gpu_error);
>  		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
>  							     engine);
> @@ -542,6 +554,19 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>  				err = -EINVAL;
>  				break;
>  			}
> +
> +			if (!wait_for_idle(engine)) {
> +				struct drm_printer p =
> +					drm_info_printer(i915->drm.dev);
> +
> +				pr_err("%s failed to idle after reset\n",
> +				       engine->name);
> +				intel_engine_dump(engine, &p,
> +						  "%s\n", engine->name);
> +
> +				err = -EIO;
> +				break;
> +			}
>  		} while (time_before(jiffies, end_time));
>  		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>  
> @@ -696,6 +721,13 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
>  		    !intel_engine_can_store_dword(engine))
>  			continue;
>  
> +		if (!wait_for_idle(engine)) {
> +			pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n",
> +			       engine->name, test_name);
> +			err = -EIO;
> +			break;
> +		}
> +
>  		memset(threads, 0, sizeof(threads));
>  		for_each_engine(other, i915, tmp) {
>  			struct task_struct *tsk;
> @@ -772,6 +804,20 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
>  				i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
>  				i915_request_put(rq);
>  			}
> +
> +			if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
> +				struct drm_printer p =
> +					drm_info_printer(i915->drm.dev);
> +
> +				pr_err("i915_reset_engine(%s:%s):"
> +				       " failed to idle after reset\n",
> +				       engine->name, test_name);
> +				intel_engine_dump(engine, &p,
> +						  "%s\n", engine->name);
> +
> +				err = -EIO;
> +				break;
> +			}
>  		} while (time_before(jiffies, end_time));
>  		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>  		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
> @@ -981,7 +1027,7 @@ static int wait_for_others(struct drm_i915_private *i915,
>  		if (engine == exclude)
>  			continue;
>  
> -		if (wait_for(intel_engine_is_idle(engine), 10))
> +		if (!wait_for_idle(engine))
>  			return -EIO;
>  	}
>  
> -- 
> 2.17.0
> 


More information about the Intel-gfx mailing list