[Intel-gfx] [PATCH 17/20] drm/i915/selftest: Refactor reset locking
Michel Thierry
michel.thierry at intel.com
Mon Jul 24 19:25:52 UTC 2017
On 7/21/2017 5:32 AM, Chris Wilson wrote:
> Extract the common barrier that keeps rogue hangchecks from disrupting our
> direct testing of resets, and in the process expand the lock to include
> the per-engine reset shortcuts.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
I don't know how you trigger that, but
Reviewed-by: Michel Thierry <michel.thierry at intel.com>
> ---
> drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 58 ++++++++++++++++++------
> 1 file changed, 43 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index dbfcb31ba9f4..3ece564e4d96 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -297,6 +297,37 @@ static int igt_hang_sanitycheck(void *arg)
> return err;
> }
>
> +static void global_reset_lock(struct drm_i915_private *i915)
> +{
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> +
> + while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
> + wait_event(i915->gpu_error.reset_queue,
> + !test_bit(I915_RESET_BACKOFF,
> + &i915->gpu_error.flags));
> +
> + for_each_engine(engine, i915, id) {
> + while (test_and_set_bit(I915_RESET_ENGINE + id,
> + &i915->gpu_error.flags))
> + wait_on_bit(&i915->gpu_error.flags,
> + I915_RESET_ENGINE + id,
> + TASK_UNINTERRUPTIBLE);
> + }
> +}
> +
> +static void global_reset_unlock(struct drm_i915_private *i915)
> +{
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> +
> + for_each_engine(engine, i915, id)
> + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
> +
> + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> + wake_up_all(&i915->gpu_error.reset_queue);
> +}
> +
> static int igt_global_reset(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> @@ -305,7 +336,7 @@ static int igt_global_reset(void *arg)
>
> /* Check that we can issue a global GPU reset */
>
> - set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> + global_reset_lock(i915);
> set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
>
> mutex_lock(&i915->drm.struct_mutex);
> @@ -320,8 +351,7 @@ static int igt_global_reset(void *arg)
> mutex_unlock(&i915->drm.struct_mutex);
>
> GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
> - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> - wake_up_all(&i915->gpu_error.reset_queue);
> + global_reset_unlock(i915);
>
> if (i915_terminally_wedged(&i915->gpu_error))
> err = -EIO;
> @@ -572,7 +602,7 @@ static int igt_wait_reset(void *arg)
>
> /* Check that we detect a stuck waiter and issue a reset */
>
> - set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> + global_reset_lock(i915);
>
> mutex_lock(&i915->drm.struct_mutex);
> err = hang_init(&h, i915);
> @@ -617,8 +647,7 @@ static int igt_wait_reset(void *arg)
> hang_fini(&h);
> unlock:
> mutex_unlock(&i915->drm.struct_mutex);
> - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> - wake_up_all(&i915->gpu_error.reset_queue);
> + global_reset_unlock(i915);
>
> if (i915_terminally_wedged(&i915->gpu_error))
> return -EIO;
> @@ -639,7 +668,8 @@ static int igt_reset_queue(void *arg)
> if (!igt_can_mi_store_dword_imm(i915))
> return 0;
>
> - set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> + global_reset_lock(i915);
> +
> mutex_lock(&i915->drm.struct_mutex);
> err = hang_init(&h, i915);
> if (err)
> @@ -733,8 +763,7 @@ static int igt_reset_queue(void *arg)
> hang_fini(&h);
> unlock:
> mutex_unlock(&i915->drm.struct_mutex);
> - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> - wake_up_all(&i915->gpu_error.reset_queue);
> + global_reset_unlock(i915);
>
> if (i915_terminally_wedged(&i915->gpu_error))
> return -EIO;
> @@ -756,7 +785,8 @@ static int igt_render_engine_reset_fallback(void *arg)
> if (!intel_has_reset_engine(i915))
> return 0;
>
> - set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> + global_reset_lock(i915);
> +
> mutex_lock(&i915->drm.struct_mutex);
>
> err = hang_init(&h, i915);
> @@ -786,8 +816,7 @@ static int igt_render_engine_reset_fallback(void *arg)
>
> /* unlock since we'll call handle_error */
> mutex_unlock(&i915->drm.struct_mutex);
> - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> - wake_up_all(&i915->gpu_error.reset_queue);
> + global_reset_unlock(i915);
>
> i915_handle_error(i915, intel_engine_flag(engine), "live test");
>
> @@ -809,7 +838,7 @@ static int igt_render_engine_reset_fallback(void *arg)
> * more full reset to re-enable the hw.
> */
> if (i915_terminally_wedged(&i915->gpu_error)) {
> - set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> + global_reset_lock(i915);
> rq->fence.error = 0;
>
> mutex_lock(&i915->drm.struct_mutex);
> @@ -830,8 +859,7 @@ static int igt_render_engine_reset_fallback(void *arg)
> i915_gem_request_put(rq);
> hang_fini(&h);
> out_backoff:
> - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> - wake_up_all(&i915->gpu_error.reset_queue);
> + global_reset_unlock(i915);
>
> if (i915_terminally_wedged(&i915->gpu_error))
> return -EIO;
>
More information about the Intel-gfx
mailing list