[Intel-gfx] [PATCH] drm/i915/selftests: Add a safety net to live_workarounds
Mika Kuoppala
mika.kuoppala at linux.intel.com
Wed Jul 11 11:27:49 UTC 2018
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Since live_workarounds pokes around the w/a registers and checks to see
> if they survive across a reset, we are prone to fouling the machine and
> leaving it in a non-recoverable state. Wrap the probe inside a timeout
> to abort the test if the reset fails.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107188
How can we tell that it is caused by the associated whitelist poking
and not just by reset flakiness?
-Mika
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/selftests/igt_wedge_me.h | 48 +++++++++++++++++++
> .../drm/i915/selftests/intel_workarounds.c | 8 +++-
> 2 files changed, 55 insertions(+), 1 deletion(-)
> create mode 100644 drivers/gpu/drm/i915/selftests/igt_wedge_me.h
>
> diff --git a/drivers/gpu/drm/i915/selftests/igt_wedge_me.h b/drivers/gpu/drm/i915/selftests/igt_wedge_me.h
> new file mode 100644
> index 000000000000..d2518cf9a5c8
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/igt_wedge_me.h
> @@ -0,0 +1,48 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#ifndef IGT_WEDGE_ME_H
> +#define IGT_WEDGE_ME_H
> +
> +struct igt_wedge_me {
> + struct delayed_work work;
> + struct drm_i915_private *i915;
> + const char *name;
> +};
> +
> +static void __igt_wedge_me(struct work_struct *work)
> +{
> + struct igt_wedge_me *w = container_of(work, typeof(*w), work.work);
> +
> + pr_err("%s timed out, cancelling test.\n", w->name);
> + i915_gem_set_wedged(w->i915);
> +}
> +
> +static void __igt_init_wedge(struct igt_wedge_me *w,
> + struct drm_i915_private *i915,
> + long timeout,
> + const char *name)
> +{
> + w->i915 = i915;
> + w->name = name;
> +
> + INIT_DELAYED_WORK_ONSTACK(&w->work, __igt_wedge_me);
> + schedule_delayed_work(&w->work, timeout);
> +}
> +
> +static void __igt_fini_wedge(struct igt_wedge_me *w)
> +{
> + cancel_delayed_work_sync(&w->work);
> + destroy_delayed_work_on_stack(&w->work);
> + w->i915 = NULL;
> +}
> +
> +#define igt_wedge_on_timeout(W, DEV, TIMEOUT) \
> + for (__igt_init_wedge((W), (DEV), (TIMEOUT), __func__); \
> + (W)->i915; \
> + __igt_fini_wedge((W)))
> +
> +#endif /* IGT_WEDGE_ME_H */
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index fafdec3fe83e..0d39b3bf0c0d 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -6,6 +6,7 @@
>
> #include "../i915_selftest.h"
>
> +#include "igt_wedge_me.h"
> #include "mock_context.h"
>
> static struct drm_i915_gem_object *
> @@ -111,6 +112,7 @@ static int check_whitelist(const struct whitelist *w,
> struct intel_engine_cs *engine)
> {
> struct drm_i915_gem_object *results;
> + struct igt_wedge_me wedge;
> u32 *vaddr;
> int err;
> int i;
> @@ -119,7 +121,11 @@ static int check_whitelist(const struct whitelist *w,
> if (IS_ERR(results))
> return PTR_ERR(results);
>
> - err = i915_gem_object_set_to_cpu_domain(results, false);
> + err = 0;
> + igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
> + err = i915_gem_object_set_to_cpu_domain(results, false);
> + if (i915_terminally_wedged(&ctx->i915->gpu_error))
> + err = -EIO;
> if (err)
> goto out_put;
>
> --
> 2.18.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list