[Intel-gfx] [PATCH] drm/i915/selftests: Exercise independence of per-engine resets
Michel Thierry
michel.thierry at intel.com
Thu Jul 6 17:15:59 UTC 2017
On 05/07/17 04:48, Chris Wilson wrote:
> If all goes well, resetting one engine should not affect the operation of
> any others. So to test this, we set up a continuous stream of requests
> onto each of the "innocent" engines whilst constantly resetting our
> target engine.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
Reviewed-by: Michel Thierry <michel.thierry at intel.com>
> ---
> drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 165 +++++++++++++++++++++++
> drivers/gpu/drm/i915/selftests/mock_context.c | 8 ++
> drivers/gpu/drm/i915/selftests/mock_context.h | 3 +
> 3 files changed, 176 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 7096c3911cd3..dbfcb31ba9f4 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -22,8 +22,13 @@
> *
> */
>
> +#include <linux/kthread.h>
> +
> #include "../i915_selftest.h"
>
> +#include "mock_context.h"
> +#include "mock_drm.h"
> +
> struct hang {
> struct drm_i915_private *i915;
> struct drm_i915_gem_object *hws;
> @@ -372,6 +377,165 @@ static int igt_reset_engine(void *arg)
> return err;
> }
>
> +static int active_engine(void *data)
> +{
> + struct intel_engine_cs *engine = data;
> + struct drm_i915_gem_request *rq[2] = {};
> + struct i915_gem_context *ctx[2];
> + struct drm_file *file;
> + unsigned long count = 0;
> + int err = 0;
> +
> + file = mock_file(engine->i915);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> +
> + mutex_lock(&engine->i915->drm.struct_mutex);
> + ctx[0] = live_context(engine->i915, file);
> + mutex_unlock(&engine->i915->drm.struct_mutex);
> + if (IS_ERR(ctx[0])) {
> + err = PTR_ERR(ctx[0]);
> + goto err_file;
> + }
> +
> + mutex_lock(&engine->i915->drm.struct_mutex);
> + ctx[1] = live_context(engine->i915, file);
> + mutex_unlock(&engine->i915->drm.struct_mutex);
> + if (IS_ERR(ctx[1])) {
> + err = PTR_ERR(ctx[1]);
> + i915_gem_context_put(ctx[0]);
> + goto err_file;
> + }
> +
> + while (!kthread_should_stop()) {
> + unsigned int idx = count++ & 1;
> + struct drm_i915_gem_request *old = rq[idx];
> + struct drm_i915_gem_request *new;
> +
> + mutex_lock(&engine->i915->drm.struct_mutex);
> + new = i915_gem_request_alloc(engine, ctx[idx]);
> + if (IS_ERR(new)) {
> + mutex_unlock(&engine->i915->drm.struct_mutex);
> + err = PTR_ERR(new);
> + break;
> + }
> +
> + rq[idx] = i915_gem_request_get(new);
> + i915_add_request(new);
> + mutex_unlock(&engine->i915->drm.struct_mutex);
> +
> + if (old) {
> + i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
> + i915_gem_request_put(old);
> + }
> + }
> +
> + for (count = 0; count < ARRAY_SIZE(rq); count++)
> + i915_gem_request_put(rq[count]);
> +
> +err_file:
> + mock_file_free(engine->i915, file);
> + return err;
> +}
> +
> +static int igt_reset_active_engines(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct intel_engine_cs *engine, *active;
> + enum intel_engine_id id, tmp;
> + int err = 0;
> +
> + /* Check that issuing a reset on one engine does not interfere
> + * with any other engine.
> + */
> +
> + if (!intel_has_reset_engine(i915))
> + return 0;
> +
> + for_each_engine(engine, i915, id) {
> + struct task_struct *threads[I915_NUM_ENGINES];
> + unsigned long resets[I915_NUM_ENGINES];
> + unsigned long global = i915_reset_count(&i915->gpu_error);
> + IGT_TIMEOUT(end_time);
> +
> + memset(threads, 0, sizeof(threads));
> + for_each_engine(active, i915, tmp) {
> + struct task_struct *tsk;
> +
> + if (active == engine)
> + continue;
> +
> + resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
> + active);
> +
> + tsk = kthread_run(active_engine, active,
> + "igt/%s", active->name);
> + if (IS_ERR(tsk)) {
> + err = PTR_ERR(tsk);
> + goto unwind;
> + }
> +
> + threads[tmp] = tsk;
> + get_task_struct(tsk);
> +
> + }
> +
> + set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
> + do {
> + err = i915_reset_engine(engine);
> + if (err) {
> + pr_err("i915_reset_engine(%s) failed, err=%d\n",
> + engine->name, err);
> + break;
> + }
> + } while (time_before(jiffies, end_time));
> + clear_bit(I915_RESET_ENGINE + engine->id,
> + &i915->gpu_error.flags);
> +
> +unwind:
> + for_each_engine(active, i915, tmp) {
> + int ret;
> +
> + if (!threads[tmp])
> + continue;
> +
> + ret = kthread_stop(threads[tmp]);
> + if (ret) {
> + pr_err("kthread for active engine %s failed, err=%d\n",
> + active->name, ret);
> + if (!err)
> + err = ret;
> + }
> + put_task_struct(threads[tmp]);
> +
> + if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
> + active)) {
> + pr_err("Innocent engine %s was reset (count=%ld)\n",
> + active->name,
> + i915_reset_engine_count(&i915->gpu_error,
> + active) - resets[tmp]);
> + err = -EIO;
> + }
> + }
> +
> + if (global != i915_reset_count(&i915->gpu_error)) {
> + pr_err("Global reset (count=%ld)!\n",
> + i915_reset_count(&i915->gpu_error) - global);
> + err = -EIO;
> + }
> +
> + if (err)
> + break;
> +
> + cond_resched();
> + }
> +
> + if (i915_terminally_wedged(&i915->gpu_error))
> + err = -EIO;
> +
> + return err;
> +}
> +
> static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
> {
> u32 reset_count;
> @@ -689,6 +853,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
> SUBTEST(igt_hang_sanitycheck),
> SUBTEST(igt_global_reset),
> SUBTEST(igt_reset_engine),
> + SUBTEST(igt_reset_active_engines),
> SUBTEST(igt_wait_reset),
> SUBTEST(igt_reset_queue),
> SUBTEST(igt_render_engine_reset_fallback),
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 9c7c68181f82..d436f2d5089b 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -95,3 +95,11 @@ void mock_init_contexts(struct drm_i915_private *i915)
> INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
> init_llist_head(&i915->contexts.free_list);
> }
> +
> +struct i915_gem_context *
> +live_context(struct drm_i915_private *i915, struct drm_file *file)
> +{
> + lockdep_assert_held(&i915->drm.struct_mutex);
> +
> + return i915_gem_create_context(i915, file->driver_priv);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
> index 383941a61124..2f432c03d413 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.h
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.h
> @@ -33,4 +33,7 @@ mock_context(struct drm_i915_private *i915,
>
> void mock_context_close(struct i915_gem_context *ctx);
>
> +struct i915_gem_context *
> +live_context(struct drm_i915_private *i915, struct drm_file *file);
> +
> #endif /* !__MOCK_CONTEXT_H */
>
More information about the Intel-gfx
mailing list