[Intel-gfx] [PATCH v2] drm/i915/selftests: Check for hangs mid context execution tests

Chris Wilson chris at chris-wilson.co.uk
Fri Oct 26 11:44:32 UTC 2018


Quoting Tvrtko Ursulin (2018-10-26 12:14:31)
> 
> On 12/10/2018 13:24, Chris Wilson wrote:
> > Use the live_test struct to record the reset count before the test and
> > compare it at the end to assert that no mystery hang occurred during
> > the test.
> > 
> > v2: Check per-engine resets as well
> > 
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> > ---
> >   .../gpu/drm/i915/selftests/i915_gem_context.c | 51 +++++++++++++++----
> >   1 file changed, 41 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > index 913c0f83f86a..1be3b67a7c48 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > @@ -39,7 +39,8 @@ struct live_test {
> >       const char *func;
> >       const char *name;
> >   
> > -     unsigned int reset_count;
> > +     unsigned int reset_global;
> > +     unsigned int reset_engine[I915_NUM_ENGINES];
> >   };
> >   
> >   static int begin_live_test(struct live_test *t,
> > @@ -47,6 +48,8 @@ static int begin_live_test(struct live_test *t,
> >                          const char *func,
> >                          const char *name)
> >   {
> > +     struct intel_engine_cs *engine;
> > +     enum intel_engine_id id;
> >       int err;
> >   
> >       t->i915 = i915;
> > @@ -63,7 +66,11 @@ static int begin_live_test(struct live_test *t,
> >       }
> >   
> >       i915->gpu_error.missed_irq_rings = 0;
> > -     t->reset_count = i915_reset_count(&i915->gpu_error);
> > +     t->reset_global = i915_reset_count(&i915->gpu_error);
> > +
> > +     for_each_engine(engine, i915, id)
> > +             t->reset_engine[id] =
> > +                     i915_reset_engine_count(&i915->gpu_error, engine);
> >   
> >       return 0;
> >   }
> > @@ -71,14 +78,28 @@ static int begin_live_test(struct live_test *t,
> >   static int end_live_test(struct live_test *t)
> >   {
> >       struct drm_i915_private *i915 = t->i915;
> > +     struct intel_engine_cs *engine;
> > +     enum intel_engine_id id;
> >   
> >       if (igt_flush_test(i915, I915_WAIT_LOCKED))
> >               return -EIO;
> >   
> > -     if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
> > +     if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
> >               pr_err("%s(%s): GPU was reset %d times!\n",
> >                      t->func, t->name,
> > -                    i915_reset_count(&i915->gpu_error) - t->reset_count);
> > +                    i915_reset_count(&i915->gpu_error) - t->reset_global);
> > +             return -EIO;
> > +     }
> > +
> > +     for_each_engine(engine, i915, id) {
> > +             if (t->reset_engine[id] ==
> > +                 i915_reset_engine_count(&i915->gpu_error, engine))
> > +                     continue;
> > +
> > +             pr_err("%s(%s): engine '%s' was reset %d times!\n",
> > +                    t->func, t->name, engine->name,
> > +                    i915_reset_engine_count(&i915->gpu_error, engine) -
> > +                    t->reset_engine[id]);
> >               return -EIO;
> >       }
> >   
> > @@ -531,10 +552,11 @@ static int igt_ctx_exec(void *arg)
> >   {
> >       struct drm_i915_private *i915 = arg;
> >       struct drm_i915_gem_object *obj = NULL;
> > +     unsigned long ncontexts, ndwords, dw;
> >       struct drm_file *file;
> >       IGT_TIMEOUT(end_time);
> >       LIST_HEAD(objects);
> > -     unsigned long ncontexts, ndwords, dw;
> > +     struct live_test t;
> >       int err = -ENODEV;
> >   
> >       /*
> > @@ -552,6 +574,10 @@ static int igt_ctx_exec(void *arg)
> >   
> >       mutex_lock(&i915->drm.struct_mutex);
> >   
> > +     err = begin_live_test(&t, i915, __func__, "");
> > +     if (err)
> > +             goto out_unlock;
> > +
> >       ncontexts = 0;
> >       ndwords = 0;
> >       dw = 0;
> > @@ -616,7 +642,7 @@ static int igt_ctx_exec(void *arg)
> >       }
> >   
> >   out_unlock:
> > -     if (igt_flush_test(i915, I915_WAIT_LOCKED))
> > +     if (end_live_test(&t))
> >               err = -EIO;
> >       mutex_unlock(&i915->drm.struct_mutex);
> >   
> > @@ -628,13 +654,14 @@ static int igt_ctx_readonly(void *arg)
> >   {
> >       struct drm_i915_private *i915 = arg;
> >       struct drm_i915_gem_object *obj = NULL;
> > +     struct i915_gem_context *ctx;
> > +     struct i915_hw_ppgtt *ppgtt;
> > +     unsigned long ndwords, dw;
> >       struct drm_file *file;
> >       I915_RND_STATE(prng);
> >       IGT_TIMEOUT(end_time);
> >       LIST_HEAD(objects);
> > -     struct i915_gem_context *ctx;
> > -     struct i915_hw_ppgtt *ppgtt;
> > -     unsigned long ndwords, dw;
> > +     struct live_test t;
> >       int err = -ENODEV;
> >   
> >       /*
> > @@ -649,6 +676,10 @@ static int igt_ctx_readonly(void *arg)
> >   
> >       mutex_lock(&i915->drm.struct_mutex);
> >   
> > +     err = begin_live_test(&t, i915, __func__, "");
> > +     if (err)
> > +             goto out_unlock;
> > +
> >       ctx = i915_gem_create_context(i915, file->driver_priv);
> >       if (IS_ERR(ctx)) {
> >               err = PTR_ERR(ctx);
> > @@ -721,7 +752,7 @@ static int igt_ctx_readonly(void *arg)
> >       }
> >   
> >   out_unlock:
> > -     if (igt_flush_test(i915, I915_WAIT_LOCKED))
> > +     if (end_live_test(&t))
> >               err = -EIO;
> >       mutex_unlock(&i915->drm.struct_mutex);
> >   
> > 
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Ta. With any luck, this will cause a deluge of bug reports about icl
being broken.
-Chris


More information about the Intel-gfx mailing list