[Intel-gfx] [PATCH 1/7] drm/i915/selftests: Smoketest preemption

Tue Sep 25 10:17:04 UTC 2018

Quoting Tvrtko Ursulin (2018-09-25 10:39:06)
> 
> On 25/09/2018 09:31, Chris Wilson wrote:
> > Very light stress test to bombard the submission backends with a large
> > stream with requests of randomly assigned priorities. Preemption will be
> > occasionally requested, but never succeed!
> 
> Why won't it ever succeed? By design or because test is targetting some bug?

There's no batch, and for all but a small window for arbitration between
requests, we disallow preemption in the ring.

> > v2: Include a second pattern with more frequent preemption
> > 
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Michał Winiarski <michal.winiarski at intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> > ---
> >   drivers/gpu/drm/i915/selftests/intel_lrc.c | 137 +++++++++++++++++++++
> >   1 file changed, 137 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> > index 1aea7a8f2224..3a474bb64c05 100644
> > --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> > @@ -6,6 +6,7 @@
> >   
> >   #include "../i915_selftest.h"
> >   #include "igt_flush_test.h"
> > +#include "i915_random.h"
> >   
> >   #include "mock_context.h"
> >   
> > @@ -573,6 +574,141 @@ static int live_preempt_hang(void *arg)
> >       return err;
> >   }
> >   
> > +static int random_range(struct rnd_state *rnd, int min, int max)
> > +{
> > +     return i915_prandom_u32_max_state(max - min, rnd) + min;
> > +}
> > +
> > +static int random_priority(struct rnd_state *rnd)
> > +{
> > +     return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
> > +}
> > +
> > +struct preempt_smoke {
> > +     struct drm_i915_private *i915;
> > +     struct i915_gem_context **contexts;
> > +     unsigned int ncontext;
> > +     struct rnd_state prng;
> > +};
> > +
> > +static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
> > +{
> > +     return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
> > +                                                       &smoke->prng)];
> > +}
> > +
> > +static int smoke_crescendo(struct preempt_smoke *smoke)
> > +{
> > +     struct intel_engine_cs *engine;
> > +     enum intel_engine_id id;
> > +     unsigned long count;
> > +
> > +     count = 0;
> > +     for_each_engine(engine, smoke->i915, id) {
> > +             IGT_TIMEOUT(end_time);
> > +
> > +             do {
> > +                     struct i915_gem_context *ctx = smoke_context(smoke);
> > +                     struct i915_request *rq;
> > +
> > +                     ctx->sched.priority = count % I915_PRIORITY_MAX;
> > +
> > +                     rq = i915_request_alloc(engine, ctx);
> > +                     if (IS_ERR(rq))
> > +                             return PTR_ERR(rq);
> > +
> > +                     i915_request_add(rq);
> > +                     count++;
> > +             } while (!__igt_timeout(end_time, NULL));
> > +     }
> > +
> > +     pr_info("Submitted %lu crescendo requests across %d engines and %d contexts\n",
> > +             count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
> > +     return 0;
> > +}
> > +
> > +static int smoke_random(struct preempt_smoke *smoke)
> > +{
> > +     struct intel_engine_cs *engine;
> > +     enum intel_engine_id id;
> > +     IGT_TIMEOUT(end_time);
> > +     unsigned long count;
> > +
> > +     count = 0;
> > +     do {
> > +             for_each_engine(engine, smoke->i915, id) {
> > +                     struct i915_gem_context *ctx = smoke_context(smoke);
> > +                     struct i915_request *rq;
> > +
> > +                     ctx->sched.priority = random_priority(&smoke->prng);
> > +
> > +                     rq = i915_request_alloc(engine, ctx);
> > +                     if (IS_ERR(rq))
> > +                             return PTR_ERR(rq);
> > +
> > +                     i915_request_add(rq);
> > +                     count++;
> > +             }
> > +     } while (!__igt_timeout(end_time, NULL));
> > +
> > +     pr_info("Submitted %lu random requests across %d engines and %d contexts\n",
> > +             count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
> > +     return 0;
> > +}
> 
> Merge smoke_crescendo and smoke_random into one which takes flags to 
> decide on priority assign policy, since that seems like the only difference?

The chaining along engines from the loop construct is a big difference.

> > +static int live_preempt_smoke(void *arg)
> > +{
> > +     struct preempt_smoke smoke = {
> > +             .i915 = arg,
> > +             .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
> > +             .ncontext = 1024,
> > +     };
> > +     int err = -ENOMEM;
> > +     int n;
> > +
> > +     if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
> > +             return 0;
> > +
> > +     smoke.contexts = kmalloc_array(smoke.ncontext,
> > +                                    sizeof(*smoke.contexts),
> > +                                    GFP_KERNEL);
> > +     if (!smoke.contexts)
> > +             return -ENOMEM;
> > +
> > +     mutex_lock(&smoke.i915->drm.struct_mutex);
> > +     intel_runtime_pm_get(smoke.i915);
> > +
> > +     for (n = 0; n < smoke.ncontext; n++) {
> > +             smoke.contexts[n] = kernel_context(smoke.i915);
> > +             if (!smoke.contexts[n])
> > +                     goto err_ctx;
> 
> There isn't any request emission on context creation I think so here 
> could jump to a new label which only frees.

Sure. The flush gives peace of mind.

> > +     }
> > +
> > +     err = smoke_crescendo(&smoke);
> > +     if (err)
> > +             goto err_ctx;
> 
> Sync/idle/flush between subtests?

Doesn't make much difference since it's all about trying to get the
magic smoke to leak.

> > +     err = smoke_random(&smoke);
> > +     if (err)
> > +             goto err_ctx;
> > +
> > +err_ctx:
> > +     if (igt_flush_test(smoke.i915, I915_WAIT_LOCKED))
> > +             err = -EIO;
> > +
> > +     for (n = 0; n < smoke.ncontext; n++) {
> > +             if (!smoke.contexts[n])
> > +                     break;
> 
> So GFP_ZERO or a post-clear is needed on the array. Or kcalloc.

Oh no it isn't as we never see the uninitialised entries. NULL
indicates the error and time to abort.
-Chris