[Intel-gfx] [PATCH 1/7] drm/i915/selftests: Smoketest preemption

Tue Sep 25 10:24:53 UTC 2018

On 25/09/2018 11:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-25 10:39:06)
>>
>> On 25/09/2018 09:31, Chris Wilson wrote:
>>> Very light stress test to bombard the submission backends with a large
>>> stream with requests of randomly assigned priorities. Preemption will be
>>> occasionally requested, but never succeed!
>>
>> Why won't it ever succeed? By design or because test is targetting some bug?
> 
> There's no batch, and for all but a small window for arbitration between
> requests, we disallow preemption in the ring.

This described in the commit message would be good.

>>> v2: Include a second pattern with more frequent preemption
>>>
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Michał Winiarski <michal.winiarski at intel.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/selftests/intel_lrc.c | 137 +++++++++++++++++++++
>>>    1 file changed, 137 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
>>> index 1aea7a8f2224..3a474bb64c05 100644
>>> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
>>> @@ -6,6 +6,7 @@
>>>    
>>>    #include "../i915_selftest.h"
>>>    #include "igt_flush_test.h"
>>> +#include "i915_random.h"
>>>    
>>>    #include "mock_context.h"
>>>    
>>> @@ -573,6 +574,141 @@ static int live_preempt_hang(void *arg)
>>>        return err;
>>>    }
>>>    
>>> +static int random_range(struct rnd_state *rnd, int min, int max)
>>> +{
>>> +     return i915_prandom_u32_max_state(max - min, rnd) + min;
>>> +}
>>> +
>>> +static int random_priority(struct rnd_state *rnd)
>>> +{
>>> +     return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
>>> +}
>>> +
>>> +struct preempt_smoke {
>>> +     struct drm_i915_private *i915;
>>> +     struct i915_gem_context **contexts;
>>> +     unsigned int ncontext;
>>> +     struct rnd_state prng;
>>> +};
>>> +
>>> +static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
>>> +{
>>> +     return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
>>> +                                                       &smoke->prng)];
>>> +}
>>> +
>>> +static int smoke_crescendo(struct preempt_smoke *smoke)
>>> +{
>>> +     struct intel_engine_cs *engine;
>>> +     enum intel_engine_id id;
>>> +     unsigned long count;
>>> +
>>> +     count = 0;
>>> +     for_each_engine(engine, smoke->i915, id) {
>>> +             IGT_TIMEOUT(end_time);
>>> +
>>> +             do {
>>> +                     struct i915_gem_context *ctx = smoke_context(smoke);
>>> +                     struct i915_request *rq;
>>> +
>>> +                     ctx->sched.priority = count % I915_PRIORITY_MAX;
>>> +
>>> +                     rq = i915_request_alloc(engine, ctx);
>>> +                     if (IS_ERR(rq))
>>> +                             return PTR_ERR(rq);
>>> +
>>> +                     i915_request_add(rq);
>>> +                     count++;
>>> +             } while (!__igt_timeout(end_time, NULL));
>>> +     }
>>> +
>>> +     pr_info("Submitted %lu crescendo requests across %d engines and %d contexts\n",
>>> +             count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
>>> +     return 0;
>>> +}
>>> +
>>> +static int smoke_random(struct preempt_smoke *smoke)
>>> +{
>>> +     struct intel_engine_cs *engine;
>>> +     enum intel_engine_id id;
>>> +     IGT_TIMEOUT(end_time);
>>> +     unsigned long count;
>>> +
>>> +     count = 0;
>>> +     do {
>>> +             for_each_engine(engine, smoke->i915, id) {
>>> +                     struct i915_gem_context *ctx = smoke_context(smoke);
>>> +                     struct i915_request *rq;
>>> +
>>> +                     ctx->sched.priority = random_priority(&smoke->prng);
>>> +
>>> +                     rq = i915_request_alloc(engine, ctx);
>>> +                     if (IS_ERR(rq))
>>> +                             return PTR_ERR(rq);
>>> +
>>> +                     i915_request_add(rq);
>>> +                     count++;
>>> +             }
>>> +     } while (!__igt_timeout(end_time, NULL));
>>> +
>>> +     pr_info("Submitted %lu random requests across %d engines and %d contexts\n",
>>> +             count, INTEL_INFO(smoke->i915)->num_rings, smoke->ncontext);
>>> +     return 0;
>>> +}
>>
>> Merge smoke_crescendo and smoke_random into one which takes flags to
>> decide on priority assign policy, since that seems like the only difference?
> 
> The chaining along engines from the loop construct is a big difference.

True, I missed that when eyeballing it.

> 
>>> +static int live_preempt_smoke(void *arg)
>>> +{
>>> +     struct preempt_smoke smoke = {
>>> +             .i915 = arg,
>>> +             .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
>>> +             .ncontext = 1024,
>>> +     };
>>> +     int err = -ENOMEM;
>>> +     int n;
>>> +
>>> +     if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
>>> +             return 0;
>>> +
>>> +     smoke.contexts = kmalloc_array(smoke.ncontext,
>>> +                                    sizeof(*smoke.contexts),
>>> +                                    GFP_KERNEL);
>>> +     if (!smoke.contexts)
>>> +             return -ENOMEM;
>>> +
>>> +     mutex_lock(&smoke.i915->drm.struct_mutex);
>>> +     intel_runtime_pm_get(smoke.i915);
>>> +
>>> +     for (n = 0; n < smoke.ncontext; n++) {
>>> +             smoke.contexts[n] = kernel_context(smoke.i915);
>>> +             if (!smoke.contexts[n])
>>> +                     goto err_ctx;
>>
>> There isn't any request emission on context creation I think so here
>> could jump to a new label which only frees.
> 
> Sure. The flush gives peace of mind.
> 
>>> +     }
>>> +
>>> +     err = smoke_crescendo(&smoke);
>>> +     if (err)
>>> +             goto err_ctx;
>>
>> Sync/idle/flush between subtests?
> 
> Doesn't make much difference since it's all about trying to get the
> magic smoke to leak.
> 
>>> +     err = smoke_random(&smoke);
>>> +     if (err)
>>> +             goto err_ctx;
>>> +
>>> +err_ctx:
>>> +     if (igt_flush_test(smoke.i915, I915_WAIT_LOCKED))
>>> +             err = -EIO;
>>> +
>>> +     for (n = 0; n < smoke.ncontext; n++) {
>>> +             if (!smoke.contexts[n])
>>> +                     break;
>>
>> So GFP_ZERO or a post-clear is needed on the array. Or kcalloc.
> 
> Oh no it isn't as we never see the uninitialised entries. NULL
> indicates the error and time to abort.

True my bad.

In this case with a small update to the commit message:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko