[Intel-gfx] [PATCH] drm/i915/selftests/blt: add some kthreads into the mix
Chris Wilson
chris at chris-wilson.co.uk
Fri Oct 25 11:36:37 UTC 2019
Quoting Matthew Auld (2019-10-25 12:24:42)
> We can be more aggressive in our testing by launching a number of
> kthreads, where each is submitting its own copy or fill batches on a set
> of random sized objects. Also since the underlying fill and copy batches
> can be pre-empted mid-batch (for particularly large objects), throw in a
> random mixture of ctx priorities per thread to make pre-emption a
> possibility.
>
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> .../i915/gem/selftests/i915_gem_object_blt.c | 144 +++++++++++++++---
> 1 file changed, 121 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
> index 9ec55b3a3815..41e0bd6a175c 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
> @@ -7,34 +7,53 @@
>
> #include "i915_selftest.h"
>
> +#include "gem/i915_gem_context.h"
> #include "selftests/igt_flush_test.h"
> +#include "selftests/i915_random.h"
> #include "selftests/mock_drm.h"
> #include "huge_gem_object.h"
> #include "mock_context.h"
>
> -static int igt_fill_blt(void *arg)
> +struct igt_thread_arg {
> + struct drm_i915_private *i915;
> + struct rnd_state *prng;
> +};
> +
> +static int igt_fill_blt_thread(void *arg)
> {
> - struct drm_i915_private *i915 = arg;
> - struct intel_context *ce = i915->engine[BCS0]->kernel_context;
> + struct igt_thread_arg *thread = arg;
> + struct drm_i915_private *i915 = thread->i915;
> + struct rnd_state *prng = thread->prng;
> struct drm_i915_gem_object *obj;
> - struct rnd_state prng;
> + struct i915_gem_context *ctx;
> + struct intel_context *ce;
> + struct drm_file *file;
> + unsigned int prio;
> IGT_TIMEOUT(end);
> - u32 *vaddr;
> - int err = 0;
> + int err;
> +
> + file = mock_file(i915);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> +
> + ctx = live_context(i915, file);
> + if (IS_ERR(ctx)) {
> + err = PTR_ERR(ctx);
> + goto out_file;
> + }
>
> - prandom_seed_state(&prng, i915_selftest.random_seed);
> + prio = prandom_u32_state(prng) % I915_PRIORITY_MAX;
> + ctx->sched.priority = I915_USER_PRIORITY(prio);
>
> - /*
> - * XXX: needs some threads to scale all these tests, also maybe throw
> - * in submission from higher priority context to see if we are
> - * preempted for very large objects...
> - */
> + ce = i915_gem_context_get_engine(ctx, BCS0);
> + GEM_BUG_ON(IS_ERR(ce));
>
> do {
> const u32 max_block_size = S16_MAX * PAGE_SIZE;
> - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
> + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(prng));
> u32 phys_sz = sz % (max_block_size + 1);
> - u32 val = prandom_u32_state(&prng);
> + u32 val = prandom_u32_state(prng);
> + u32 *vaddr;
> u32 i;
>
> sz = round_up(sz, PAGE_SIZE);
> @@ -98,26 +117,47 @@ static int igt_fill_blt(void *arg)
> if (err == -ENOMEM)
> err = 0;
>
> + intel_context_put(ce);
> +out_file:
> + mock_file_free(i915, file);
> return err;
> }
>
> -static int igt_copy_blt(void *arg)
> +static int igt_copy_blt_thread(void *arg)
> {
> - struct drm_i915_private *i915 = arg;
> - struct intel_context *ce = i915->engine[BCS0]->kernel_context;
> + struct igt_thread_arg *thread = arg;
> + struct drm_i915_private *i915 = thread->i915;
> + struct rnd_state *prng = thread->prng;
> struct drm_i915_gem_object *src, *dst;
> - struct rnd_state prng;
> + struct i915_gem_context *ctx;
> + struct intel_context *ce;
> + struct drm_file *file;
> + unsigned int prio;
> IGT_TIMEOUT(end);
> - u32 *vaddr;
> - int err = 0;
> + int err;
> +
> + file = mock_file(i915);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> +
> + ctx = live_context(i915, file);
> + if (IS_ERR(ctx)) {
> + err = PTR_ERR(ctx);
> + goto out_file;
> + }
>
> - prandom_seed_state(&prng, i915_selftest.random_seed);
> + prio = prandom_u32_state(prng) % I915_PRIORITY_MAX;
prio = i915_prandom_u32_max_state(prng, I915_PRIORITY_MAX);
Usual prng dilemma of high bits being more random than low bits, and it
avoids the divide. (Nice trick to remember :)
> + ctx->sched.priority = I915_USER_PRIORITY(prio);
> +
> + ce = i915_gem_context_get_engine(ctx, BCS0);
> + GEM_BUG_ON(IS_ERR(ce));
>
> do {
> const u32 max_block_size = S16_MAX * PAGE_SIZE;
> - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
> + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(prng));
> u32 phys_sz = sz % (max_block_size + 1);
> - u32 val = prandom_u32_state(&prng);
> + u32 val = prandom_u32_state(prng);
> + u32 *vaddr;
> u32 i;
>
> sz = round_up(sz, PAGE_SIZE);
> @@ -201,9 +241,67 @@ static int igt_copy_blt(void *arg)
> if (err == -ENOMEM)
> err = 0;
>
> + intel_context_put(ce);
> +out_file:
> + mock_file_free(i915, file);
> + return err;
> +}
> +
> +static int igt_threaded_blt(struct drm_i915_private *i915,
> + int (*blt_fn)(void *arg))
> +{
> +
> + I915_RND_STATE(prng);
> + struct igt_thread_arg thread = { i915, &prng, };
Each thread is using the same prng state then?
Bah. It's not thread safe so would lead to non-deterministic
behaviour. At least it's not every thread using the exact same sequence.
thread = kmalloc_array(n_cpus, sizeof(*thread), GFP_KERNEL);
for (...) {
	thread[i].prng = I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
I suspect we may end up pretifying that as I expect we'll do more and
more parallelised smoketesting.
> + struct task_struct **tsk;
> + unsigned int n_cpus;
> + unsigned int i;
> + int err = 0;
> +
> + n_cpus = num_online_cpus() + 1;
> +
> + tsk = kzalloc(n_cpus * sizeof(struct task_struct *), GFP_KERNEL);
> + if (!tsk)
> + return 0;
> +
> + for (i = 0; i < n_cpus; ++i) {
> + tsk[i] = kthread_run(blt_fn, &thread, "igt/blt-%d", i);
> + if (IS_ERR(tsk[i])) {
> + err = PTR_ERR(tsk[i]);
> + break;
> + }
> +
> + get_task_struct(tsk[i]);
> + }
> +
> + for (i = 0; i < n_cpus; ++i) {
> + int status;
> +
> + if (IS_ERR_OR_NULL(tsk[i]))
> + continue;
> +
> + status = kthread_stop(tsk[i]);
> + if (status && !err)
> + err = status;
Looks good.
> + put_task_struct(tsk[i]);
> + }
> +
I think per-thread deterministic "random" behaviour is critical. But
that's the only drawback spotted, so with that resolved one way or
another,
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
-Chris
More information about the Intel-gfx
mailing list