[Intel-gfx] [PATCH] drm/i915/selftests: Teach requests to use all available engines
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Oct 17 16:54:55 UTC 2019
On 16/10/2019 13:52, Chris Wilson wrote:
> The request selftests straddle the boundary between checking the driver
> and the hardware. They are subject to the quirks of the underlying HW,
> but operate on top of the backend abstractions. The tests focus on the
> scheduler elements and so should check for interactions of the scheduler
> across all exposed engines.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/selftests/i915_request.c | 276 +++++++++++-------
> 1 file changed, 170 insertions(+), 106 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index 0897a7b04944..b95a0e8431ab 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -37,6 +37,18 @@
> #include "mock_drm.h"
> #include "mock_gem_device.h"
>
> +static unsigned int num_uabi_engines(struct drm_i915_private *i915)
> +{
> + struct intel_engine_cs *engine;
> + unsigned int count;
> +
> + count = 0;
> + for_each_uabi_engine(engine, i915)
> + count++;
> +
> + return count;
> +}
> +
> static int igt_add_request(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> @@ -511,15 +523,15 @@ static int live_nop_request(void *arg)
> struct drm_i915_private *i915 = arg;
> struct intel_engine_cs *engine;
> struct igt_live_test t;
> - unsigned int id;
> int err = -ENODEV;
>
> - /* Submit various sized batches of empty requests, to each engine
> + /*
> + * Submit various sized batches of empty requests, to each engine
> * (individually), and wait for the batch to complete. We can check
> * the overhead of submitting requests to the hardware.
> */
>
> - for_each_engine(engine, i915, id) {
> + for_each_uabi_engine(engine, i915) {
> unsigned long n, prime;
> IGT_TIMEOUT(end_time);
> ktime_t times[2] = {};
> @@ -539,7 +551,8 @@ static int live_nop_request(void *arg)
> if (IS_ERR(request))
> return PTR_ERR(request);
>
> - /* This space is left intentionally blank.
> + /*
> + * This space is left intentionally blank.
> *
> * We do not actually want to perform any
> * action with this request, we just want
> @@ -657,10 +670,10 @@ static int live_empty_request(void *arg)
> struct intel_engine_cs *engine;
> struct igt_live_test t;
> struct i915_vma *batch;
> - unsigned int id;
> int err = 0;
>
> - /* Submit various sized batches of empty requests, to each engine
> + /*
> + * Submit various sized batches of empty requests, to each engine
> * (individually), and wait for the batch to complete. We can check
> * the overhead of submitting requests to the hardware.
> */
> @@ -669,7 +682,7 @@ static int live_empty_request(void *arg)
> if (IS_ERR(batch))
> return PTR_ERR(batch);
>
> - for_each_engine(engine, i915, id) {
> + for_each_uabi_engine(engine, i915) {
> IGT_TIMEOUT(end_time);
> struct i915_request *request;
> unsigned long n, prime;
> @@ -801,63 +814,73 @@ static int recursive_batch_resolve(struct i915_vma *batch)
> static int live_all_engines(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> + const unsigned int nengines = num_uabi_engines(i915);
> struct intel_engine_cs *engine;
> - struct i915_request *request[I915_NUM_ENGINES];
> + struct i915_request **request;
> struct igt_live_test t;
> struct i915_vma *batch;
> - unsigned int id;
> + unsigned int idx;
> int err;
>
> - /* Check we can submit requests to all engines simultaneously. We
> + /*
> + * Check we can submit requests to all engines simultaneously. We
> * send a recursive batch to each engine - checking that we don't
> * block doing so, and that they don't complete too soon.
> */
>
> + request = kmalloc_array(nengines, sizeof(*request), GFP_KERNEL);
__GFP_ZERO is needed here, as in live_sequential_engines, for the error unwind to work, I think - otherwise the out_request unwind looks at uninitialized request[idx] pointers if request creation fails partway through.
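I.e. the same allocation this patch already uses further down in live_sequential_engines:

        request = kmalloc_array(nengines, sizeof(*request),
                                GFP_KERNEL | __GFP_ZERO);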
> + if (!request)
> + return -ENOMEM;
> +
> err = igt_live_test_begin(&t, i915, __func__, "");
> if (err)
> - return err;
> + goto out_free;
>
> batch = recursive_batch(i915);
> if (IS_ERR(batch)) {
> err = PTR_ERR(batch);
> pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
> - return err;
> + goto out_free;
> }
>
> - for_each_engine(engine, i915, id) {
> - request[id] = i915_request_create(engine->kernel_context);
> - if (IS_ERR(request[id])) {
> - err = PTR_ERR(request[id]);
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> + request[idx] = i915_request_create(engine->kernel_context);
> + if (IS_ERR(request[idx])) {
> + err = PTR_ERR(request[idx]);
> pr_err("%s: Request allocation failed with err=%d\n",
> __func__, err);
> goto out_request;
> }
>
> - err = engine->emit_bb_start(request[id],
> + err = engine->emit_bb_start(request[idx],
> batch->node.start,
> batch->node.size,
> 0);
> GEM_BUG_ON(err);
> - request[id]->batch = batch;
> + request[idx]->batch = batch;
>
> i915_vma_lock(batch);
> - err = i915_request_await_object(request[id], batch->obj, 0);
> + err = i915_request_await_object(request[idx], batch->obj, 0);
> if (err == 0)
> - err = i915_vma_move_to_active(batch, request[id], 0);
> + err = i915_vma_move_to_active(batch, request[idx], 0);
> i915_vma_unlock(batch);
> GEM_BUG_ON(err);
>
> - i915_request_get(request[id]);
> - i915_request_add(request[id]);
> + i915_request_get(request[idx]);
> + i915_request_add(request[idx]);
> + idx++;
> }
>
> - for_each_engine(engine, i915, id) {
> - if (i915_request_completed(request[id])) {
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> + if (i915_request_completed(request[idx])) {
> pr_err("%s(%s): request completed too early!\n",
> __func__, engine->name);
> err = -EINVAL;
> goto out_request;
> }
> + idx++;
> }
>
> err = recursive_batch_resolve(batch);
> @@ -866,10 +889,11 @@ static int live_all_engines(void *arg)
> goto out_request;
> }
>
> - for_each_engine(engine, i915, id) {
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> long timeout;
>
> - timeout = i915_request_wait(request[id], 0,
> + timeout = i915_request_wait(request[idx], 0,
> MAX_SCHEDULE_TIMEOUT);
> if (timeout < 0) {
> err = timeout;
> @@ -878,43 +902,57 @@ static int live_all_engines(void *arg)
> goto out_request;
> }
>
> - GEM_BUG_ON(!i915_request_completed(request[id]));
> - i915_request_put(request[id]);
> - request[id] = NULL;
> + GEM_BUG_ON(!i915_request_completed(request[idx]));
> + i915_request_put(request[idx]);
> + request[idx] = NULL;
> + idx++;
> }
>
> err = igt_live_test_end(&t);
>
> out_request:
> - for_each_engine(engine, i915, id)
> - if (request[id])
> - i915_request_put(request[id]);
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> + if (request[idx])
> + i915_request_put(request[idx]);
> + idx++;
> + }
> i915_vma_unpin(batch);
> i915_vma_put(batch);
> +out_free:
> + kfree(request);
> return err;
> }
>
> static int live_sequential_engines(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> - struct i915_request *request[I915_NUM_ENGINES] = {};
> + const unsigned int nengines = num_uabi_engines(i915);
> + struct i915_request **request;
> struct i915_request *prev = NULL;
> struct intel_engine_cs *engine;
> struct igt_live_test t;
> - unsigned int id;
> + unsigned int idx;
> int err;
>
> - /* Check we can submit requests to all engines sequentially, such
> + /*
> + * Check we can submit requests to all engines sequentially, such
> * that each successive request waits for the earlier ones. This
> * tests that we don't execute requests out of order, even though
> * they are running on independent engines.
> */
>
> + request = kmalloc_array(nengines, sizeof(*request),
> + GFP_KERNEL | __GFP_ZERO);
> + if (!request)
> + return -ENOMEM;
> +
> err = igt_live_test_begin(&t, i915, __func__, "");
> if (err)
> - return err;
> + goto out_free;
>
> - for_each_engine(engine, i915, id) {
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> struct i915_vma *batch;
>
> batch = recursive_batch(i915);
> @@ -922,66 +960,69 @@ static int live_sequential_engines(void *arg)
> err = PTR_ERR(batch);
> pr_err("%s: Unable to create batch for %s, err=%d\n",
> __func__, engine->name, err);
> - return err;
> + goto out_free;
> }
>
> - request[id] = i915_request_create(engine->kernel_context);
> - if (IS_ERR(request[id])) {
> - err = PTR_ERR(request[id]);
> + request[idx] = i915_request_create(engine->kernel_context);
> + if (IS_ERR(request[idx])) {
> + err = PTR_ERR(request[idx]);
> pr_err("%s: Request allocation failed for %s with err=%d\n",
> __func__, engine->name, err);
> goto out_request;
> }
>
> if (prev) {
> - err = i915_request_await_dma_fence(request[id],
> + err = i915_request_await_dma_fence(request[idx],
> &prev->fence);
> if (err) {
> - i915_request_add(request[id]);
> + i915_request_add(request[idx]);
> pr_err("%s: Request await failed for %s with err=%d\n",
> __func__, engine->name, err);
> goto out_request;
> }
> }
>
> - err = engine->emit_bb_start(request[id],
> + err = engine->emit_bb_start(request[idx],
> batch->node.start,
> batch->node.size,
> 0);
> GEM_BUG_ON(err);
> - request[id]->batch = batch;
> + request[idx]->batch = batch;
>
> i915_vma_lock(batch);
> - err = i915_request_await_object(request[id], batch->obj, false);
> + err = i915_request_await_object(request[idx],
> + batch->obj, false);
> if (err == 0)
> - err = i915_vma_move_to_active(batch, request[id], 0);
> + err = i915_vma_move_to_active(batch, request[idx], 0);
> i915_vma_unlock(batch);
> GEM_BUG_ON(err);
>
> - i915_request_get(request[id]);
> - i915_request_add(request[id]);
> + i915_request_get(request[idx]);
> + i915_request_add(request[idx]);
>
> - prev = request[id];
> + prev = request[idx];
> + idx++;
> }
>
> - for_each_engine(engine, i915, id) {
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> long timeout;
>
> - if (i915_request_completed(request[id])) {
> + if (i915_request_completed(request[idx])) {
> pr_err("%s(%s): request completed too early!\n",
> __func__, engine->name);
> err = -EINVAL;
> goto out_request;
> }
>
> - err = recursive_batch_resolve(request[id]->batch);
> + err = recursive_batch_resolve(request[idx]->batch);
> if (err) {
> pr_err("%s: failed to resolve batch, err=%d\n",
> __func__, err);
> goto out_request;
> }
>
> - timeout = i915_request_wait(request[id], 0,
> + timeout = i915_request_wait(request[idx], 0,
> MAX_SCHEDULE_TIMEOUT);
> if (timeout < 0) {
> err = timeout;
> @@ -990,30 +1031,35 @@ static int live_sequential_engines(void *arg)
> goto out_request;
> }
>
> - GEM_BUG_ON(!i915_request_completed(request[id]));
> + GEM_BUG_ON(!i915_request_completed(request[idx]));
> + idx++;
> }
>
> err = igt_live_test_end(&t);
>
> out_request:
> - for_each_engine(engine, i915, id) {
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> u32 *cmd;
>
> - if (!request[id])
> + if (!request[idx])
> break;
>
> - cmd = i915_gem_object_pin_map(request[id]->batch->obj,
> + cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
> I915_MAP_WC);
> if (!IS_ERR(cmd)) {
> *cmd = MI_BATCH_BUFFER_END;
> intel_gt_chipset_flush(engine->gt);
>
> - i915_gem_object_unpin_map(request[id]->batch->obj);
> + i915_gem_object_unpin_map(request[idx]->batch->obj);
> }
>
> - i915_vma_put(request[id]->batch);
> - i915_request_put(request[id]);
> + i915_vma_put(request[idx]->batch);
> + i915_request_put(request[idx]);
> + idx++;
> }
> +out_free:
> + kfree(request);
> return err;
> }
>
> @@ -1079,9 +1125,10 @@ static int live_parallel_engines(void *arg)
> __live_parallel_engineN,
> NULL,
> };
> + const unsigned int nengines = num_uabi_engines(i915);
> struct intel_engine_cs *engine;
> - enum intel_engine_id id;
> int (* const *fn)(void *arg);
> + struct task_struct **tsk;
> int err = 0;
>
> /*
> @@ -1089,42 +1136,49 @@ static int live_parallel_engines(void *arg)
> * tests that we load up the system maximally.
> */
>
> + tsk = kmalloc_array(nengines, sizeof(*tsk), GFP_KERNEL | __GFP_ZERO);
> + if (!tsk)
> + return -ENOMEM;
> +
> for (fn = func; !err && *fn; fn++) {
> - struct task_struct *tsk[I915_NUM_ENGINES] = {};
> struct igt_live_test t;
> + unsigned int idx;
>
> err = igt_live_test_begin(&t, i915, __func__, "");
> if (err)
> break;
>
> - for_each_engine(engine, i915, id) {
> - tsk[id] = kthread_run(*fn, engine,
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> + tsk[idx] = kthread_run(*fn, engine,
> "igt/parallel:%s",
> engine->name);
> - if (IS_ERR(tsk[id])) {
> - err = PTR_ERR(tsk[id]);
> + if (IS_ERR(tsk[idx])) {
> + err = PTR_ERR(tsk[idx]);
> break;
> }
> - get_task_struct(tsk[id]);
> + get_task_struct(tsk[idx++]);
> }
>
> - for_each_engine(engine, i915, id) {
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> int status;
>
> - if (IS_ERR_OR_NULL(tsk[id]))
> - continue;
> + if (IS_ERR(tsk[idx]))
> + break;
>
> - status = kthread_stop(tsk[id]);
> + status = kthread_stop(tsk[idx]);
> if (status && !err)
> err = status;
>
> - put_task_struct(tsk[id]);
> + put_task_struct(tsk[idx++]);
> }
>
> if (igt_live_test_end(&t))
> err = -EIO;
> }
>
> + kfree(tsk);
> return err;
> }
>
> @@ -1168,16 +1222,16 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
> static int live_breadcrumbs_smoketest(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> - struct smoketest t[I915_NUM_ENGINES];
> - unsigned int ncpus = num_online_cpus();
> + const unsigned int nengines = num_uabi_engines(i915);
> + const unsigned int ncpus = num_online_cpus();
> unsigned long num_waits, num_fences;
> struct intel_engine_cs *engine;
> struct task_struct **threads;
> struct igt_live_test live;
> - enum intel_engine_id id;
> intel_wakeref_t wakeref;
> struct drm_file *file;
> - unsigned int n;
> + struct smoketest *smoke;
> + unsigned int n, idx;
> int ret = 0;
>
> /*
> @@ -1196,28 +1250,31 @@ static int live_breadcrumbs_smoketest(void *arg)
> goto out_rpm;
> }
>
> - threads = kcalloc(ncpus * I915_NUM_ENGINES,
> - sizeof(*threads),
> - GFP_KERNEL);
> - if (!threads) {
> + smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
> + if (!smoke) {
> ret = -ENOMEM;
> goto out_file;
> }
>
> - memset(&t[0], 0, sizeof(t[0]));
> - t[0].request_alloc = __live_request_alloc;
> - t[0].ncontexts = 64;
> - t[0].contexts = kmalloc_array(t[0].ncontexts,
> - sizeof(*t[0].contexts),
> - GFP_KERNEL);
> - if (!t[0].contexts) {
> + threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
> + if (!threads) {
> + ret = -ENOMEM;
> + goto out_smoke;
> + }
> +
> + smoke[0].request_alloc = __live_request_alloc;
> + smoke[0].ncontexts = 64;
> + smoke[0].contexts = kmalloc_array(smoke[0].ncontexts,
> + sizeof(*smoke[0].contexts),
> + GFP_KERNEL);
> + if (!smoke[0].contexts) {
> ret = -ENOMEM;
> goto out_threads;
> }
>
> - for (n = 0; n < t[0].ncontexts; n++) {
> - t[0].contexts[n] = live_context(i915, file);
> - if (!t[0].contexts[n]) {
> + for (n = 0; n < smoke[0].ncontexts; n++) {
> + smoke[0].contexts[n] = live_context(i915, file);
> + if (!smoke[0].contexts[n]) {
> ret = -ENOMEM;
> goto out_contexts;
> }
> @@ -1227,42 +1284,46 @@ static int live_breadcrumbs_smoketest(void *arg)
> if (ret)
> goto out_contexts;
>
> - for_each_engine(engine, i915, id) {
> - t[id] = t[0];
> - t[id].engine = engine;
> - t[id].max_batch = max_batches(t[0].contexts[0], engine);
> - if (t[id].max_batch < 0) {
> - ret = t[id].max_batch;
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> + smoke[idx] = smoke[0];
> + smoke[idx].engine = engine;
> + smoke[idx].max_batch = max_batches(smoke[0].contexts[0], engine);
> + if (smoke[idx].max_batch < 0) {
> + ret = smoke[idx].max_batch;
> goto out_flush;
> }
> /* One ring interleaved between requests from all cpus */
> - t[id].max_batch /= num_online_cpus() + 1;
> + smoke[idx].max_batch /= num_online_cpus() + 1;
> pr_debug("Limiting batches to %d requests on %s\n",
> - t[id].max_batch, engine->name);
> + smoke[idx].max_batch, engine->name);
>
> for (n = 0; n < ncpus; n++) {
> struct task_struct *tsk;
>
> tsk = kthread_run(__igt_breadcrumbs_smoketest,
> - &t[id], "igt/%d.%d", id, n);
> + &smoke[idx], "igt/%d.%d", idx, n);
> if (IS_ERR(tsk)) {
> ret = PTR_ERR(tsk);
> goto out_flush;
> }
>
> get_task_struct(tsk);
> - threads[id * ncpus + n] = tsk;
> + threads[idx * ncpus + n] = tsk;
> }
> +
> + idx++;
> }
>
> msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
>
> out_flush:
> + idx = 0;
> num_waits = 0;
> num_fences = 0;
> - for_each_engine(engine, i915, id) {
> + for_each_uabi_engine(engine, i915) {
> for (n = 0; n < ncpus; n++) {
> - struct task_struct *tsk = threads[id * ncpus + n];
> + struct task_struct *tsk = threads[idx * ncpus + n];
> int err;
>
> if (!tsk)
> @@ -1275,17 +1336,20 @@ static int live_breadcrumbs_smoketest(void *arg)
> put_task_struct(tsk);
> }
>
> - num_waits += atomic_long_read(&t[id].num_waits);
> - num_fences += atomic_long_read(&t[id].num_fences);
> + num_waits += atomic_long_read(&smoke[idx].num_waits);
> + num_fences += atomic_long_read(&smoke[idx].num_fences);
> + idx++;
> }
> pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
> num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
>
> ret = igt_live_test_end(&live) ?: ret;
> out_contexts:
> - kfree(t[0].contexts);
> + kfree(smoke[0].contexts);
> out_threads:
> kfree(threads);
> +out_smoke:
> + kfree(smoke);
> out_file:
> mock_file_free(i915, file);
> out_rpm:
>
Rest looks fine.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Will we end up with a for_each_uabi_engine_idx iterator, something like the sketch below? And storing num_uabi_engines somewhere, rather than each test counting the engines itself?
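Something along these lines perhaps - purely an untested sketch, and first_uabi_engine()/next_uabi_engine() are made-up placeholders for whatever walk the existing for_each_uabi_engine does internally:

        /* hypothetical helpers standing in for the existing uabi engine walk */
        #define for_each_uabi_engine_idx(engine__, i915__, idx__) \
                for ((idx__) = 0, (engine__) = first_uabi_engine(i915__); \
                     (engine__); \
                     (engine__) = next_uabi_engine(engine__), (idx__)++)

That would let the manual idx = 0 / idx++ bookkeeping in these tests disappear, e.g.:

        for_each_uabi_engine_idx(engine, i915, idx) {
                request[idx] = i915_request_create(engine->kernel_context);
                if (IS_ERR(request[idx])) {
                        err = PTR_ERR(request[idx]);
                        goto out_request;
                }
        }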
Regards,
Tvrtko