[Intel-gfx] [PATCH] drm/i915/selftests: Teach requests to use all available engines

Thu Oct 17 16:54:55 UTC 2019

On 16/10/2019 13:52, Chris Wilson wrote:
> The request selftests straddle the boundary between checking the driver
> and the hardware. They are subject to the quirks of the underlying HW,
> but operate on top of the backend abstractions. The tests focus on the
> scheduler elements and so should check for interactions of the scheduler
> across all exposed engines.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/selftests/i915_request.c | 276 +++++++++++-------
>   1 file changed, 170 insertions(+), 106 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index 0897a7b04944..b95a0e8431ab 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -37,6 +37,18 @@
>   #include "mock_drm.h"
>   #include "mock_gem_device.h"
>   
> +static unsigned int num_uabi_engines(struct drm_i915_private *i915)
> +{
> +	struct intel_engine_cs *engine;
> +	unsigned int count;
> +
> +	count = 0;
> +	for_each_uabi_engine(engine, i915)
> +		count++;
> +
> +	return count;
> +}
> +
>   static int igt_add_request(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> @@ -511,15 +523,15 @@ static int live_nop_request(void *arg)
>   	struct drm_i915_private *i915 = arg;
>   	struct intel_engine_cs *engine;
>   	struct igt_live_test t;
> -	unsigned int id;
>   	int err = -ENODEV;
>   
> -	/* Submit various sized batches of empty requests, to each engine
> +	/*
> +	 * Submit various sized batches of empty requests, to each engine
>   	 * (individually), and wait for the batch to complete. We can check
>   	 * the overhead of submitting requests to the hardware.
>   	 */
>   
> -	for_each_engine(engine, i915, id) {
> +	for_each_uabi_engine(engine, i915) {
>   		unsigned long n, prime;
>   		IGT_TIMEOUT(end_time);
>   		ktime_t times[2] = {};
> @@ -539,7 +551,8 @@ static int live_nop_request(void *arg)
>   				if (IS_ERR(request))
>   					return PTR_ERR(request);
>   
> -				/* This space is left intentionally blank.
> +				/*
> +				 * This space is left intentionally blank.
>   				 *
>   				 * We do not actually want to perform any
>   				 * action with this request, we just want
> @@ -657,10 +670,10 @@ static int live_empty_request(void *arg)
>   	struct intel_engine_cs *engine;
>   	struct igt_live_test t;
>   	struct i915_vma *batch;
> -	unsigned int id;
>   	int err = 0;
>   
> -	/* Submit various sized batches of empty requests, to each engine
> +	/*
> +	 * Submit various sized batches of empty requests, to each engine
>   	 * (individually), and wait for the batch to complete. We can check
>   	 * the overhead of submitting requests to the hardware.
>   	 */
> @@ -669,7 +682,7 @@ static int live_empty_request(void *arg)
>   	if (IS_ERR(batch))
>   		return PTR_ERR(batch);
>   
> -	for_each_engine(engine, i915, id) {
> +	for_each_uabi_engine(engine, i915) {
>   		IGT_TIMEOUT(end_time);
>   		struct i915_request *request;
>   		unsigned long n, prime;
> @@ -801,63 +814,73 @@ static int recursive_batch_resolve(struct i915_vma *batch)
>   static int live_all_engines(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> +	const unsigned int nengines = num_uabi_engines(i915);
>   	struct intel_engine_cs *engine;
> -	struct i915_request *request[I915_NUM_ENGINES];
> +	struct i915_request **request;
>   	struct igt_live_test t;
>   	struct i915_vma *batch;
> -	unsigned int id;
> +	unsigned int idx;
>   	int err;
>   
> -	/* Check we can submit requests to all engines simultaneously. We
> +	/*
> +	 * Check we can submit requests to all engines simultaneously. We
>   	 * send a recursive batch to each engine - checking that we don't
>   	 * block doing so, and that they don't complete too soon.
>   	 */
>   
> +	request = kmalloc_array(nengines, sizeof(*request), GFP_KERNEL);

__GFP_ZERO as live_sequential for error unwind to work, I think.

> +	if (!request)
> +		return -ENOMEM;
> +
>   	err = igt_live_test_begin(&t, i915, __func__, "");
>   	if (err)
> -		return err;
> +		goto out_free;
>   
>   	batch = recursive_batch(i915);
>   	if (IS_ERR(batch)) {
>   		err = PTR_ERR(batch);
>   		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
> -		return err;
> +		goto out_free;
>   	}
>   
> -	for_each_engine(engine, i915, id) {
> -		request[id] = i915_request_create(engine->kernel_context);
> -		if (IS_ERR(request[id])) {
> -			err = PTR_ERR(request[id]);
> +	idx = 0;
> +	for_each_uabi_engine(engine, i915) {
> +		request[idx] = i915_request_create(engine->kernel_context);
> +		if (IS_ERR(request[idx])) {
> +			err = PTR_ERR(request[idx]);
>   			pr_err("%s: Request allocation failed with err=%d\n",
>   			       __func__, err);
>   			goto out_request;
>   		}
>   
> -		err = engine->emit_bb_start(request[id],
> +		err = engine->emit_bb_start(request[idx],
>   					    batch->node.start,
>   					    batch->node.size,
>   					    0);
>   		GEM_BUG_ON(err);
> -		request[id]->batch = batch;
> +		request[idx]->batch = batch;
>   
>   		i915_vma_lock(batch);
> -		err = i915_request_await_object(request[id], batch->obj, 0);
> +		err = i915_request_await_object(request[idx], batch->obj, 0);
>   		if (err == 0)
> -			err = i915_vma_move_to_active(batch, request[id], 0);
> +			err = i915_vma_move_to_active(batch, request[idx], 0);
>   		i915_vma_unlock(batch);
>   		GEM_BUG_ON(err);
>   
> -		i915_request_get(request[id]);
> -		i915_request_add(request[id]);
> +		i915_request_get(request[idx]);
> +		i915_request_add(request[idx]);
> +		idx++;
>   	}
>   
> -	for_each_engine(engine, i915, id) {
> -		if (i915_request_completed(request[id])) {
> +	idx = 0;
> +	for_each_uabi_engine(engine, i915) {
> +		if (i915_request_completed(request[idx])) {
>   			pr_err("%s(%s): request completed too early!\n",
>   			       __func__, engine->name);
>   			err = -EINVAL;
>   			goto out_request;
>   		}
> +		idx++;
>   	}
>   
>   	err = recursive_batch_resolve(batch);
> @@ -866,10 +889,11 @@ static int live_all_engines(void *arg)
>   		goto out_request;
>   	}
>   
> -	for_each_engine(engine, i915, id) {
> +	idx = 0;
> +	for_each_uabi_engine(engine, i915) {
>   		long timeout;
>   
> -		timeout = i915_request_wait(request[id], 0,
> +		timeout = i915_request_wait(request[idx], 0,
>   					    MAX_SCHEDULE_TIMEOUT);
>   		if (timeout < 0) {
>   			err = timeout;
> @@ -878,43 +902,57 @@ static int live_all_engines(void *arg)
>   			goto out_request;
>   		}
>   
> -		GEM_BUG_ON(!i915_request_completed(request[id]));
> -		i915_request_put(request[id]);
> -		request[id] = NULL;
> +		GEM_BUG_ON(!i915_request_completed(request[idx]));
> +		i915_request_put(request[idx]);
> +		request[idx] = NULL;
> +		idx++;
>   	}
>   
>   	err = igt_live_test_end(&t);
>   
>   out_request:
> -	for_each_engine(engine, i915, id)
> -		if (request[id])
> -			i915_request_put(request[id]);
> +	idx = 0;
> +	for_each_uabi_engine(engine, i915) {
> +		if (request[idx])
> +			i915_request_put(request[idx]);
> +		idx++;
> +	}
>   	i915_vma_unpin(batch);
>   	i915_vma_put(batch);
> +out_free:
> +	kfree(request);
>   	return err;
>   }
>   
>   static int live_sequential_engines(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> -	struct i915_request *request[I915_NUM_ENGINES] = {};
> +	const unsigned int nengines = num_uabi_engines(i915);
> +	struct i915_request **request;
>   	struct i915_request *prev = NULL;
>   	struct intel_engine_cs *engine;
>   	struct igt_live_test t;
> -	unsigned int id;
> +	unsigned int idx;
>   	int err;
>   
> -	/* Check we can submit requests to all engines sequentially, such
> +	/*
> +	 * Check we can submit requests to all engines sequentially, such
>   	 * that each successive request waits for the earlier ones. This
>   	 * tests that we don't execute requests out of order, even though
>   	 * they are running on independent engines.
>   	 */
>   
> +	request = kmalloc_array(nengines, sizeof(*request),
> +				GFP_KERNEL | __GFP_ZERO);
> +	if (!request)
> +		return -ENOMEM;
> +
>   	err = igt_live_test_begin(&t, i915, __func__, "");
>   	if (err)
> -		return err;
> +		goto out_free;
>   
> -	for_each_engine(engine, i915, id) {
> +	idx = 0;
> +	for_each_uabi_engine(engine, i915) {
>   		struct i915_vma *batch;
>   
>   		batch = recursive_batch(i915);
> @@ -922,66 +960,69 @@ static int live_sequential_engines(void *arg)
>   			err = PTR_ERR(batch);
>   			pr_err("%s: Unable to create batch for %s, err=%d\n",
>   			       __func__, engine->name, err);
> -			return err;
> +			goto out_free;
>   		}
>   
> -		request[id] = i915_request_create(engine->kernel_context);
> -		if (IS_ERR(request[id])) {
> -			err = PTR_ERR(request[id]);
> +		request[idx] = i915_request_create(engine->kernel_context);
> +		if (IS_ERR(request[idx])) {
> +			err = PTR_ERR(request[idx]);
>   			pr_err("%s: Request allocation failed for %s with err=%d\n",
>   			       __func__, engine->name, err);
>   			goto out_request;
>   		}
>   
>   		if (prev) {
> -			err = i915_request_await_dma_fence(request[id],
> +			err = i915_request_await_dma_fence(request[idx],
>   							   &prev->fence);
>   			if (err) {
> -				i915_request_add(request[id]);
> +				i915_request_add(request[idx]);
>   				pr_err("%s: Request await failed for %s with err=%d\n",
>   				       __func__, engine->name, err);
>   				goto out_request;
>   			}
>   		}
>   
> -		err = engine->emit_bb_start(request[id],
> +		err = engine->emit_bb_start(request[idx],
>   					    batch->node.start,
>   					    batch->node.size,
>   					    0);
>   		GEM_BUG_ON(err);
> -		request[id]->batch = batch;
> +		request[idx]->batch = batch;
>   
>   		i915_vma_lock(batch);
> -		err = i915_request_await_object(request[id], batch->obj, false);
> +		err = i915_request_await_object(request[idx],
> +						batch->obj, false);
>   		if (err == 0)
> -			err = i915_vma_move_to_active(batch, request[id], 0);
> +			err = i915_vma_move_to_active(batch, request[idx], 0);
>   		i915_vma_unlock(batch);
>   		GEM_BUG_ON(err);
>   
> -		i915_request_get(request[id]);
> -		i915_request_add(request[id]);
> +		i915_request_get(request[idx]);
> +		i915_request_add(request[idx]);
>   
> -		prev = request[id];
> +		prev = request[idx];
> +		idx++;
>   	}
>   
> -	for_each_engine(engine, i915, id) {
> +	idx = 0;
> +	for_each_uabi_engine(engine, i915) {
>   		long timeout;
>   
> -		if (i915_request_completed(request[id])) {
> +		if (i915_request_completed(request[idx])) {
>   			pr_err("%s(%s): request completed too early!\n",
>   			       __func__, engine->name);
>   			err = -EINVAL;
>   			goto out_request;
>   		}
>   
> -		err = recursive_batch_resolve(request[id]->batch);
> +		err = recursive_batch_resolve(request[idx]->batch);
>   		if (err) {
>   			pr_err("%s: failed to resolve batch, err=%d\n",
>   			       __func__, err);
>   			goto out_request;
>   		}
>   
> -		timeout = i915_request_wait(request[id], 0,
> +		timeout = i915_request_wait(request[idx], 0,
>   					    MAX_SCHEDULE_TIMEOUT);
>   		if (timeout < 0) {
>   			err = timeout;
> @@ -990,30 +1031,35 @@ static int live_sequential_engines(void *arg)
>   			goto out_request;
>   		}
>   
> -		GEM_BUG_ON(!i915_request_completed(request[id]));
> +		GEM_BUG_ON(!i915_request_completed(request[idx]));
> +		idx++;
>   	}
>   
>   	err = igt_live_test_end(&t);
>   
>   out_request:
> -	for_each_engine(engine, i915, id) {
> +	idx = 0;
> +	for_each_uabi_engine(engine, i915) {
>   		u32 *cmd;
>   
> -		if (!request[id])
> +		if (!request[idx])
>   			break;
>   
> -		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
> +		cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
>   					      I915_MAP_WC);
>   		if (!IS_ERR(cmd)) {
>   			*cmd = MI_BATCH_BUFFER_END;
>   			intel_gt_chipset_flush(engine->gt);
>   
> -			i915_gem_object_unpin_map(request[id]->batch->obj);
> +			i915_gem_object_unpin_map(request[idx]->batch->obj);
>   		}
>   
> -		i915_vma_put(request[id]->batch);
> -		i915_request_put(request[id]);
> +		i915_vma_put(request[idx]->batch);
> +		i915_request_put(request[idx]);
> +		idx++;
>   	}
> +out_free:
> +	kfree(request);
>   	return err;
>   }
>   
> @@ -1079,9 +1125,10 @@ static int live_parallel_engines(void *arg)
>   		__live_parallel_engineN,
>   		NULL,
>   	};
> +	const unsigned int nengines = num_uabi_engines(i915);
>   	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
>   	int (* const *fn)(void *arg);
> +	struct task_struct **tsk;
>   	int err = 0;
>   
>   	/*
> @@ -1089,42 +1136,49 @@ static int live_parallel_engines(void *arg)
>   	 * tests that we load up the system maximally.
>   	 */
>   
> +	tsk = kmalloc_array(nengines, sizeof(*tsk), GFP_KERNEL | __GFP_ZERO);
> +	if (!tsk)
> +		return -ENOMEM;
> +
>   	for (fn = func; !err && *fn; fn++) {
> -		struct task_struct *tsk[I915_NUM_ENGINES] = {};
>   		struct igt_live_test t;
> +		unsigned int idx;
>   
>   		err = igt_live_test_begin(&t, i915, __func__, "");
>   		if (err)
>   			break;
>   
> -		for_each_engine(engine, i915, id) {
> -			tsk[id] = kthread_run(*fn, engine,
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {
> +			tsk[idx] = kthread_run(*fn, engine,
>   					      "igt/parallel:%s",
>   					      engine->name);
> -			if (IS_ERR(tsk[id])) {
> -				err = PTR_ERR(tsk[id]);
> +			if (IS_ERR(tsk[idx])) {
> +				err = PTR_ERR(tsk[idx]);
>   				break;
>   			}
> -			get_task_struct(tsk[id]);
> +			get_task_struct(tsk[idx++]);
>   		}
>   
> -		for_each_engine(engine, i915, id) {
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {
>   			int status;
>   
> -			if (IS_ERR_OR_NULL(tsk[id]))
> -				continue;
> +			if (IS_ERR(tsk[idx]))
> +				break;
>   
> -			status = kthread_stop(tsk[id]);
> +			status = kthread_stop(tsk[idx]);
>   			if (status && !err)
>   				err = status;
>   
> -			put_task_struct(tsk[id]);
> +			put_task_struct(tsk[idx++]);
>   		}
>   
>   		if (igt_live_test_end(&t))
>   			err = -EIO;
>   	}
>   
> +	kfree(tsk);
>   	return err;
>   }
>   
> @@ -1168,16 +1222,16 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   static int live_breadcrumbs_smoketest(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> -	struct smoketest t[I915_NUM_ENGINES];
> -	unsigned int ncpus = num_online_cpus();
> +	const unsigned int nengines = num_uabi_engines(i915);
> +	const unsigned int ncpus = num_online_cpus();
>   	unsigned long num_waits, num_fences;
>   	struct intel_engine_cs *engine;
>   	struct task_struct **threads;
>   	struct igt_live_test live;
> -	enum intel_engine_id id;
>   	intel_wakeref_t wakeref;
>   	struct drm_file *file;
> -	unsigned int n;
> +	struct smoketest *smoke;
> +	unsigned int n, idx;
>   	int ret = 0;
>   
>   	/*
> @@ -1196,28 +1250,31 @@ static int live_breadcrumbs_smoketest(void *arg)
>   		goto out_rpm;
>   	}
>   
> -	threads = kcalloc(ncpus * I915_NUM_ENGINES,
> -			  sizeof(*threads),
> -			  GFP_KERNEL);
> -	if (!threads) {
> +	smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
> +	if (!smoke) {
>   		ret = -ENOMEM;
>   		goto out_file;
>   	}
>   
> -	memset(&t[0], 0, sizeof(t[0]));
> -	t[0].request_alloc = __live_request_alloc;
> -	t[0].ncontexts = 64;
> -	t[0].contexts = kmalloc_array(t[0].ncontexts,
> -				      sizeof(*t[0].contexts),
> -				      GFP_KERNEL);
> -	if (!t[0].contexts) {
> +	threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
> +	if (!threads) {
> +		ret = -ENOMEM;
> +		goto out_smoke;
> +	}
> +
> +	smoke[0].request_alloc = __live_request_alloc;
> +	smoke[0].ncontexts = 64;
> +	smoke[0].contexts = kmalloc_array(smoke[0].ncontexts,
> +					  sizeof(*smoke[0].contexts),
> +					  GFP_KERNEL);
> +	if (!smoke[0].contexts) {
>   		ret = -ENOMEM;
>   		goto out_threads;
>   	}
>   
> -	for (n = 0; n < t[0].ncontexts; n++) {
> -		t[0].contexts[n] = live_context(i915, file);
> -		if (!t[0].contexts[n]) {
> +	for (n = 0; n < smoke[0].ncontexts; n++) {
> +		smoke[0].contexts[n] = live_context(i915, file);
> +		if (!smoke[0].contexts[n]) {
>   			ret = -ENOMEM;
>   			goto out_contexts;
>   		}
> @@ -1227,42 +1284,46 @@ static int live_breadcrumbs_smoketest(void *arg)
>   	if (ret)
>   		goto out_contexts;
>   
> -	for_each_engine(engine, i915, id) {
> -		t[id] = t[0];
> -		t[id].engine = engine;
> -		t[id].max_batch = max_batches(t[0].contexts[0], engine);
> -		if (t[id].max_batch < 0) {
> -			ret = t[id].max_batch;
> +	idx = 0;
> +	for_each_uabi_engine(engine, i915) {
> +		smoke[idx] = smoke[0];
> +		smoke[idx].engine = engine;
> +		smoke[idx].max_batch = max_batches(smoke[0].contexts[0], engine);
> +		if (smoke[idx].max_batch < 0) {
> +			ret = smoke[idx].max_batch;
>   			goto out_flush;
>   		}
>   		/* One ring interleaved between requests from all cpus */
> -		t[id].max_batch /= num_online_cpus() + 1;
> +		smoke[idx].max_batch /= num_online_cpus() + 1;
>   		pr_debug("Limiting batches to %d requests on %s\n",
> -			 t[id].max_batch, engine->name);
> +			 smoke[idx].max_batch, engine->name);
>   
>   		for (n = 0; n < ncpus; n++) {
>   			struct task_struct *tsk;
>   
>   			tsk = kthread_run(__igt_breadcrumbs_smoketest,
> -					  &t[id], "igt/%d.%d", id, n);
> +					  &smoke[idx], "igt/%d.%d", idx, n);
>   			if (IS_ERR(tsk)) {
>   				ret = PTR_ERR(tsk);
>   				goto out_flush;
>   			}
>   
>   			get_task_struct(tsk);
> -			threads[id * ncpus + n] = tsk;
> +			threads[idx * ncpus + n] = tsk;
>   		}
> +
> +		idx++;
>   	}
>   
>   	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
>   
>   out_flush:
> +	idx = 0;
>   	num_waits = 0;
>   	num_fences = 0;
> -	for_each_engine(engine, i915, id) {
> +	for_each_uabi_engine(engine, i915) {
>   		for (n = 0; n < ncpus; n++) {
> -			struct task_struct *tsk = threads[id * ncpus + n];
> +			struct task_struct *tsk = threads[idx * ncpus + n];
>   			int err;
>   
>   			if (!tsk)
> @@ -1275,17 +1336,20 @@ static int live_breadcrumbs_smoketest(void *arg)
>   			put_task_struct(tsk);
>   		}
>   
> -		num_waits += atomic_long_read(&t[id].num_waits);
> -		num_fences += atomic_long_read(&t[id].num_fences);
> +		num_waits += atomic_long_read(&smoke[idx].num_waits);
> +		num_fences += atomic_long_read(&smoke[idx].num_fences);
> +		idx++;
>   	}
>   	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
>   		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
>   
>   	ret = igt_live_test_end(&live) ?: ret;
>   out_contexts:
> -	kfree(t[0].contexts);
> +	kfree(smoke[0].contexts);
>   out_threads:
>   	kfree(threads);
> +out_smoke:
> +	kfree(smoke);
>   out_file:
>   	mock_file_free(i915, file);
>   out_rpm:
> 

Rest looks fine.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Will we end up with a for_each_uabi_engine_idx iterator? And storing the 
num_uabi_engines in somewhere?

Regards,

Tvrtko