[Intel-gfx] [PATCH v2] drm/i915/selftests: Exercise context switching in parallel

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Sep 30 13:47:26 UTC 2019


On 30/09/2019 12:31, Chris Wilson wrote:
> We currently test context switching on each engine as a basic stress
> test (just verifying that nothing explodes if we execute 2 requests from
> different contexts sequentially). What we have not tested is what
> happens if we try and do so on all available engines simultaneously,
> putting our SW and the HW under the maximal stress.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> Keep struct_mutex the outer lock while it still exists
> ---
>   .../drm/i915/gem/selftests/i915_gem_context.c | 203 ++++++++++++++++++
>   1 file changed, 203 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index dc25bcc3e372..8325c7329dc7 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -156,6 +156,208 @@ static int live_nop_switch(void *arg)
>   	return err;
>   }
>   
> +struct parallel_switch {
> +	struct task_struct *tsk;
> +	struct intel_context *ce[2];
> +};
> +
> +static int __live_parallel_switch1(void *data)
> +{
> +	struct parallel_switch *arg = data;
> +	struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +
> +	count = 0;
> +	do {
> +		struct i915_request *rq;
> +		int err;
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +		rq = i915_request_create(arg->ce[0]);
> +		if (IS_ERR(rq)) {
> +			mutex_unlock(&i915->drm.struct_mutex);
> +			return PTR_ERR(rq);
> +		}
> +
> +		i915_request_add(rq);
> +		mutex_unlock(&i915->drm.struct_mutex);
> +
> +		mutex_lock(&i915->drm.struct_mutex);

unlock-lock! :) I guess in anticipation of removing it all.

> +		rq = i915_request_create(arg->ce[1]);
> +		if (IS_ERR(rq)) {
> +			mutex_unlock(&i915->drm.struct_mutex);
> +			return PTR_ERR(rq);
> +		}
> +
> +		i915_request_get(rq);
> +		i915_request_add(rq);
> +		mutex_unlock(&i915->drm.struct_mutex);
> +
> +		err = 0;
> +		if (i915_request_wait(rq, 0, HZ / 5) < 0)
> +			err = -ETIME;
> +		i915_request_put(rq);
> +		if (err)
> +			return err;
> +
> +		count++;
> +	} while (!__igt_timeout(end_time, NULL));
> +
> +	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
> +	return 0;
> +}
> +
> +static int __live_parallel_switchN(void *data)
> +{
> +	struct parallel_switch *arg = data;
> +	struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +
> +	count = 0;
> +	do {
> +		struct i915_request *rq;
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +		rq = i915_request_create(arg->ce[0]);
> +		if (IS_ERR(rq)) {
> +			mutex_unlock(&i915->drm.struct_mutex);
> +			return PTR_ERR(rq);
> +		}
> +
> +		i915_request_add(rq);
> +		mutex_unlock(&i915->drm.struct_mutex);
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +		rq = i915_request_create(arg->ce[1]);
> +		if (IS_ERR(rq)) {
> +			mutex_unlock(&i915->drm.struct_mutex);
> +			return PTR_ERR(rq);
> +		}
> +
> +		i915_request_add(rq);
> +		mutex_unlock(&i915->drm.struct_mutex);
> +
> +		count++;
> +	} while (!__igt_timeout(end_time, NULL));
> +
> +	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
> +	return 0;
> +}
> +
> +static int live_parallel_switch(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	static int (* const func[])(void *arg) = {
> +		__live_parallel_switch1,
> +		__live_parallel_switchN,
> +		NULL,
> +	};
> +	struct i915_gem_context *ctx[2];
> +	struct parallel_switch *data;
> +	int (* const *fn)(void *arg);
> +	struct drm_file *file;
> +	int err = 0;
> +	int n;
> +
> +	/*
> +	 * Check we can process switches on all engines simultaneously.
> +	 */
> +
> +	if (!DRIVER_CAPS(i915)->has_logical_contexts)
> +		return 0;
> +
> +	file = mock_file(i915);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	data = kcalloc(I915_NUM_ENGINES, sizeof(*data), GFP_KERNEL);

There is a little bit of mixing up between I915_NUM_ENGINES and gem
engines (which contain the num_engines field) in this function.

I think it would be better to stick to one - so maybe get the count from
gem engines? It can't change during the selftest, so we don't need to
keep them locked for the whole time.

> +	if (!data)

mock_file_free

> +		return -ENOMEM;
> +
> +	for (n = 0; n < ARRAY_SIZE(ctx); n++) {
> +		struct i915_gem_engines_iter it;
> +		struct intel_context *ce;
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +		ctx[n] = live_context(i915, file);
> +		if (IS_ERR(ctx[n])) {
> +			err = PTR_ERR(ctx[n]);
> +			goto out_locked;
> +		}
> +
> +		for_each_gem_engine(ce,
> +				    i915_gem_context_lock_engines(ctx[n]), it) {
> +			err = intel_context_pin(ce);
> +			if (err) {
> +				i915_gem_context_unlock_engines(ctx[n]);
> +				goto out_locked;
> +			}
> +			data[ce->engine->legacy_idx].ce[n] = ce;

IMHO a bit confusing to use legacy_idx - makes it sound like there is 
some significance to the legacy part so why not just use engine->id?

> +		}
> +		i915_gem_context_unlock_engines(ctx[n]);
> +		mutex_unlock(&i915->drm.struct_mutex);
> +	}
> +
> +	for (fn = func; !err && *fn; fn++) {
> +		struct igt_live_test t;
> +		int n;
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +		err = igt_live_test_begin(&t, i915, __func__, "");
> +		mutex_unlock(&i915->drm.struct_mutex);
> +		if (err)
> +			break;
> +
> +		for (n = 0; n < I915_NUM_ENGINES; n++) {
> +			if (data[n].ce[0] == NULL || data[n].ce[1] == NULL)
> +				continue;
> +
> +			data[n].tsk = kthread_run(*fn, &data[n],
> +						  "igt/parallel:%s",
> +						  data[n].ce[0]->engine->name);
> +			if (IS_ERR(data[n].tsk)) {
> +				err = PTR_ERR(data[n].tsk);
> +				break;
> +			}
> +			get_task_struct(data[n].tsk);
> +		}
> +
> +		for (n = 0; n < I915_NUM_ENGINES; n++) {
> +			int status;
> +
> +			if (IS_ERR_OR_NULL(data[n].tsk))
> +				continue;
> +
> +			status = kthread_stop(data[n].tsk);
> +			if (status && !err)
> +				err = status;
> +
> +			put_task_struct(data[n].tsk);
> +			data[n].tsk = NULL;
> +		}
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +		mutex_unlock(&i915->drm.struct_mutex);
> +	}
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +out_locked:
> +	for (n = 0; n < I915_NUM_ENGINES; n++) {
> +		if (data[n].ce[0])
> +			intel_context_unpin(data[n].ce[0]);
> +		if (data[n].ce[1])
> +			intel_context_unpin(data[n].ce[1]);
> +	}
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	kfree(data);
> +	mock_file_free(i915, file);
> +	return err;
> +}
> +
>   static unsigned long real_page_count(struct drm_i915_gem_object *obj)
>   {
>   	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
> @@ -1681,6 +1883,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(live_nop_switch),
> +		SUBTEST(live_parallel_switch),
>   		SUBTEST(igt_ctx_exec),
>   		SUBTEST(igt_ctx_readonly),
>   		SUBTEST(igt_ctx_sseu),
> 

Regards,

Tvrtko


More information about the Intel-gfx mailing list