[Intel-gfx] [PATCH v2 3/3] drm/i915/selftests: Exercise adding requests to a full GGTT

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Oct 11 12:33:28 UTC 2017


On 10/10/2017 22:38, Chris Wilson wrote:
> A bug recently encountered involved the issue where are we were
> submitting requests to different ppGTT, each would pin a segment of the
> GGTT for its logical context and ring. However, this is invisible to
> eviction as we do not tie the context/ring VMA to a request and so do
> not automatically wait upon it them (instead they are marked as pinned,
> prevent eviction entirely). Instead the eviction code must flush those

preventing?

> contexts by switching to the kernel context. This selftest tries to
> fill the GGTT with contexts to exercise a path where the
> switch-to-kernel-context failed to make forward progress and we fail
> with ENOSPC.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/selftests/i915_gem_evict.c    | 121 +++++++++++++++++++++
>   .../gpu/drm/i915/selftests/i915_live_selftests.h   |   1 +
>   drivers/gpu/drm/i915/selftests/mock_context.c      |   6 +-
>   3 files changed, 123 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index 5ea373221f49..53df8926be7f 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -24,6 +24,8 @@
>   
>   #include "../i915_selftest.h"
>   
> +#include "mock_context.h"
> +#include "mock_drm.h"
>   #include "mock_gem_device.h"
>   
>   static int populate_ggtt(struct drm_i915_private *i915)
> @@ -325,6 +327,116 @@ static int igt_evict_vm(void *arg)
>   	return err;
>   }
>   
> +static int igt_evict_contexts(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	struct reserved {
> +		struct drm_mm_node node;
> +		struct reserved *next;
> +	} *reserved = NULL;
> +	unsigned long count;
> +	int err = 0;
> +
> +	/* Make the GGTT appear small (but leave just enough to function) */

How does it do that?

> +	count = 0;
> +	mutex_lock(&i915->drm.struct_mutex);
> +	do {
> +		struct reserved *r;
> +
> +		r = kcalloc(1, sizeof(*r), GFP_KERNEL);
> +		if (!r) {
> +			err = -ENOMEM;
> +			goto out_locked;
> +		}
> +
> +		if (i915_gem_gtt_insert(&i915->ggtt.base, &r->node,
> +					1ul << 20, 0, I915_COLOR_UNEVICTABLE,
> +					16ul << 20, i915->ggtt.base.total,
> +					PIN_NOEVICT)) {
> +			kfree(r);
> +			break;
> +		}
> +
> +		r->next = reserved;
> +		reserved = r;
> +
> +		count++;
> +	} while (1);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	pr_info("Filled GGTT with %lu 1MiB nodes\n", count);

Oh right, this helps. :)

> +
> +	/* Overfill the GGTT with context objects and so try to evict one. */

Can GGTT be divisible by 1MiB in which case the above filling would fill 
everything and make any eviction impossible? Do you need an assert that 
there is a little bit of space left for below to work?

> +	for_each_engine(engine, i915, id) {
> +		struct i915_sw_fence *fence;
> +		struct drm_file *file;
> +		unsigned long count = 0;

No shadows variable warnings here?

> +		unsigned long timeout;
> +
> +		file = mock_file(i915);
> +		if (IS_ERR(file))
> +			return PTR_ERR(file);
> +
> +		timeout = round_jiffies_up(jiffies + HZ/2);
> +		fence = i915_sw_fence_create_timer(timeout, GFP_KERNEL);
> +		if (IS_ERR(fence))
> +			return PTR_ERR(fence);
> +
> +		count = 0;

Set to zero already above.

> +		mutex_lock(&i915->drm.struct_mutex);
> +		do {
> +			struct drm_i915_gem_request *rq;
> +			struct i915_gem_context *ctx;
> +
> +			ctx = live_context(i915, file);
> +			if (!ctx)
> +				break;
> +
> +			rq = i915_gem_request_alloc(engine, ctx);
> +			if (IS_ERR(rq)) {
> +				if (PTR_ERR(rq) != -ENOMEM) {
> +					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
> +					       ctx->hw_id, engine->name,
> +					       (int)PTR_ERR(rq));
> +					err = PTR_ERR(rq);
> +				}
> +				break;

Comment on why it is OK to stop the test on ENOMEM would be good.

> +			}
> +
> +			i915_sw_fence_await_sw_fence_gfp(&rq->submit, fence,
> +							 GFP_KERNEL);
> +
> +			i915_add_request(rq);
> +			count++;
> +		} while(!i915_sw_fence_done(fence));
> +		mutex_unlock(&i915->drm.struct_mutex);
> +
> +		i915_sw_fence_timer_flush(fence);
> +		pr_info("Submitted %lu contexts/requests on %s\n",
> +			count, engine->name);
> +
> +		mock_file_free(i915, file);
> +
> +		if (err)
> +			break;
> +	}
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +out_locked:
> +	while (reserved) {
> +		struct reserved *next = reserved->next;
> +
> +		drm_mm_remove_node(&reserved->node);
> +		kfree(reserved);
> +
> +		reserved = next;
> +	}
> +	mutex_unlock(&i915->drm.struct_mutex);
> +
> +	return err;
> +}
> +
>   int i915_gem_evict_mock_selftests(void)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -348,3 +460,12 @@ int i915_gem_evict_mock_selftests(void)
>   	drm_dev_unref(&i915->drm);
>   	return err;
>   }
> +
> +int i915_gem_evict_live_selftests(struct drm_i915_private *i915)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(igt_evict_contexts),
> +	};
> +
> +	return i915_subtests(tests, i915);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> index 64acd7eccc5c..54a73534b37e 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
> @@ -15,6 +15,7 @@ selftest(objects, i915_gem_object_live_selftests)
>   selftest(dmabuf, i915_gem_dmabuf_live_selftests)
>   selftest(coherency, i915_gem_coherency_live_selftests)
>   selftest(gtt, i915_gem_gtt_live_selftests)
> +selftest(evict, i915_gem_evict_live_selftests)
>   selftest(hugepages, i915_gem_huge_page_live_selftests)
>   selftest(contexts, i915_gem_context_live_selftests)
>   selftest(hangcheck, intel_hangcheck_live_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 098ce643ad07..bbf80d42e793 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -73,11 +73,7 @@ mock_context(struct drm_i915_private *i915,
>   
>   void mock_context_close(struct i915_gem_context *ctx)
>   {
> -	i915_gem_context_set_closed(ctx);
> -
> -	i915_ppgtt_close(&ctx->ppgtt->base);
> -
> -	i915_gem_context_put(ctx);
> +	context_close(ctx);
>   }
>   
>   void mock_init_contexts(struct drm_i915_private *i915)
> 

Regards,

Tvrtko


More information about the Intel-gfx mailing list