[Intel-gfx] [PATCH v2] drm/i915/selftests: Exercise CS TLB invalidation
Mika Kuoppala
mika.kuoppala at linux.intel.com
Thu Sep 19 13:40:15 UTC 2019
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Check that we are correctly invalidating the TLB at the start of a
> batch after updating the GTT.
>
> v2: Comments and hold the request reference while spinning
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 308 ++++++++++++++++++
> 1 file changed, 308 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 598c18d10640..aa7be9089e2d 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -25,13 +25,16 @@
> #include <linux/list_sort.h>
> #include <linux/prime_numbers.h>
>
> +#include "gem/i915_gem_context.h"
> #include "gem/selftests/mock_context.h"
> +#include "gt/intel_context.h"
>
> #include "i915_random.h"
> #include "i915_selftest.h"
>
> #include "mock_drm.h"
> #include "mock_gem_device.h"
> +#include "igt_flush_test.h"
>
> static void cleanup_freed_objects(struct drm_i915_private *i915)
> {
> @@ -1705,6 +1708,310 @@ int i915_gem_gtt_mock_selftests(void)
> return err;
> }
>
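> +/*
> + * Submit an empty request on the context and wait briefly for it to
> + * complete; the HZ / 5 (a fifth of a second) timeout bounds how long
> + * a stuck engine can stall the test.
> + */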
> +static int context_sync(struct intel_context *ce)
> +{
> + struct i915_request *rq;
> + long timeout;
> +
> + rq = intel_context_create_request(ce);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + i915_request_get(rq);
> + i915_request_add(rq);
> +
> + timeout = i915_request_wait(rq, 0, HZ / 5);
> + i915_request_put(rq);
> +
> + return timeout < 0 ? -EIO : 0;
> +}
> +
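> +/*
> + * Build and submit a request that jumps to the batch at addr. The
> + * init breadcrumb (where available) lets us detect a hang inside the
> + * batch; on success, a referenced request is returned to the caller.
> + */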
> +static struct i915_request *
> +submit_batch(struct intel_context *ce, u64 addr)
> +{
> + struct i915_request *rq;
> + int err;
> +
> + rq = intel_context_create_request(ce);
> + if (IS_ERR(rq))
> + return rq;
> +
> + err = 0;
> + if (rq->engine->emit_init_breadcrumb) /* detect a hang */
> + err = rq->engine->emit_init_breadcrumb(rq);
> + if (err == 0)
> + err = rq->engine->emit_bb_start(rq, addr, 0, 0);
> +
> + if (err == 0)
> + i915_request_get(rq);
> + i915_request_add(rq);
> +
> + return err ? ERR_PTR(err) : rq;
> +}
> +
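> +/*
> + * Each 64-byte chunk of the batch spins by chaining back to itself;
> + * dword 4 (initially MI_NOOP) is the slot that end_spin() flips to
> + * MI_BATCH_BUFFER_END to terminate the loop.
> + */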
> +static u32 *spinner(u32 *batch, int i)
> +{
> + return batch + i * 64 / sizeof(*batch) + 4;
> +}
> +
> +static void end_spin(u32 *batch, int i)
> +{
> + *spinner(batch, i) = MI_BATCH_BUFFER_END;
> + wmb();
> +}
> +
> +static int igt_cs_tlb(void *arg)
> +{
> + const unsigned int count = PAGE_SIZE / 64;
> + const unsigned int chunk_size = count * PAGE_SIZE;
> + struct drm_i915_private *i915 = arg;
> + struct drm_i915_gem_object *bbe, *act, *out;
> + struct i915_gem_engines_iter it;
> + struct i915_address_space *vm;
> + struct i915_gem_context *ctx;
> + struct intel_context *ce;
> + struct drm_file *file;
> + struct i915_vma *vma;
> + unsigned int i;
> + u32 *result;
> + u32 *batch;
> + int err = 0;
> +
> + /*
> + * Our mission here is to fool the hardware into executing something
> + * from scratch, as it will not have seen the batch move if the TLB
> + * invalidate is missing.
> + */
> +
> + file = mock_file(i915);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + ctx = live_context(i915, file);
> + if (IS_ERR(ctx)) {
> + err = PTR_ERR(ctx);
> + goto out_unlock;
> + }
> +
> + vm = ctx->vm;
> + if (!vm)
> + goto out_unlock;
> +
> + /* Create two pages; a dummy with which to prefill the TLB, and the intended target */
> + bbe = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(bbe)) {
> + err = PTR_ERR(bbe);
> + goto out_unlock;
> + }
> +
> + batch = i915_gem_object_pin_map(bbe, I915_MAP_WC);
> + if (IS_ERR(batch)) {
> + err = PTR_ERR(batch);
> + goto out_bbe;
> + }
> + memset32(batch, MI_BATCH_BUFFER_END, PAGE_SIZE / sizeof(u32));
> + i915_gem_object_flush_map(bbe);
> + i915_gem_object_unpin_map(bbe);
> +
> + act = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(act)) {
> + err = PTR_ERR(act);
> + goto out_bbe;
> + }
> +
> + /* Track the execution of each request by writing into a different slot */
> + batch = i915_gem_object_pin_map(act, I915_MAP_WC);
> + if (IS_ERR(batch)) {
> + err = PTR_ERR(batch);
> + goto out_act;
> + }
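> + /*
> + * Each chunk writes its index into a slot of the result page, then
> + * spins via MI_BATCH_BUFFER_START; the jump target is filled in with
> + * the chunk's own address at submission time.
> + */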
> + for (i = 0; i < count; i++) {
> + u32 *cs = batch + i * 64 / sizeof(*cs);
> + u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32);
> +
> + GEM_BUG_ON(INTEL_GEN(i915) < 6);
> + cs[0] = MI_STORE_DWORD_IMM_GEN4;
> + if (INTEL_GEN(i915) >= 8) {
> + cs[1] = lower_32_bits(addr);
> + cs[2] = upper_32_bits(addr);
> + cs[3] = i;
> + cs[4] = MI_NOOP;
> + cs[5] = MI_BATCH_BUFFER_START_GEN8;
> + } else {
> + cs[1] = 0;
> + cs[2] = lower_32_bits(addr);
> + cs[3] = i;
> + cs[4] = MI_NOOP;
> + cs[5] = MI_BATCH_BUFFER_START;
> + }
> + }
> +
> + out = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(out)) {
> + err = PTR_ERR(out);
> + goto out_batch;
> + }
> + i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED);
> +
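> + /*
> + * The result page is pinned at the very top of the address space,
> + * matching the vm->total - PAGE_SIZE target of the MI_STORE_DWORD_IMM
> + * in each chunk.
> + */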
> + vma = i915_vma_instance(out, vm, NULL);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto out_batch;
> + }
> +
> + err = i915_vma_pin(vma, 0, 0,
> + PIN_USER |
> + PIN_OFFSET_FIXED |
> + (vm->total - PAGE_SIZE));
> + if (err)
> + goto out_out;
> + GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE);
> +
> + result = i915_gem_object_pin_map(out, I915_MAP_WB);
> + if (IS_ERR(result)) {
> + err = PTR_ERR(result);
> + goto out_out;
> + }
> +
> + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
> + IGT_TIMEOUT(end_time);
> + unsigned long pass = 0;
> +
> + if (!intel_engine_can_store_dword(ce->engine))
> + continue;
> +
> + while (!__igt_timeout(end_time, NULL)) {
> + struct i915_request *rq;
> + u64 offset;
> +
> + offset = random_offset(0, vm->total - PAGE_SIZE,
> + chunk_size, PAGE_SIZE);
> +
> + err = vm->allocate_va_range(vm, offset, chunk_size);
> + if (err)
> + goto end;
> +
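> + /* Poison the result page so any write lost in this pass is caught */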
> + memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
> +
> + vma = i915_vma_instance(bbe, vm, NULL);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto end;
> + }
> +
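> + /*
> + * Manage the vma's page tables by hand so that it can be rebound
> + * at arbitrary offsets without the usual bind/unbind bookkeeping.
> + */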
> + err = vma->ops->set_pages(vma);
> + if (err)
> + goto end;
> +
> + /* Prime the TLB with the dummy pages */
> + for (i = 0; i < count; i++) {
> + vma->node.start = offset + i * PAGE_SIZE;
> + vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
> +
> + rq = submit_batch(ce, vma->node.start);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto end;
> + }
> + i915_request_put(rq);
> + }
> +
> + vma->ops->clear_pages(vma);
> +
> + err = context_sync(ce);
> + if (err) {
> + pr_err("%s: dummy setup timed out\n",
> + ce->engine->name);
> + goto end;
> + }
> +
> + vma = i915_vma_instance(act, vm, NULL);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto end;
> + }
> +
> + err = vma->ops->set_pages(vma);
> + if (err)
> + goto end;
> +
> + /* Replace the TLB with target batches */
> + for (i = 0; i < count; i++) {
> + struct i915_request *rq;
> + u32 *cs = batch + i * 64 / sizeof(*cs);
> + u64 addr;
> +
> + vma->node.start = offset + i * PAGE_SIZE;
> + vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
> +
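> + /* Re-arm the spinner and point its chain at the chunk's new home */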
> + addr = vma->node.start + i * 64;
> + cs[4] = MI_NOOP;
> + cs[6] = lower_32_bits(addr);
> + cs[7] = upper_32_bits(addr);
> + wmb();
> +
> + rq = submit_batch(ce, addr);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto end;
> + }
> +
> + /* Wait until the context chain has started */
> + if (i == 0) {
> + while (READ_ONCE(result[i]) &&
> + !i915_request_completed(rq))
> + cond_resched();
> + } else {
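> + /* Unblock the previous chunk so the ring advances to this one */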
> + end_spin(batch, i - 1);
> + }
> +
> + i915_request_put(rq);
> + }
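> + /* Release the final spinner so the whole chain can drain */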
> + end_spin(batch, count - 1);
> +
> + vma->ops->clear_pages(vma);
> +
> + err = context_sync(ce);
> + if (err) {
> + pr_err("%s: writes timed out\n",
> + ce->engine->name);
> + goto end;
> + }
> +
> + for (i = 0; i < count; i++) {
> + if (result[i] != i) {
> + pr_err("%s: Write lost on pass %lu, at offset %llx, index %d, found %x, expected %x\n",
> + ce->engine->name, pass,
> + offset, i, result[i], i);
> + err = -EINVAL;
> + goto end;
> + }
> + }
> +
> + vm->clear_range(vm, offset, chunk_size);
> + pass++;
> + }
> + }
> +end:
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> + i915_gem_context_unlock_engines(ctx);
> + i915_gem_object_unpin_map(out);
> +out_out:
> + i915_gem_object_put(out);
> +out_batch:
> + i915_gem_object_unpin_map(act);
> +out_act:
> + i915_gem_object_put(act);
> +out_bbe:
> + i915_gem_object_put(bbe);
> +out_unlock:
> + mutex_unlock(&i915->drm.struct_mutex);
> + mock_file_free(i915, file);
> + return err;
> +}
> +
> int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> @@ -1722,6 +2029,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
> SUBTEST(igt_ggtt_pot),
> SUBTEST(igt_ggtt_fill),
> SUBTEST(igt_ggtt_page),
> + SUBTEST(igt_cs_tlb),
> };
>
> GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total));
> --
> 2.23.0