[Intel-gfx] [PATCH] drm/i915/selftests: Exercise CS TLB invalidation
Mika Kuoppala
mika.kuoppala at linux.intel.com
Thu Sep 19 12:57:45 UTC 2019
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Check that we are correctly invalidating the TLB at the start of a
> batch after updating the GTT.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 227 ++++++++++++++++++
> 1 file changed, 227 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 598c18d10640..f8709b332bd7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -25,13 +25,16 @@
> #include <linux/list_sort.h>
> #include <linux/prime_numbers.h>
>
> +#include "gem/i915_gem_context.h"
> #include "gem/selftests/mock_context.h"
> +#include "gt/intel_context.h"
>
> #include "i915_random.h"
> #include "i915_selftest.h"
>
> #include "mock_drm.h"
> #include "mock_gem_device.h"
> +#include "igt_flush_test.h"
>
> static void cleanup_freed_objects(struct drm_i915_private *i915)
> {
> @@ -1705,6 +1708,229 @@ int i915_gem_gtt_mock_selftests(void)
> return err;
> }
>
> +static int context_sync(struct intel_context *ce)
> +{
> + struct i915_request *rq;
> + long timeout;
> +
> + rq = intel_context_create_request(ce);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + i915_request_get(rq);
> + i915_request_add(rq);
> +
> + timeout = i915_request_wait(rq, 0, HZ / 5);
> + i915_request_put(rq);
> +
> + return timeout < 0 ? -EIO : 0;
> +}
> +
> +static int submit_batch(struct intel_context *ce, u64 addr)
> +{
> + struct i915_request *rq;
> + int err;
> +
> + rq = intel_context_create_request(ce);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + err = 0;
> + if (rq->engine->emit_init_breadcrumb) /* detect a hang */
for seqno write?
> + err = rq->engine->emit_init_breadcrumb(rq);
> + if (err == 0)
> + err = rq->engine->emit_bb_start(rq, addr, 0, 0);
> +
In context_sync part we grabbed a reference. In here we
don't. I don't see how we can get away without it even
if we don't wait in here.
> + i915_request_add(rq);
> +
> + return err;
> +}
> +
> +static int igt_cs_tlb(void *arg)
> +{
> + const unsigned int count = PAGE_SIZE / 64;
> + const unsigned int chunk_size = count * PAGE_SIZE;
> + struct drm_i915_private *i915 = arg;
> + struct drm_i915_gem_object *bbe, *out;
> + struct i915_gem_engines_iter it;
> + struct i915_address_space *vm;
> + struct i915_gem_context *ctx;
> + struct intel_context *ce;
> + struct drm_file *file;
> + struct i915_vma *vma;
> + unsigned int i;
> + u32 *result;
> + u32 *batch;
> + int err = 0;
> +
> + file = mock_file(i915);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + ctx = live_context(i915, file);
> + if (IS_ERR(ctx)) {
> + err = PTR_ERR(ctx);
> + goto out_unlock;
> + }
> +
> + vm = ctx->vm;
> + if (!vm)
> + goto out_unlock;
> +
> + bbe = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(bbe)) {
> + err = PTR_ERR(bbe);
> + goto out_unlock;
> + }
> +
> + batch = i915_gem_object_pin_map(bbe, I915_MAP_WC);
> + if (IS_ERR(batch)) {
> + err = PTR_ERR(batch);
> + goto out_bbe;
> + }
> + for (i = 0; i < count; i++) {
> + u32 *cs = batch + i * 64 / sizeof(*cs);
> + u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32);
> +
> + GEM_BUG_ON(INTEL_GEN(i915) < 6);
> + cs[0] = MI_STORE_DWORD_IMM_GEN4;
> + if (INTEL_GEN(i915) >= 8) {
> + cs[1] = lower_32_bits(addr);
> + cs[2] = upper_32_bits(addr);
> + cs[3] = i;
> + cs[4] = MI_NOOP;
> + cs[5] = MI_BATCH_BUFFER_START_GEN8;
> + } else {
> + cs[1] = 0;
> + cs[2] = lower_32_bits(addr);
> + cs[3] = i;
> + cs[4] = MI_NOOP;
> + cs[5] = MI_BATCH_BUFFER_START;
> + }
> + }
> +
> + out = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(out)) {
> + err = PTR_ERR(out);
> + goto out_batch;
> + }
> + i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED);
> +
> + vma = i915_vma_instance(out, vm, NULL);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto out_batch;
> + }
> +
> + err = i915_vma_pin(vma, 0, 0,
> + PIN_USER |
> + PIN_OFFSET_FIXED |
> + (vm->total - PAGE_SIZE));
> + if (err)
> + goto out_out;
out_put?
Oh and we don't have to do anything with the instance on
error paths?
> + GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE);
> +
> + result = i915_gem_object_pin_map(out, I915_MAP_WB);
> + if (IS_ERR(result)) {
> + err = PTR_ERR(result);
> + goto out_out;
> + }
> +
> + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
> + IGT_TIMEOUT(end_time);
> + unsigned long pass = 0;
> +
> + if (!intel_engine_can_store_dword(ce->engine))
> + continue;
> +
> + while (!__igt_timeout(end_time, NULL)) {
> + u64 offset;
> +
> + offset = random_offset(0, vm->total - PAGE_SIZE,
> + chunk_size, PAGE_SIZE);
> +
> + err = vm->allocate_va_range(vm, offset, chunk_size);
> + if (err)
> + goto end;
> +
> + memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
> +
> + vma = i915_vma_instance(bbe, vm, NULL);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto end;
> + }
> +
> + err = vma->ops->set_pages(vma);
> + if (err)
> + goto end;
> +
> + /* Replace the TLB with target batches */
> + for (i = 0; i < count; i++) {
> + u32 *cs = batch + i * 64 / sizeof(*cs);
> + u64 addr;
> +
> + vma->node.start = offset + i * PAGE_SIZE;
on previous loop, we have now primed the pte to tlb in here?
> + vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
..now changing in it here...
> +
> + addr = vma->node.start + i * 64;
> + cs[4] = MI_NOOP;
> + cs[6] = lower_32_bits(addr);
> + cs[7] = upper_32_bits(addr);
> + wmb();
> +
> + err = submit_batch(ce, addr);
in hope that with this submission hardware will see the old one and
completely miss the store+bb start?
Perhaps rewiring a more dummy emit only to prove this case
is pushing it.
But I am curious to know if you did try it out by removing
the invalidate on the emits and managed to bring
out the missing writes?
-Mika
> + if (err)
> + goto end;
> + }
> +
> + yield();
> + for (i = 0; i < count; i++)
> + batch[i * 64 / sizeof(*batch) + 4] =
> + MI_BATCH_BUFFER_END;
> + wmb();
> +
> + vma->ops->clear_pages(vma);
> +
> + err = context_sync(ce);
> + if (err) {
> + pr_err("%s: writes timed out\n",
> + ce->engine->name);
> + goto end;
> + }
> +
> + for (i = 0; i < count; i++) {
> + if (result[i] != i) {
> + pr_err("%s: Write lost on pass %lu, at offset %llx, index %d, found %x, expected %x\n",
> + ce->engine->name, pass,
> + offset, i, result[i], i);
> + err = -EINVAL;
> + goto end;
> + }
> + }
> +
> + vm->clear_range(vm, offset, chunk_size);
> + pass++;
> + }
> + }
> +end:
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> + i915_gem_context_unlock_engines(ctx);
> + i915_gem_object_unpin_map(out);
> +out_out:
> + i915_gem_object_put(out);
> +out_batch:
> + i915_gem_object_unpin_map(bbe);
> +out_bbe:
> + i915_gem_object_put(bbe);
> +out_unlock:
> + mutex_unlock(&i915->drm.struct_mutex);
> + mock_file_free(i915, file);
> + return err;
> +}
> +
> int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> @@ -1722,6 +1948,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
> SUBTEST(igt_ggtt_pot),
> SUBTEST(igt_ggtt_fill),
> SUBTEST(igt_ggtt_page),
> + SUBTEST(igt_cs_tlb),
> };
>
> GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total));
> --
> 2.23.0
More information about the Intel-gfx
mailing list