[PATCH i-g-t v3] tests/intel/xe_tlb: Add test to check TLB invalidation

Tue Oct 8 21:00:44 UTC 2024

On 10/3/2024 1:03 PM, Matthew Auld wrote:
> On 30/09/2024 17:26, sai.gowtham.ch at intel.com wrote:
>> From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>>
>> Test validates TLB invalidation by binding different buffer objects
>> with the same vma and submitting workload simultaneously, ideally
>> expecting the gpu to handle pages by invalidating and avoiding page faults.
>>
>> v2: Validating tlb_invalidation counts with pre and post increment counter
>>      values, rather than complete stats.
>>
>> v3: Move xe_tlb_count to lib, making it generic to all stat entries; it returns
>>      the counter of a given stat.
>>
>> Cc: Nirmoy Das <nirmoy.das at intel.com>
>> Cc: Priyanka Dandamudi <priyanka.dandamudi at intel.com>
>> Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>> ---
>>   lib/xe/xe_gt.c       |  25 ++++++++
>>   lib/xe/xe_gt.h       |   1 +
>>   tests/intel/xe_tlb.c | 148 +++++++++++++++++++++++++++++++++++++++++++
>>   tests/meson.build    |   1 +
>>   4 files changed, 175 insertions(+)
>>   create mode 100644 tests/intel/xe_tlb.c
>>
>> diff --git a/lib/xe/xe_gt.c b/lib/xe/xe_gt.c
>> index 36e8fde36..dfc60e2ff 100644
>> --- a/lib/xe/xe_gt.c
>> +++ b/lib/xe/xe_gt.c
>> @@ -141,3 +141,28 @@ void xe_post_hang_ring(int fd, igt_hang_t arg)
>>       xe_vm_destroy(fd, arg.spin->vm);
>>   }
>>   +/**
>> + * xe_tlb_count:
>> + * @fd: open xe drm file descriptor
>> + * @gt: gt_id
>> + *
>> + * This function returns the counter for a given stat.
>> + */
>> +int xe_tlb_count(int fd, int gt, const char *stat)
>> +{
>> +    FILE *f;
>> +    char tlb_path[4096];
>> +    char path[256];
>> +    int count;
>> +
>> +    sprintf(path, "/sys/kernel/debug/dri/0/gt%d/stats", gt);
>> +    f = fopen(path, "r");
>> +
>> +    while (fgets(tlb_path, sizeof(tlb_path), f)) {
>> +        if (strstr(tlb_path, stat) != NULL) {
>> +            sscanf(tlb_path, "%*[^:]: %d", &count);
>> +            break;
>> +        }
>> +    }
>> +    return count;
>> +}
>> diff --git a/lib/xe/xe_gt.h b/lib/xe/xe_gt.h
>> index 6fa05d6a9..7910587ac 100644
>> --- a/lib/xe/xe_gt.h
>> +++ b/lib/xe/xe_gt.h
>> @@ -13,3 +13,4 @@ void xe_force_gt_reset_all(int fd);
>>   igt_hang_t xe_hang_ring(int fd, uint64_t ahnd, uint32_t ctx, int ring,
>>                   unsigned int flags);
>>   void xe_post_hang_ring(int fd, igt_hang_t arg);
>> +int xe_tlb_count(int fd, int gt, const char *stat);
>> diff --git a/tests/intel/xe_tlb.c b/tests/intel/xe_tlb.c
>> new file mode 100644
>> index 000000000..399384f5b
>> --- /dev/null
>> +++ b/tests/intel/xe_tlb.c
>> @@ -0,0 +1,148 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> +* Copyright © 2024 Intel Corporation
>> +*
>> +* Authors:
>> +*    Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>> +*/
>> +#include "igt.h"
>> +#include "lib/igt_syncobj.h"
>> +#include "xe/xe_gt.c"
>> +#include "xe/xe_ioctl.h"
>> +#include "xe/xe_query.h"
>> +#include "xe_drm.h"
>> +#include "igt_debugfs.h"
>> +
>> +/**
>> + * TEST: Check Translation Lookaside Buffer Invalidation.
>> + * Category: Software building block
>> + * Mega feature: General Core features
>> + * Sub-category: CMD submission
>> + * Functionality: TLB invalidate
>> + * Test category: functionality test
>> + */
>> +struct data {
>> +    uint32_t batch[16];
>> +    uint32_t data;
>> +    uint64_t addr;
>> +};
>> +
>> +static void store_dword_batch(struct data *data, uint64_t addr, int value)
>> +{
>> +    int b;
>> +    uint64_t batch_offset = (char *)&(data->batch) - (char *)data;
>> +    uint64_t batch_addr = addr + batch_offset;
>> +    uint64_t sdi_offset = (char *)&(data->data) - (char *)data;
>> +    uint64_t sdi_addr = addr + sdi_offset;
>> +
>> +    b = 0;
>> +    data->batch[b++] = MI_STORE_DWORD_IMM_GEN4;
>> +    data->batch[b++] = sdi_addr;
>> +    data->batch[b++] = sdi_addr >> 32;
>> +    data->batch[b++] = value;
>> +    data->batch[b++] = MI_BATCH_BUFFER_END;
>> +    igt_assert(b <= ARRAY_SIZE(data->batch));
>> +
>> +    data->addr = batch_addr;
>> +}
>> +
>> +/**
>> + * SUBTEST: basic-tlb
>> + * Description: Check Translation Lookaside Buffer Invalidation.
>> + */
>> +static void tlb_invalidation(int fd, struct drm_xe_engine_class_instance *eci)
>> +{
>> +    struct drm_xe_sync sync[2] = {
>> +        { .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
>> +        { .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, }
>> +    };
>> +    struct drm_xe_exec exec = {
>> +        .num_batch_buffer = 1,
>> +        .num_syncs = 2,
>> +        .syncs = to_user_pointer(&sync),
>> +    };
>> +    struct data *data1;
>> +    struct data *data2;
>> +    uint32_t vm;
>> +    uint32_t exec_queue;
>> +    uint32_t bind_engine;
>> +    uint32_t syncobj;
>> +    size_t bo_size;
>> +    int value1 = 0x123456;
>> +    int value2 = 0x123465;
>> +    uint64_t addr = 0x100000;
>> +    uint32_t bo1, bo2;
>> +    int tlb_pre, tlb_pos;
>> +    const char *stat = "tlb_inval_count";
>> +
>> +    syncobj = syncobj_create(fd, 0);
>> +    sync[0].handle = syncobj_create(fd, 0);
>> +    sync[1].handle = syncobj;
>> +
>> +    vm = xe_vm_create(fd, 0, 0);
>> +    bo_size = sizeof(*data1);
>> +    bo_size = xe_bb_size(fd, bo_size);
>> +    bo1 = xe_bo_create(fd, vm, bo_size,
>> +                   vram_if_possible(fd, eci->gt_id),
>> +                   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
>> +    bo2 = xe_bo_create(fd, vm, bo_size,
>> +                   vram_if_possible(fd, eci->gt_id),
>> +                   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
>> +
>> +    tlb_pre = xe_tlb_count(fd, eci->gt_id, stat);
>> +    exec_queue = xe_exec_queue_create(fd, vm, eci, 0);
>> +    bind_engine = xe_bind_exec_queue_create(fd, vm, 0);
>> +    xe_vm_bind_async(fd, vm, bind_engine, bo1, 0, addr, bo_size, sync, 1);
>> +    data1 = xe_bo_map(fd, bo1, bo_size);
>> +
>> +    store_dword_batch(data1, addr, value1);
>> +    exec.exec_queue_id = exec_queue;
>> +    exec.address = data1->addr;
>> +    sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
>> +    sync[1].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
>> +    xe_exec(fd, &exec);
>> +    igt_assert(syncobj_wait(fd, &syncobj, 1, INT64_MAX, 0, NULL));
>> +    xe_vm_bind_async(fd, vm, bind_engine, bo2, 0, addr, bo_size, sync, 1);
>> +    data2 = xe_bo_map(fd, bo2, bo_size);
>> +
>> +    store_dword_batch(data2, addr, value2);
>> +    exec.exec_queue_id = exec_queue;
>> +    exec.address = data2->addr;
>> +    sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
>> +    sync[1].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
>> +    xe_exec(fd, &exec);
>> +    igt_assert(syncobj_wait(fd, &syncobj, 1, INT64_MAX, 0, NULL));
>> +
>> +    tlb_pos = xe_tlb_count(fd, eci->gt_id, stat);
>> +    igt_assert_eq(data1->data, value1);
>> +    igt_assert_eq(data2->data, value2);
>> +    igt_assert(tlb_pos > tlb_pre);
>
> Can't this overflow? 

Good point, yes it can.

> Should we not check for that? See here:
> https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2922.
>
> It is possible kmd is missing an invalidation, but seems pretty unlikely? Also strange that it only triggers on dg2.

Yes, it is likely an overflow here. I was hoping igt would print the values of tlb_pos and tlb_pre on failure; that should help.

Regards,

Nirmoy

>
>> +
>> +    syncobj_destroy(fd, sync[0].handle);
>> +    syncobj_destroy(fd, syncobj);
>> +    munmap(data1, bo_size);
>> +    munmap(data2, bo_size);
>> +    gem_close(fd, bo1);
>> +    gem_close(fd, bo2);
>> +    xe_exec_queue_destroy(fd, exec_queue);
>> +    xe_vm_destroy(fd, vm);
>> +}
>> +
>> +igt_main
>> +{
>> +    int fd;
>> +    struct drm_xe_engine *engine;
>> +
>> +    igt_fixture {
>> +        fd = drm_open_driver(DRIVER_XE);
>> +    }
>> +
>> +    igt_subtest("basic-tlb") {
>> +        engine = xe_engine(fd, 0);
>> +        tlb_invalidation(fd, &engine->instance);
>> +    }
>> +
>> +    igt_fixture {
>> +        drm_close_driver(fd);
>> +    }
>> +}
>> diff --git a/tests/meson.build b/tests/meson.build
>> index 0782d3cc7..b0a490ccb 100644
>> --- a/tests/meson.build
>> +++ b/tests/meson.build
>> @@ -317,6 +317,7 @@ intel_xe_progs = [
>>       'xe_sysfs_preempt_timeout',
>>       'xe_sysfs_scheduler',
>>           'xe_sysfs_timeslice_duration',
>> +        'xe_tlb',
>>   ]
>>     intel_xe_eudebug_progs = [