[PATCH i-g-t v3] tests/intel/xe_tlb: Add test to check TLB invalidation
Matthew Auld
matthew.auld at intel.com
Thu Oct 3 11:03:57 UTC 2024
On 30/09/2024 17:26, sai.gowtham.ch at intel.com wrote:
> From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>
> Test validates TLB invalidation by binding different buffer objects
> to the same VMA and submitting workloads back to back, expecting the
> GPU to invalidate the stale TLB entries and thereby avoid page faults.
>
> v2: Validating tlb_invalidation counts with pre and post increment counter
> values, rather complete stats.
>
> v3: Move xe_tlb_count to lib making it generic to all stat entries. which returns
> the counter of a given stat.
>
> Cc: Nirmoy Das <nirmoy.das at intel.com>
> Cc: Priyanka Dandamudi <priyanka.dandamudi at intel.com>
> Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> ---
> lib/xe/xe_gt.c | 25 ++++++++
> lib/xe/xe_gt.h | 1 +
> tests/intel/xe_tlb.c | 148 +++++++++++++++++++++++++++++++++++++++++++
> tests/meson.build | 1 +
> 4 files changed, 175 insertions(+)
> create mode 100644 tests/intel/xe_tlb.c
>
> diff --git a/lib/xe/xe_gt.c b/lib/xe/xe_gt.c
> index 36e8fde36..dfc60e2ff 100644
> --- a/lib/xe/xe_gt.c
> +++ b/lib/xe/xe_gt.c
> @@ -141,3 +141,28 @@ void xe_post_hang_ring(int fd, igt_hang_t arg)
> xe_vm_destroy(fd, arg.spin->vm);
> }
>
/**
 * xe_tlb_count:
 * @fd: open xe drm file descriptor (currently unused; kept for API symmetry)
 * @gt: GT id whose debugfs stats file is read
 * @stat: name of the stat entry to look up, e.g. "tlb_inval_count"
 *
 * Parses /sys/kernel/debug/dri/0/gt<gt>/stats, whose lines have the form
 * "<name>: <value>", and returns the integer counter recorded for @stat.
 *
 * NOTE(review): assumes the device is DRM minor 0; consider deriving the
 * debugfs directory from @fd instead of hard-coding "dri/0".
 *
 * Returns: the counter value, or -1 if the stats file cannot be opened,
 * @stat is not present, or its value cannot be parsed.
 */
int xe_tlb_count(int fd, int gt, const char *stat)
{
	FILE *f;
	char line[4096];
	char path[256];
	int count = -1;

	snprintf(path, sizeof(path), "/sys/kernel/debug/dri/0/gt%d/stats", gt);
	f = fopen(path, "r");
	if (!f)
		return -1;

	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, stat)) {
			if (sscanf(line, "%*[^:]: %d", &count) != 1)
				count = -1;
			break;
		}
	}
	fclose(f);

	return count;
}
> diff --git a/lib/xe/xe_gt.h b/lib/xe/xe_gt.h
> index 6fa05d6a9..7910587ac 100644
> --- a/lib/xe/xe_gt.h
> +++ b/lib/xe/xe_gt.h
> @@ -13,3 +13,4 @@ void xe_force_gt_reset_all(int fd);
> igt_hang_t xe_hang_ring(int fd, uint64_t ahnd, uint32_t ctx, int ring,
> unsigned int flags);
> void xe_post_hang_ring(int fd, igt_hang_t arg);
> +int xe_tlb_count(int fd, int gt, const char *stat);
> diff --git a/tests/intel/xe_tlb.c b/tests/intel/xe_tlb.c
> new file mode 100644
> index 000000000..399384f5b
> --- /dev/null
> +++ b/tests/intel/xe_tlb.c
> @@ -0,0 +1,148 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> +* Copyright © 2024 Intel Corporation
> +*
> +* Authors:
> +* Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> +*/
> +#include "igt.h"
> +#include "lib/igt_syncobj.h"
> +#include "xe/xe_gt.h"
> +#include "xe/xe_ioctl.h"
> +#include "xe/xe_query.h"
> +#include "xe_drm.h"
> +#include "igt_debugfs.h"
> +
> +/**
> + * TEST: Check Translation Lookaside Buffer Invalidation.
> + * Category: Software building block
> + * Mega feature: General Core features
> + * Sub-category: CMD submission
> + * Functionality: TLB invalidate
> + * Test category: functionality test
> + */
> +struct data {
> + uint32_t batch[16];
> + uint32_t data;
> + uint64_t addr;
> +};
> +
> +static void store_dword_batch(struct data *data, uint64_t addr, int value)
> +{
> + int b;
> + uint64_t batch_offset = (char *)&(data->batch) - (char *)data;
> + uint64_t batch_addr = addr + batch_offset;
> + uint64_t sdi_offset = (char *)&(data->data) - (char *)data;
> + uint64_t sdi_addr = addr + sdi_offset;
> +
> + b = 0;
> + data->batch[b++] = MI_STORE_DWORD_IMM_GEN4;
> + data->batch[b++] = sdi_addr;
> + data->batch[b++] = sdi_addr >> 32;
> + data->batch[b++] = value;
> + data->batch[b++] = MI_BATCH_BUFFER_END;
> + igt_assert(b <= ARRAY_SIZE(data->batch));
> +
> + data->addr = batch_addr;
> +}
> +
> +/**
> + * SUBTEST: basic-tlb
> + * Description: Check Translation Lookaside Buffer Invalidation.
> + */
> +static void tlb_invalidation(int fd, struct drm_xe_engine_class_instance *eci)
> +{
> + struct drm_xe_sync sync[2] = {
> + { .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
> + { .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, }
> + };
> + struct drm_xe_exec exec = {
> + .num_batch_buffer = 1,
> + .num_syncs = 2,
> + .syncs = to_user_pointer(&sync),
> + };
> + struct data *data1;
> + struct data *data2;
> + uint32_t vm;
> + uint32_t exec_queue;
> + uint32_t bind_engine;
> + uint32_t syncobj;
> + size_t bo_size;
> + int value1 = 0x123456;
> + int value2 = 0x123465;
> + uint64_t addr = 0x100000;
> + uint32_t bo1, bo2;
> + int tlb_pre, tlb_pos;
> + const char *stat = "tlb_inval_count";
> +
> + syncobj = syncobj_create(fd, 0);
> + sync[0].handle = syncobj_create(fd, 0);
> + sync[1].handle = syncobj;
> +
> + vm = xe_vm_create(fd, 0, 0);
> + bo_size = sizeof(*data1);
> + bo_size = xe_bb_size(fd, bo_size);
> + bo1 = xe_bo_create(fd, vm, bo_size,
> + vram_if_possible(fd, eci->gt_id),
> + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
> + bo2 = xe_bo_create(fd, vm, bo_size,
> + vram_if_possible(fd, eci->gt_id),
> + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
> +
> + tlb_pre = xe_tlb_count(fd, eci->gt_id, stat);
> + exec_queue = xe_exec_queue_create(fd, vm, eci, 0);
> + bind_engine = xe_bind_exec_queue_create(fd, vm, 0);
> + xe_vm_bind_async(fd, vm, bind_engine, bo1, 0, addr, bo_size, sync, 1);
> + data1 = xe_bo_map(fd, bo1, bo_size);
> +
> + store_dword_batch(data1, addr, value1);
> + exec.exec_queue_id = exec_queue;
> + exec.address = data1->addr;
> + sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
> + sync[1].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
> + xe_exec(fd, &exec);
> + igt_assert(syncobj_wait(fd, &syncobj, 1, INT64_MAX, 0, NULL));
> + xe_vm_bind_async(fd, vm, bind_engine, bo2, 0, addr, bo_size, sync, 1);
> + data2 = xe_bo_map(fd, bo2, bo_size);
> +
> + store_dword_batch(data2, addr, value2);
> + exec.exec_queue_id = exec_queue;
> + exec.address = data2->addr;
> + sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
> + sync[1].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
> + xe_exec(fd, &exec);
> + igt_assert(syncobj_wait(fd, &syncobj, 1, INT64_MAX, 0, NULL));
> +
> + tlb_pos = xe_tlb_count(fd, eci->gt_id, stat);
> + igt_assert_eq(data1->data, value1);
> + igt_assert_eq(data2->data, value2);
> + igt_assert(tlb_pos > tlb_pre);
Can't this overflow? Should we not check for that? See here:
https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2922.
It is possible kmd is missing an invalidation, but seems pretty
unlikely? Also strange that is only triggers on dg2.
> +
> + syncobj_destroy(fd, sync[0].handle);
> + syncobj_destroy(fd, syncobj);
> + munmap(data1, bo_size);
> + munmap(data2, bo_size);
> + gem_close(fd, bo1);
> + gem_close(fd, bo2);
> + xe_exec_queue_destroy(fd, exec_queue);
> + xe_vm_destroy(fd, vm);
> +}
> +
> +igt_main
> +{
> + int fd;
> + struct drm_xe_engine *engine;
> +
> + igt_fixture {
> + fd = drm_open_driver(DRIVER_XE);
> + }
> +
> + igt_subtest("basic-tlb") {
> + engine = xe_engine(fd, 0);
> + tlb_invalidation(fd, &engine->instance);
> + }
> +
> + igt_fixture {
> + drm_close_driver(fd);
> + }
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 0782d3cc7..b0a490ccb 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -317,6 +317,7 @@ intel_xe_progs = [
> 'xe_sysfs_preempt_timeout',
> 'xe_sysfs_scheduler',
> 'xe_sysfs_timeslice_duration',
> + 'xe_tlb',
> ]
>
> intel_xe_eudebug_progs = [
More information about the igt-dev
mailing list