[PATCH 3/3] tests/intel/xe_eudebug_online: add pagefault stress tests

Mon Jul 7 16:50:12 UTC 2025

W dniu 01.07.2025 o 15:53, Jan Maslak pisze:
> Add a stress variant of the already existing
> xe_eudebug_online at pagefault-[read|write] tests that increases
> number of threads in the shader and number of threads in
> threadgroup to their maximum available amounts.
>
> Signed-off-by: Jan Maslak <jan.maslak at intel.com>
> ---
>   tests/intel/xe_eudebug_online.c | 75 ++++++++++++++++++++++++++++-----
>   1 file changed, 64 insertions(+), 11 deletions(-)
>
> diff --git a/tests/intel/xe_eudebug_online.c b/tests/intel/xe_eudebug_online.c
> index 0498be80a..7f06fef27 100644
> --- a/tests/intel/xe_eudebug_online.c
> +++ b/tests/intel/xe_eudebug_online.c
> @@ -40,6 +40,7 @@
>   #define SHADER_PAGEFAULT_READ		(1 << 14)
>   #define SHADER_PAGEFAULT_WRITE		(1 << 15)
>   #define FAULTABLE_VM			(1 << 16)
> +#define PAGEFAULT_STRESS_TEST		(1 << 17)
>   #define TRIGGER_UFENCE_SET_BREAKPOINT	(1 << 24)
>   #define TRIGGER_RESUME_SINGLE_WALK	(1 << 25)
>   #define TRIGGER_RESUME_PARALLEL_WALK	(1 << 26)
> @@ -125,8 +126,34 @@ static struct intel_buf *create_uc_buf(int fd, int width, int height, uint64_t r
>   	return buf;
>   }
>   
> -static int get_number_of_threads(uint64_t flags)
> +static int get_maximum_number_of_threads(int fd)
>   {
> +	uint32_t hwconfig_len;
> +	uint32_t subslices = 0;
> +	uint32_t eus_per_subslice = 0;
> +	uint32_t threads_per_eu = 0;
> +	uint32_t *val;
> +
> +	val = xe_hwconfig_lookup_value(fd, INTEL_HWCONFIG_MAX_SUBSLICE, &hwconfig_len);
> +	igt_assert(val && hwconfig_len == 1);
> +	subslices = *val;
> +
> +	val = xe_hwconfig_lookup_value(fd, INTEL_HWCONFIG_MAX_EU_PER_SUBSLICE, &hwconfig_len);
> +	igt_assert(val && hwconfig_len == 1);
> +	eus_per_subslice = *val;
> +
> +	val = xe_hwconfig_lookup_value(fd, INTEL_HWCONFIG_NUM_THREADS_PER_EU, &hwconfig_len);
> +	igt_assert(val && hwconfig_len == 1);
> +	threads_per_eu = *val;
> +
> +	return subslices * eus_per_subslice * threads_per_eu;
> +}

Nice function; with a small helper it would be more compact:

/*
 * Look up a single hwconfig attribute and return its value.
 *
 * @fd:        device file descriptor passed through to xe_hwconfig_lookup_value()
 * @attribute: hwconfig attribute to query
 *
 * Asserts that the lookup returned a non-NULL pointer and that the reported
 * length is exactly 1, then returns the first (only) value.
 */
static uint32_t xe_hwconfig_get_value(int fd, enum intel_hwconfig attribute)
{
	uint32_t len, *val;

	val = xe_hwconfig_lookup_value(fd, attribute, &len);
	/* Check the length that was just filled in by the lookup above. */
	igt_assert(val && len == 1);
	return *val;
}
/*
 * Return the product of three hwconfig values queried for @fd: maximum
 * subslices, maximum EUs per subslice, and number of threads per EU.
 */
static int get_maximum_number_of_threads(int fd)
{
	const uint32_t subslices =
		xe_hwconfig_get_value(fd, INTEL_HWCONFIG_MAX_SUBSLICE);
	const uint32_t eus_per_subslice =
		xe_hwconfig_get_value(fd, INTEL_HWCONFIG_MAX_EU_PER_SUBSLICE);
	const uint32_t threads_per_eu =
		xe_hwconfig_get_value(fd, INTEL_HWCONFIG_NUM_THREADS_PER_EU);

	return subslices * eus_per_subslice * threads_per_eu;
}

What is the functional difference between the above and
query_max_concurrent_threads? If they should return the same value, maybe
you can replace it

with your implementation, which looks superior (if it exists only in our
internal branch, then you can try replacing it there).

Have you tested this on all supported platforms?

The rest looks OK.

Regards

Andrzej

> +
> +static int get_number_of_threads(int fd, uint64_t flags)
> +{
> +	if (flags & (PAGEFAULT_STRESS_TEST))
> +		return get_maximum_number_of_threads(fd);
> +
>   	if (flags & (SHADER_MIN_THREADS | SHADER_PAGEFAULT))
>   		return 16;
>   
> @@ -154,8 +181,8 @@ static int caching_get_instruction_count(int fd, uint32_t s_dim__x, int flags)
>   
>   static struct gpgpu_shader *get_shader(int fd, const unsigned int flags)
>   {
> -	struct dim_t w_dim = walker_dimensions(get_number_of_threads(flags));
> -	struct dim_t s_dim = surface_dimensions(get_number_of_threads(flags));
> +	struct dim_t w_dim = walker_dimensions(get_number_of_threads(fd, flags));
> +	struct dim_t s_dim = surface_dimensions(get_number_of_threads(fd, flags));
>   	static struct gpgpu_shader *shader;
>   
>   	shader = gpgpu_shader_create(fd);
> @@ -208,7 +235,7 @@ static struct gpgpu_shader *get_shader(int fd, const unsigned int flags)
>   
>   static struct gpgpu_shader *get_sip(int fd, const unsigned int flags)
>   {
> -	struct dim_t w_dim = walker_dimensions(get_number_of_threads(flags));
> +	struct dim_t w_dim = walker_dimensions(get_number_of_threads(fd, flags));
>   	static struct gpgpu_shader *sip;
>   
>   	sip = gpgpu_shader_create(fd);
> @@ -671,7 +698,7 @@ static void eu_attention_resume_trigger(struct xe_eudebug_debugger *d,
>   	}
>   
>   	if (d->flags & (SHADER_LOOP | SHADER_PAGEFAULT)) {
> -		uint32_t threads = get_number_of_threads(d->flags);
> +		uint32_t threads = get_number_of_threads(d->master_fd, d->flags);
>   		uint32_t val = STEERING_END_LOOP;
>   
>   		igt_assert_eq(pwrite(data->vm_fd, &val, sizeof(uint32_t),
> @@ -693,7 +720,7 @@ static void eu_attention_resume_single_step_trigger(struct xe_eudebug_debugger *
>   {
>   	struct drm_xe_eudebug_event_eu_attention *att = (void *) e;
>   	struct online_debug_data *data = d->ptr;
> -	const int threads = get_number_of_threads(d->flags);
> +	const int threads = get_number_of_threads(d->fd, d->flags);
>   	uint32_t val;
>   	size_t sz = sizeof(uint32_t);
>   
> @@ -918,7 +945,7 @@ static void eu_attention_resume_caching_trigger(struct xe_eudebug_debugger *d,
>   {
>   	struct drm_xe_eudebug_event_eu_attention *att = (void *)e;
>   	struct online_debug_data *data = d->ptr;
> -	struct dim_t s_dim = surface_dimensions(get_number_of_threads(d->flags));
> +	struct dim_t s_dim = surface_dimensions(get_number_of_threads(d->fd, d->flags));
>   	uint32_t *kernel_offset = &data->kernel_offset;
>   	int *counter = &data->att_event_counter;
>   	int val;
> @@ -1042,7 +1069,7 @@ static uint64_t get_memory_region(int fd, int flags, int region_bitmask)
>   
>   static void run_online_client(struct xe_eudebug_client *c)
>   {
> -	int threads = get_number_of_threads(c->flags);
> +	int threads;
>   	const uint64_t target_offset = 0x1a000000;
>   	const uint64_t bb_offset = 0x1b000000;
>   	size_t bb_size;
> @@ -1059,8 +1086,8 @@ static void run_online_client(struct xe_eudebug_client *c)
>   		.num_placements = 1,
>   		.extensions = c->flags & DISABLE_DEBUG_MODE ? 0 : to_user_pointer(&ext)
>   	};
> -	struct dim_t w_dim = walker_dimensions(threads);
> -	struct dim_t s_dim = surface_dimensions(threads);
> +	struct dim_t w_dim;
> +	struct dim_t s_dim;
>   	struct timespec ts = { };
>   	struct gpgpu_shader *sip, *shader;
>   	uint32_t metadata_id[2];
> @@ -1069,6 +1096,7 @@ static void run_online_client(struct xe_eudebug_client *c)
>   	struct intel_buf *buf;
>   	uint32_t *ptr;
>   	int fd, vm_flags;
> +	uint32_t num_threads_in_tg = 1;
>   
>   	metadata[0] = calloc(2, sizeof(**metadata));
>   	metadata[1] = calloc(2, sizeof(**metadata));
> @@ -1077,9 +1105,17 @@ static void run_online_client(struct xe_eudebug_client *c)
>   
>   	fd = xe_eudebug_client_open_driver(c);
>   
> +	threads = get_number_of_threads(fd, c->flags);
> +	w_dim = walker_dimensions(threads);
> +	s_dim = surface_dimensions(threads);
> +
>   	shader = get_shader(fd, c->flags);
>   	bb_size = get_bb_size(fd, shader);
>   
> +	if (c->flags & PAGEFAULT_STRESS_TEST)
> +		num_threads_in_tg = compute_max_threads_in_tg(shader->vrt, false, SIMD_SIZE,
> +							      false);
> +
>   	/* Additional memory for steering control */
>   	if (c->flags & SHADER_LOOP || c->flags & SHADER_SINGLE_STEP || c->flags & SHADER_PAGEFAULT)
>   		s_dim.y++;
> @@ -1116,7 +1152,7 @@ static void run_online_client(struct xe_eudebug_client *c)
>   	sip = get_sip(fd, c->flags);
>   
>   	igt_nsec_elapsed(&ts);
> -	gpgpu_shader_exec(ibb, buf, w_dim.x, w_dim.y, 1, shader, sip, 0, 0);
> +	gpgpu_shader_exec(ibb, buf, w_dim.x, w_dim.y, num_threads_in_tg, shader, sip, 0, 0);
>   
>   	gpgpu_shader_destroy(sip);
>   	gpgpu_shader_destroy(shader);
> @@ -1630,6 +1666,18 @@ static void test_set_breakpoint_online_sigint_debugger(int fd,
>    * Description:
>    *     Check whether KMD sends pagefault event for workload in debug mode that
>    *     triggers a write pagefault.
> + *
> + * SUBTEST: pagefault-read-stress
> + * Functionality: page faults
> + * Description:
> + *     Check whether KMD sends read pagefault event for workload in debug mode
> + *     with many threads.
> + *
> + * SUBTEST: pagefault-write-stress
> + * Functionality: page faults
> + * Description:
> + *     Check whether KMD sends write pagefault event for workload in debug mode
> + *     with many threads.
>    */
>   static void test_pagefault_online(int fd, struct drm_xe_engine_class_instance *hwe,
>   				  int flags)
> @@ -2665,6 +2713,11 @@ igt_main
>   	test_gt_render_or_compute("pagefault-write", fd, hwe)
>   		test_pagefault_online(fd, hwe, SHADER_PAGEFAULT_WRITE);
>   
> +	test_gt_render_or_compute("pagefault-read-stress", fd, hwe)
> +		test_pagefault_online(fd, hwe, SHADER_PAGEFAULT_READ | PAGEFAULT_STRESS_TEST);
> +	test_gt_render_or_compute("pagefault-write-stress", fd, hwe)
> +		test_pagefault_online(fd, hwe, SHADER_PAGEFAULT_WRITE | PAGEFAULT_STRESS_TEST);
> +
>   	igt_fixture {
>   		xe_eudebug_enable(fd, was_enabled);
>