[PATCH i-g-t v6 16/17] tests/xe_eudebug_online: Debug client which runs workloads on EU
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Fri Sep 13 11:39:02 UTC 2024
On Thu, Sep 05, 2024 at 11:28:11AM +0200, Christoph Manszewski wrote:
> From: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
>
> For typical debugging under gdb one can identify two main use cases:
> accessing and manipulating resources created by the application and
> manipulating thread execution (interrupting and setting breakpoints).
>
> This test adds coverage for the latter by checking that:
> - EU workloads that hit an instruction with the breakpoint bit set will
> halt execution and the debugger will report this via attention events,
> - the debugger is able to interrupt workload execution by issuing a
> 'interrupt_all' ioctl call,
> - the debugger is able to resume selected workloads that are stopped.
>
> Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> Signed-off-by: Christoph Manszewski <christoph.manszewski at intel.com>
> Signed-off-by: Dominik Karol Piątkowski <dominik.karol.piatkowski at intel.com>
> Signed-off-by: Pawel Sikora <pawel.sikora at intel.com>
> Signed-off-by: Karolina Stolarek <karolina.stolarek at intel.com>
> Signed-off-by: Kolanupaka Naveena <kolanupaka.naveena at intel.com>
> ---
> tests/intel/xe_eudebug_online.c | 2254 +++++++++++++++++++++++++++++++
> tests/meson.build | 1 +
> 2 files changed, 2255 insertions(+)
> create mode 100644 tests/intel/xe_eudebug_online.c
>
> diff --git a/tests/intel/xe_eudebug_online.c b/tests/intel/xe_eudebug_online.c
> new file mode 100644
> index 000000000..20f8e3601
> --- /dev/null
> +++ b/tests/intel/xe_eudebug_online.c
> @@ -0,0 +1,2254 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +/**
> + * TEST: Tests for eudebug online functionality
> + * Category: Core
> + * Mega feature: EUdebug
> + * Sub-category: EUdebug tests
> + * Functionality: eu kernel debug
> + * Test category: functionality test
> + */
> +
> +#include "xe/xe_eudebug.h"
> +#include "xe/xe_ioctl.h"
> +#include "xe/xe_query.h"
> +#include "igt.h"
> +#include "intel_pat.h"
> +#include "intel_mocs.h"
> +#include "gpgpu_shader.h"
> +
> +#define SHADER_NOP (0 << 0)
> +#define SHADER_BREAKPOINT (1 << 0)
> +#define SHADER_LOOP (1 << 1)
> +#define SHADER_SINGLE_STEP (1 << 2)
> +#define SIP_SINGLE_STEP (1 << 3)
> +#define DISABLE_DEBUG_MODE (1 << 4)
> +#define SHADER_N_NOOP_BREAKPOINT (1 << 5)
> +#define SHADER_CACHING_SRAM (1 << 6)
> +#define SHADER_CACHING_VRAM (1 << 7)
> +#define SHADER_MIN_THREADS (1 << 8)
> +#define DO_NOT_EXPECT_CANARIES (1 << 9)
> +#define TRIGGER_UFENCE_SET_BREAKPOINT (1 << 24)
> +#define TRIGGER_RESUME_SINGLE_WALK (1 << 25)
> +#define TRIGGER_RESUME_PARALLEL_WALK (1 << 26)
> +#define TRIGGER_RECONNECT (1 << 27)
> +#define TRIGGER_RESUME_SET_BP (1 << 28)
> +#define TRIGGER_RESUME_DELAYED (1 << 29)
> +#define TRIGGER_RESUME_DSS (1 << 30)
> +#define TRIGGER_RESUME_ONE (1 << 31)
> +
> +#define DEBUGGER_REATTACHED 1
> +
> +#define SHADER_LOOP_N 3
> +#define SINGLE_STEP_COUNT 16
> +#define STEERING_SINGLE_STEP 0
> +#define STEERING_CONTINUE 0x00c0ffee
> +#define STEERING_END_LOOP 0xdeadca11
> +
> +#define CACHING_INIT_VALUE 0xcafe0000
> +#define CACHING_POISON_VALUE 0xcafedead
> +#define CACHING_VALUE(n) (CACHING_INIT_VALUE + (n))
> +
> +#define SHADER_CANARY 0x01010101
> +
> +#define WALKER_X_DIM 4
> +#define WALKER_ALIGNMENT 16
> +#define SIMD_SIZE 16
> +
> +#define STARTUP_TIMEOUT_MS 3000
> +#define WORKLOAD_DELAY_US (5000 * 1000)
> +
> +#define PAGE_SIZE 4096
> +
> +struct dim_t {
> + uint32_t x;
> + uint32_t y;
> + uint32_t alignment;
> +};
> +
> +static struct dim_t walker_dimensions(int threads)
> +{
> + uint32_t x_dim = min_t(x_dim, threads, WALKER_X_DIM);
> + struct dim_t ret = {
> + .x = x_dim,
> + .y = threads / x_dim,
> + .alignment = WALKER_ALIGNMENT
> + };
> +
> + return ret;
> +}
> +
> +static struct dim_t surface_dimensions(int threads)
> +{
> + struct dim_t ret = walker_dimensions(threads);
> +
> + ret.y = max_t(ret.y, threads / ret.x, 4);
> + ret.x *= SIMD_SIZE;
> + ret.alignment *= SIMD_SIZE;
> +
> + return ret;
> +}
> +
> +static uint32_t steering_offset(int threads)
> +{
> + struct dim_t w = walker_dimensions(threads);
> +
> + return ALIGN(w.x, w.alignment) * w.y * 4;
> +}
> +
> +static struct intel_buf *create_uc_buf(int fd, int width, int height)
> +{
> + struct intel_buf *buf;
> +
> + buf = intel_buf_create_full(buf_ops_create(fd), 0, width / 4, height,
> + 32, 0, I915_TILING_NONE, 0, 0, 0,
> + vram_if_possible(fd, 0),
> + DEFAULT_PAT_INDEX, DEFAULT_MOCS_INDEX);
> +
> + return buf;
> +}
> +
> +static int get_number_of_threads(uint64_t flags)
> +{
> + if (flags & SHADER_MIN_THREADS)
> + return 16;
> +
> + if (flags & (TRIGGER_RESUME_ONE | TRIGGER_RESUME_SINGLE_WALK |
> + TRIGGER_RESUME_PARALLEL_WALK | SHADER_CACHING_SRAM | SHADER_CACHING_VRAM))
> + return 32;
> +
> + return 512;
> +}
> +
> +static int caching_get_instruction_count(int fd, uint32_t s_dim__x, int flags)
> +{
> + uint64_t memory;
> +
> + igt_assert((flags & SHADER_CACHING_SRAM) || (flags & SHADER_CACHING_VRAM));
> +
> + if (flags & SHADER_CACHING_SRAM)
> + memory = system_memory(fd);
> + else
> + memory = vram_memory(fd, 0);
> +
> + /* each instruction writes to given y offset */
> + return (2 * xe_min_page_size(fd, memory)) / s_dim__x;
> +}
> +
> +static struct gpgpu_shader *get_shader(int fd, const unsigned int flags)
> +{
> + struct dim_t w_dim = walker_dimensions(get_number_of_threads(flags));
> + struct dim_t s_dim = surface_dimensions(get_number_of_threads(flags));
> + static struct gpgpu_shader *shader;
> +
> + shader = gpgpu_shader_create(fd);
> +
> + gpgpu_shader__write_dword(shader, SHADER_CANARY, 0);
> + if (flags & SHADER_BREAKPOINT) {
> + gpgpu_shader__nop(shader);
> + gpgpu_shader__breakpoint(shader);
> + } else if (flags & SHADER_LOOP) {
> + gpgpu_shader__label(shader, 0);
> + gpgpu_shader__write_dword(shader, SHADER_CANARY, 0);
> + gpgpu_shader__jump_neq(shader, 0, w_dim.y, STEERING_END_LOOP);
> + gpgpu_shader__write_dword(shader, SHADER_CANARY, 0);
> + } else if (flags & SHADER_SINGLE_STEP) {
> + gpgpu_shader__nop(shader);
> + gpgpu_shader__breakpoint(shader);
> + for (int i = 0; i < SINGLE_STEP_COUNT; i++)
> + gpgpu_shader__nop(shader);
> + } else if (flags & SHADER_N_NOOP_BREAKPOINT) {
> + for (int i = 0; i < SHADER_LOOP_N; i++) {
> + gpgpu_shader__nop(shader);
> + gpgpu_shader__breakpoint(shader);
> + }
> + } else if ((flags & SHADER_CACHING_SRAM) || (flags & SHADER_CACHING_VRAM)) {
> + gpgpu_shader__nop(shader);
> + gpgpu_shader__breakpoint(shader);
> + for (int i = 0; i < caching_get_instruction_count(fd, s_dim.x, flags); i++)
> + gpgpu_shader__common_target_write_u32(shader, s_dim.y + i, CACHING_VALUE(i));
> + gpgpu_shader__nop(shader);
> + gpgpu_shader__breakpoint(shader);
> + }
> +
> + gpgpu_shader__eot(shader);
Add blank line.
> + return shader;
> +}
> +
> +static struct gpgpu_shader *get_sip(int fd, const unsigned int flags)
> +{
> + struct dim_t w_dim = walker_dimensions(get_number_of_threads(flags));
> + static struct gpgpu_shader *sip;
> +
> + sip = gpgpu_shader_create(fd);
> + gpgpu_shader__write_aip(sip, 0);
> +
> + gpgpu_shader__wait(sip);
> + if (flags & SIP_SINGLE_STEP)
> + gpgpu_shader__end_system_routine_step_if_eq(sip, w_dim.y, 0);
> + else
> + gpgpu_shader__end_system_routine(sip, true);
Same.
> + return sip;
> +}
> +
> +static int count_set_bits(void *ptr, size_t size)
> +{
> + uint8_t *p = ptr;
> + int count = 0;
> + int i, j;
> +
hweight()?
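Something along these lines (untested; I don't recall whether IGT
exports a hweight helper, so the compiler builtin as a fallback):

static int count_set_bits(void *ptr, size_t size)
{
	const uint8_t *p = ptr;
	int count = 0;

	for (size_t i = 0; i < size; i++)
		count += __builtin_popcount(p[i]);

	return count;
}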
> + for (i = 0; i < size; i++)
> + for (j = 0; j < 8; j++)
> + count += !!(p[i] & (1 << j));
> +
> + return count;
> +}
> +
> +static int count_canaries_eq(uint32_t *ptr, struct dim_t w_dim, uint32_t value)
> +{
> + int count = 0;
> + int x, y;
> +
> + for (x = 0; x < w_dim.x; x++)
> + for (y = 0; y < w_dim.y; y++)
> + if (READ_ONCE(ptr[x + ALIGN(w_dim.x, w_dim.alignment) * y]) == value)
> + count++;
> +
> + return count;
> +}
> +
> +static int count_canaries_neq(uint32_t *ptr, struct dim_t w_dim, uint32_t value)
> +{
> + return w_dim.x * w_dim.y - count_canaries_eq(ptr, w_dim, value);
> +}
> +
> +static const char *td_ctl_cmd_to_str(uint32_t cmd)
> +{
> + switch (cmd) {
> + case DRM_XE_EUDEBUG_EU_CONTROL_CMD_INTERRUPT_ALL:
> + return "interrupt all";
> + case DRM_XE_EUDEBUG_EU_CONTROL_CMD_STOPPED:
> + return "stopped";
> + case DRM_XE_EUDEBUG_EU_CONTROL_CMD_RESUME:
> + return "resume";
> + default:
> + return "unknown command";
> + }
> +}
> +
> +static int __eu_ctl(int debugfd, uint64_t client,
> + uint64_t exec_queue, uint64_t lrc,
> + uint8_t *bitmask, uint32_t *bitmask_size,
> + uint32_t cmd, uint64_t *seqno)
> +{
> + struct drm_xe_eudebug_eu_control control = {
> + .client_handle = lower_32_bits(client),
> + .exec_queue_handle = exec_queue,
> + .lrc_handle = lrc,
> + .cmd = cmd,
> + .bitmask_ptr = to_user_pointer(bitmask),
> + };
> + int ret;
> +
> + if (bitmask_size)
> + control.bitmask_size = *bitmask_size;
> +
> + ret = igt_ioctl(debugfd, DRM_XE_EUDEBUG_IOCTL_EU_CONTROL, &control);
> +
> + if (ret < 0)
> + return -errno;
> +
> + igt_debug("EU CONTROL[%llu]: %s\n", control.seqno, td_ctl_cmd_to_str(cmd));
> +
> + if (bitmask_size)
> + *bitmask_size = control.bitmask_size;
> +
> + if (seqno)
> + *seqno = control.seqno;
> +
> + return 0;
> +}
> +
> +static uint64_t eu_ctl(int debugfd, uint64_t client,
> + uint64_t exec_queue, uint64_t lrc,
> + uint8_t *bitmask, uint32_t *bitmask_size, uint32_t cmd)
> +{
> + uint64_t seqno;
> +
> + igt_assert_eq(__eu_ctl(debugfd, client, exec_queue, lrc, bitmask,
> + bitmask_size, cmd, &seqno), 0);
> +
> + return seqno;
> +}
> +
> +static bool intel_gen_needs_resume_wa(int fd)
> +{
> + const uint32_t id = intel_get_drm_devid(fd);
> +
> + return intel_gen(id) == 12 && intel_graphics_ver(id) < IP_VER(12, 55);
> +}
> +
> +static uint64_t eu_ctl_resume(int fd, int debugfd, uint64_t client,
> + uint64_t exec_queue, uint64_t lrc,
> + uint8_t *bitmask, uint32_t bitmask_size)
> +{
> + int i;
> +
> + /* Wa_14011332042 */
> + if (intel_gen_needs_resume_wa(fd)) {
> + uint32_t *att_reg_half = (uint32_t *)bitmask;
> +
> + for (i = 0; i < bitmask_size / sizeof(uint32_t); i += 2) {
> + att_reg_half[i] |= att_reg_half[i + 1];
> + att_reg_half[i + 1] |= att_reg_half[i];
> + }
> + }
> +
> + return eu_ctl(debugfd, client, exec_queue, lrc, bitmask, &bitmask_size,
> + DRM_XE_EUDEBUG_EU_CONTROL_CMD_RESUME);
> +}
> +
> +static inline uint64_t eu_ctl_stopped(int debugfd, uint64_t client,
> + uint64_t exec_queue, uint64_t lrc,
> + uint8_t *bitmask, uint32_t *bitmask_size)
> +{
> + return eu_ctl(debugfd, client, exec_queue, lrc, bitmask, bitmask_size,
> + DRM_XE_EUDEBUG_EU_CONTROL_CMD_STOPPED);
> +}
> +
> +static inline uint64_t eu_ctl_interrupt_all(int debugfd, uint64_t client,
> + uint64_t exec_queue, uint64_t lrc)
> +{
> + return eu_ctl(debugfd, client, exec_queue, lrc, NULL, 0,
> + DRM_XE_EUDEBUG_EU_CONTROL_CMD_INTERRUPT_ALL);
> +}
> +
> +struct online_debug_data {
> + pthread_mutex_t mutex;
> + /* client in */
> + struct drm_xe_engine_class_instance hwe;
> + /* client out */
> + int threads_count;
> + /* debugger internals */
> + uint64_t client_handle;
> + uint64_t exec_queue_handle;
> + uint64_t lrc_handle;
> + uint64_t target_offset;
> + size_t target_size;
> + uint64_t bb_offset;
> + size_t bb_size;
> + int vm_fd;
> + uint32_t first_aip;
> + uint64_t *aips_offset_table;
> + uint32_t steps_done;
> + uint8_t *single_step_bitmask;
> + int stepped_threads_count;
> + struct timespec exception_arrived;
> + int last_eu_control_seqno;
> + struct drm_xe_eudebug_event *exception_event;
> +};
> +
> +static struct online_debug_data *
> +online_debug_data_create(struct drm_xe_engine_class_instance *hwe)
> +{
> + struct online_debug_data *data;
> +
> + data = mmap(0, ALIGN(sizeof(*data), PAGE_SIZE),
> + PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
Check that data is a valid pointer.
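Note that on failure mmap() returns MAP_FAILED rather than NULL, so
something like:

	igt_assert(data != MAP_FAILED);

right after the call should do.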
> + memcpy(&data->hwe, hwe, sizeof(*hwe));
> + pthread_mutex_init(&data->mutex, NULL);
> + data->client_handle = -1ULL;
> + data->exec_queue_handle = -1ULL;
> + data->lrc_handle = -1ULL;
> + data->vm_fd = -1;
> + data->stepped_threads_count = -1;
> +
> + return data;
> +}
> +
> +static void online_debug_data_destroy(struct online_debug_data *data)
> +{
> + free(data->aips_offset_table);
> + munmap(data, ALIGN(sizeof(*data), PAGE_SIZE));
> +}
> +
> +static void eu_attention_debug_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_eu_attention *att = (void *)e;
> + uint32_t *ptr = (uint32_t *)att->bitmask;
> +
> + igt_debug("EVENT[%llu] eu-attenttion; threads=%d "
> + "client[%llu], exec_queue[%llu], lrc[%llu], bitmask_size[%d]\n",
> + att->base.seqno, count_set_bits(att->bitmask, att->bitmask_size),
> + att->client_handle, att->exec_queue_handle,
> + att->lrc_handle, att->bitmask_size);
> +
> + for (uint32_t i = 0; i < att->bitmask_size / 4; i += 2)
> + igt_debug("bitmask[%d] = 0x%08x%08x\n", i / 2, ptr[i], ptr[i + 1]);
> +}
> +
> +static void eu_attention_reset_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_eu_attention *att = (void *)e;
> + uint32_t *ptr = (uint32_t *)att->bitmask;
> + struct online_debug_data *data = d->ptr;
> +
> + igt_debug("EVENT[%llu] eu-attention with reset; threads=%d "
> + "client[%llu], exec_queue[%llu], lrc[%llu], bitmask_size[%d]\n",
> + att->base.seqno, count_set_bits(att->bitmask, att->bitmask_size),
> + att->client_handle, att->exec_queue_handle,
> + att->lrc_handle, att->bitmask_size);
> +
> + for (uint32_t i = 0; i < att->bitmask_size / 4; i += 2)
> + igt_debug("bitmask[%d] = 0x%08x%08x\n", i / 2, ptr[i], ptr[i + 1]);
> +
> + xe_force_gt_reset_async(d->master_fd, data->hwe.gt_id);
> +}
> +
> +static void copy_first_bit(uint8_t *dst, uint8_t *src, int size)
> +{
> + bool found = false;
> + int i, j;
> +
> + for (i = 0; i < size; i++) {
> + if (found) {
> + dst[i] = 0;
The function is static, but given the line above I would add a comment
saying that it clears the rest of the dst buffer. copy_first_bit() is
misleading, as you mean the first bit *set* - the first bit as such is
src[0] & 1. And what does 'first' mean here? Given, say,
src = { 0x0, 0xff, 0xcc, 0xaa } I would expect 'first' to be the most
significant bit of 0xff.
> + } else {
> + uint32_t tmp = src[i]; /* in case dst == src */
> +
> + for (j = 0; j < 8; j++) {
ffs()? Although, looking at copy_nth_bit() below, I have my doubts -
shouldn't this be fls()?
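For the least-significant-bit-first convention this could be as simple
as (untested sketch, ffs(3) from <strings.h>; the name is just a
suggestion):

static void copy_first_set_bit(uint8_t *dst, const uint8_t *src, int size)
{
	int i, bit = 0;

	for (i = 0; i < size; i++) {
		bit = ffs(src[i]); /* 1-based index of lowest set bit, 0 if none */
		if (bit)
			break;
	}

	memset(dst, 0, size); /* safe even when dst == src, bit is saved */
	if (bit)
		dst[i] = 1 << (bit - 1);
}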
> + dst[i] = tmp & (1 << j);
> + if (dst[i]) {
> + found = true;
> + break;
> + }
> + }
> + }
> + }
> +}
> +
> +static void copy_nth_bit(uint8_t *dst, uint8_t *src, int size, int n)
> +{
> + int count = 0;
> +
> + for (int i = 0; i < size; i++) {
> + uint32_t tmp = src[i];
> +
> + for (int j = 7; j >= 0; j--) {
I'm confused. In the above function you iterate starting from the least
significant bit, but here you start from the most significant bit.
Same concern about the function name - shouldn't this be
copy_nth_bit_set()?
> + if (tmp & (1 << j)) {
> + count++;
> + if (count == n)
> + dst[i] |= (1 << j);
> + else
> + dst[i] &= ~(1 << j);
Do I understand correctly that you are clearing all the other bits in
dst? It's rather odd to call a function copy_nth_bit() when it scans
for the n-th set bit and zeroes every other bit in dst. Or maybe I just
don't understand the logic behind this decision.
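If the intent really is "keep only the n-th set bit", I'd make that
explicit, e.g. (untested, keeping your MSB-first scan and still
allowing dst == src):

static void mask_nth_set_bit(uint8_t *dst, const uint8_t *src, int size, int n)
{
	int count = 0;

	for (int i = 0; i < size; i++) {
		uint8_t tmp = src[i]; /* dst may alias src */

		dst[i] = 0;
		for (int j = 7; j >= 0; j--)
			if ((tmp & (1 << j)) && ++count == n)
				dst[i] = 1 << j;
	}
}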
> + } else {
> + dst[i] &= ~(1 << j);
> + }
> + }
> + }
> +}
> +
> +/*
> + * Searches for the first instruction. It relies on the assumption
> + * that the shader kernel is placed before the sip within the bb.
> + */
> +static uint32_t find_kernel_in_bb(struct gpgpu_shader *kernel,
> + struct online_debug_data *data)
> +{
> + uint32_t *p = kernel->code;
> + size_t sz = 4 * sizeof(uint32_t);
> + uint32_t buf[4];
> + int i;
> +
> + for (i = 0; i < data->bb_size; i += sz) {
> + igt_assert_eq(pread(data->vm_fd, &buf, sz, data->bb_offset + i), sz);
> +
> +
Unnecessary blank line.
> + if (memcmp(p, buf, sz) == 0)
> + break;
> + }
Isn't it simpler to pread() the whole bb and then use memmem()? Unless
you want to exercise pread() at different offsets as well.
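I.e. something like (untested sketch in the context of
find_kernel_in_bb(); memmem() needs _GNU_SOURCE):

	uint8_t *bb = malloc(data->bb_size);
	void *found;

	igt_assert(bb);
	igt_assert_eq(pread(data->vm_fd, bb, data->bb_size, data->bb_offset),
		      data->bb_size);
	found = memmem(bb, data->bb_size, kernel->code, sz);
	igt_assert(found);
	i = (uint8_t *)found - bb;
	free(bb);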
> +
> + igt_assert(i < data->bb_size);
> +
> + return i;
> +}
> +
> +static void set_breakpoint_once(struct xe_eudebug_debugger *d,
> + struct online_debug_data *data)
> +{
> + const uint32_t breakpoint_bit = 1 << 30;
> + size_t sz = sizeof(uint32_t);
> + struct gpgpu_shader *kernel;
> + uint32_t aip;
> +
> + kernel = get_shader(d->master_fd, d->flags);
> +
> + if (data->first_aip) {
> + uint32_t expected = find_kernel_in_bb(kernel, data) + kernel->size * 4 - 0x10;
> +
> + igt_assert_eq(pread(data->vm_fd, &aip, sz, data->target_offset), sz);
> + igt_assert_eq_u32(aip, expected);
I've checked how this is used: this branch just compares the aip, and
it seems the function is called a second time to validate that the
target offset contains the stored aip. Shouldn't that live in a
separate function, like check_aip() or similar, rather than in
set_breakpoint_once()?
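I.e. roughly (the name is just a suggestion, body lifted from the
branch above):

static void check_aip(struct online_debug_data *data, uint32_t expected)
{
	size_t sz = sizeof(uint32_t);
	uint32_t aip;

	igt_assert_eq(pread(data->vm_fd, &aip, sz, data->target_offset), sz);
	igt_assert_eq_u32(aip, expected);
}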
> + } else {
> + uint32_t instr_usdw;
> +
> + igt_assert(data->vm_fd != -1);
> + igt_assert(data->target_size != 0);
> + igt_assert(data->bb_size != 0);
> +
> + igt_assert_eq(pread(data->vm_fd, &aip, sz, data->target_offset), sz);
> + data->first_aip = aip;
> +
> + aip = find_kernel_in_bb(kernel, data);
> +
> + /* set breakpoint on last instruction */
> + aip += kernel->size * 4 - 0x10;
> + igt_assert_eq(pread(data->vm_fd, &instr_usdw, sz,
> + data->bb_offset + aip), sz);
> + instr_usdw |= breakpoint_bit;
> + igt_assert_eq(pwrite(data->vm_fd, &instr_usdw, sz,
> + data->bb_offset + aip), sz);
> +
> + }
> +
> + gpgpu_shader_destroy(kernel);
> +}
> +
> +static void get_aips_offset_table(struct online_debug_data *data, int threads)
> +{
> + size_t sz = sizeof(uint32_t);
> + uint32_t aip;
> + uint32_t first_aip;
> + int table_index = 0;
> +
> + if (data->aips_offset_table)
> + return;
> +
> + data->aips_offset_table = malloc(threads * sizeof(uint64_t));
> + igt_assert(data->aips_offset_table);
> +
> + igt_assert_eq(pread(data->vm_fd, &first_aip, sz, data->target_offset), sz);
> + data->first_aip = first_aip;
> + data->aips_offset_table[table_index++] = 0;
> +
> + fsync(data->vm_fd);
> + for (int i = sz; i < data->target_size; i += sz) {
> + igt_assert_eq(pread(data->vm_fd, &aip, sz, data->target_offset + i), sz);
> + if (aip == first_aip)
> + data->aips_offset_table[table_index++] = i;
> + }
> +
> + igt_assert_eq(threads, table_index);
> +
> + igt_debug("AIPs offset table:\n");
> + for (int i = 0; i < threads; i++)
> + igt_debug("%lx\n", data->aips_offset_table[i]);
> +}
> +
> +static int get_stepped_threads_count(struct online_debug_data *data, int threads)
> +{
> + int count = 0;
> + size_t sz = sizeof(uint32_t);
> + uint32_t aip;
> +
> + fsync(data->vm_fd);
> + for (int i = 0; i < threads; i++) {
> + igt_assert_eq(pread(data->vm_fd, &aip, sz,
> + data->target_offset + data->aips_offset_table[i]), sz);
> + if (aip != data->first_aip) {
> + igt_assert(aip == data->first_aip + 0x10);
> + count++;
> + }
> + }
> +
> + return count;
> +}
> +
> +static void save_first_exception_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct online_debug_data *data = d->ptr;
> +
> + pthread_mutex_lock(&data->mutex);
> + if (!data->exception_event) {
> + igt_gettime(&data->exception_arrived);
> + data->exception_event = igt_memdup(e, e->len);
> + }
> + pthread_mutex_unlock(&data->mutex);
> +}
> +
> +#define MAX_PREEMPT_TIMEOUT 10ull
> +static void eu_attention_resume_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_eu_attention *att = (void *) e;
> + struct online_debug_data *data = d->ptr;
> + uint32_t bitmask_size = att->bitmask_size;
> + uint8_t *bitmask;
> + int i;
> +
> + if (data->last_eu_control_seqno > att->base.seqno)
> + return;
> +
> + bitmask = calloc(1, att->bitmask_size);
> +
> + eu_ctl_stopped(d->fd, att->client_handle, att->exec_queue_handle,
> + att->lrc_handle, bitmask, &bitmask_size);
> + igt_assert(bitmask_size == att->bitmask_size);
> + igt_assert(memcmp(bitmask, att->bitmask, att->bitmask_size) == 0);
> +
> + pthread_mutex_lock(&data->mutex);
> + if (igt_nsec_elapsed(&data->exception_arrived) < (MAX_PREEMPT_TIMEOUT + 1) * NSEC_PER_SEC &&
> + d->flags & TRIGGER_RESUME_DELAYED) {
> + pthread_mutex_unlock(&data->mutex);
> + free(bitmask);
> + return;
> + } else if (d->flags & TRIGGER_RESUME_ONE) {
> + copy_first_bit(bitmask, bitmask, bitmask_size);
> + } else if (d->flags & TRIGGER_RESUME_DSS) {
> + uint64_t *event = (uint64_t *)att->bitmask;
> + uint64_t *resume = (uint64_t *)bitmask;
> +
> + memset(bitmask, 0, bitmask_size);
> + for (i = 0; i < att->bitmask_size / sizeof(uint64_t); i++) {
> + if (!event[i])
> + continue;
> +
> + resume[i] = event[i];
> + break;
> + }
> + } else if (d->flags & TRIGGER_RESUME_SET_BP) {
> + set_breakpoint_once(d, data);
> + }
> +
> + if (d->flags & SHADER_LOOP) {
> + uint32_t threads = get_number_of_threads(d->flags);
> + uint32_t val = STEERING_END_LOOP;
> +
> + igt_assert_eq(pwrite(data->vm_fd, &val, sizeof(uint32_t),
> + data->target_offset + steering_offset(threads)),
> + sizeof(uint32_t));
> + fsync(data->vm_fd);
> + }
> + pthread_mutex_unlock(&data->mutex);
> +
> + data->last_eu_control_seqno = eu_ctl_resume(d->master_fd, d->fd, att->client_handle,
> + att->exec_queue_handle, att->lrc_handle,
> + bitmask, att->bitmask_size);
> +
> + free(bitmask);
> +}
> +
> +static void eu_attention_resume_single_step_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_eu_attention *att = (void *) e;
> + struct online_debug_data *data = d->ptr;
> + const int threads = get_number_of_threads(d->flags);
> + uint32_t val;
> + size_t sz = sizeof(uint32_t);
> +
> + get_aips_offset_table(data, threads);
> +
> + if (d->flags & TRIGGER_RESUME_PARALLEL_WALK) {
> + if (data->stepped_threads_count != -1)
> + if (data->steps_done < SINGLE_STEP_COUNT) {
> + int stepped_threads_count_after_resume =
> + get_stepped_threads_count(data, threads);
> + igt_debug("Stepped threads after: %d\n",
> + stepped_threads_count_after_resume);
> +
> + if (stepped_threads_count_after_resume == threads) {
> + data->first_aip += 0x10;
> + data->steps_done++;
> + }
> +
> + igt_debug("Shader steps: %d\n", data->steps_done);
> + igt_assert(data->stepped_threads_count == 0);
> + igt_assert(stepped_threads_count_after_resume == threads);
> + }
> +
> + if (data->steps_done < SINGLE_STEP_COUNT) {
> + data->stepped_threads_count = get_stepped_threads_count(data, threads);
> + igt_debug("Stepped threads before: %d\n", data->stepped_threads_count);
> + }
> +
> + val = data->steps_done < SINGLE_STEP_COUNT ? STEERING_SINGLE_STEP :
> + STEERING_CONTINUE;
> + } else if (d->flags & TRIGGER_RESUME_SINGLE_WALK) {
> + if (data->stepped_threads_count != -1)
> + if (data->steps_done < 2) {
> + int stepped_threads_count_after_resume =
> + get_stepped_threads_count(data, threads);
> + igt_debug("Stepped threads after: %d\n",
> + stepped_threads_count_after_resume);
> +
> + if (stepped_threads_count_after_resume == threads) {
> + data->first_aip += 0x10;
> + data->steps_done++;
> + free(data->single_step_bitmask);
> + data->single_step_bitmask = 0;
> + }
> +
> + igt_debug("Shader steps: %d\n", data->steps_done);
> + igt_assert(data->stepped_threads_count +
> + (intel_gen_needs_resume_wa(d->master_fd) ? 2 : 1) ==
> + stepped_threads_count_after_resume);
> + }
> +
> + if (data->steps_done < 2) {
> + data->stepped_threads_count = get_stepped_threads_count(data, threads);
> + igt_debug("Stepped threads before: %d\n", data->stepped_threads_count);
> + if (intel_gen_needs_resume_wa(d->master_fd)) {
> + if (!data->single_step_bitmask) {
> + data->single_step_bitmask = malloc(att->bitmask_size *
> + sizeof(uint8_t));
> + igt_assert(data->single_step_bitmask);
> + memcpy(data->single_step_bitmask, att->bitmask,
> + att->bitmask_size);
> + }
> +
> + copy_first_bit(att->bitmask, data->single_step_bitmask,
> + att->bitmask_size);
> + } else
> + copy_nth_bit(att->bitmask, att->bitmask, att->bitmask_size,
> + data->stepped_threads_count + 1);
> + }
> +
> + val = data->steps_done < 2 ? STEERING_SINGLE_STEP : STEERING_CONTINUE;
> + }
> +
> + igt_assert_eq(pwrite(data->vm_fd, &val, sz,
> + data->target_offset + steering_offset(threads)), sz);
> + fsync(data->vm_fd);
> +
> + eu_ctl_resume(d->master_fd, d->fd, att->client_handle,
> + att->exec_queue_handle, att->lrc_handle,
> + att->bitmask, att->bitmask_size);
> +
> + if (data->single_step_bitmask)
> + for (int i = 0; i < att->bitmask_size; i++)
> + data->single_step_bitmask[i] &= ~att->bitmask[i];
> +}
> +
> +static void open_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_client *client = (void *)e;
> + struct online_debug_data *data = d->ptr;
> +
> + if (e->flags & DRM_XE_EUDEBUG_EVENT_DESTROY)
> + return;
> +
> + pthread_mutex_lock(&data->mutex);
> + data->client_handle = client->client_handle;
> + pthread_mutex_unlock(&data->mutex);
> +}
> +
> +static void exec_queue_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_exec_queue *eq = (void *)e;
> + struct online_debug_data *data = d->ptr;
> +
> + if (e->flags & DRM_XE_EUDEBUG_EVENT_DESTROY)
> + return;
> +
> + pthread_mutex_lock(&data->mutex);
> + data->exec_queue_handle = eq->exec_queue_handle;
> + data->lrc_handle = eq->lrc_handle[0];
> + pthread_mutex_unlock(&data->mutex);
> +}
> +
> +static void vm_open_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_vm *vm = (void *)e;
> + struct online_debug_data *data = d->ptr;
> + struct drm_xe_eudebug_vm_open vo = {
> + .client_handle = vm->client_handle,
> + .vm_handle = vm->vm_handle,
> + };
> + int fd;
> +
> + if (e->flags & DRM_XE_EUDEBUG_EVENT_CREATE) {
> + fd = igt_ioctl(d->fd, DRM_XE_EUDEBUG_IOCTL_VM_OPEN, &vo);
> + igt_assert_lte(0, fd);
> +
> + pthread_mutex_lock(&data->mutex);
> + igt_assert(data->vm_fd == -1);
> + data->vm_fd = fd;
> + pthread_mutex_unlock(&data->mutex);
> + return;
> + }
> +
> + pthread_mutex_lock(&data->mutex);
> + close(data->vm_fd);
> + data->vm_fd = -1;
> + pthread_mutex_unlock(&data->mutex);
> +}
> +
> +static void read_metadata(struct xe_eudebug_debugger *d,
> + uint64_t client_handle,
> + uint64_t metadata_handle,
> + uint64_t type,
> + uint64_t len)
> +{
> + struct drm_xe_eudebug_read_metadata rm = {
> + .client_handle = client_handle,
> + .metadata_handle = metadata_handle,
> + .size = len,
> + };
> + struct online_debug_data *data = d->ptr;
> + uint64_t *metadata;
> +
> + metadata = malloc(len);
> + igt_assert(metadata);
> +
> + rm.ptr = to_user_pointer(metadata);
> + igt_assert_eq(igt_ioctl(d->fd, DRM_XE_EUDEBUG_IOCTL_READ_METADATA, &rm), 0);
> +
> + pthread_mutex_lock(&data->mutex);
> + switch (type) {
> + case DRM_XE_DEBUG_METADATA_ELF_BINARY:
> + data->bb_offset = metadata[0];
> + data->bb_size = metadata[1];
> + break;
> + case DRM_XE_DEBUG_METADATA_PROGRAM_MODULE:
> + data->target_offset = metadata[0];
> + data->target_size = metadata[1];
> + break;
> + default:
> + break;
> + }
> + pthread_mutex_unlock(&data->mutex);
> +
> + free(metadata);
> +}
> +
> +static void create_metadata_trigger(struct xe_eudebug_debugger *d, struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_metadata *em = (void *)e;
> +
> + if (e->flags & DRM_XE_EUDEBUG_EVENT_CREATE)
> + read_metadata(d, em->client_handle, em->metadata_handle, em->type, em->len);
> +}
> +
> +static void overwrite_immediate_value_in_common_target_write(int vm_fd, uint64_t offset,
> + uint32_t old_val, uint32_t new_val)
> +{
> + uint64_t addr = offset;
> + int vals_changed = 0;
> + uint32_t val;
> +
> + while (vals_changed < 4) {
> + igt_assert_eq(pread(vm_fd, &val, sizeof(uint32_t), addr), sizeof(uint32_t));
> + if (val == old_val) {
> + igt_debug("val_before_write[%d]: %08x\n", vals_changed, val);
> + igt_assert_eq(pwrite(vm_fd, &new_val, sizeof(uint32_t), addr),
> + sizeof(uint32_t));
> + igt_assert_eq(pread(vm_fd, &val, sizeof(uint32_t), addr),
> + sizeof(uint32_t));
> + igt_debug("val_before_fsync[%d]: %08x\n", vals_changed, val);
> + fsync(vm_fd);
> + igt_assert_eq(pread(vm_fd, &val, sizeof(uint32_t), addr),
> + sizeof(uint32_t));
> + igt_debug("val_after_fsync[%d]: %08x\n", vals_changed, val);
> + igt_assert_eq_u32(val, new_val);
> + vals_changed++;
> + }
> + addr += sizeof(uint32_t);
> + }
> +}
> +
> +static void eu_attention_resume_caching_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_eu_attention *att = (void *)e;
> + struct online_debug_data *data = d->ptr;
> + static int counter;
> + static int kernel_in_bb;
Reusing this function (currently it is used only once) may be
error-prone because of the function-local statics. Shouldn't this state
be kept in the debugger's private data?
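I.e. something like (sketch, field names are just a suggestion):

	/* in struct online_debug_data, next to the other debugger internals: */
	int caching_counter;
	uint32_t caching_kernel_in_bb;

and then data->caching_counter instead of the static counter.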
> + struct dim_t s_dim = surface_dimensions(get_number_of_threads(d->flags));
> + int val;
> + uint32_t instr_usdw;
> + struct gpgpu_shader *kernel;
> + const uint32_t breakpoint_bit = 1 << 30;
> + struct gpgpu_shader *shader_preamble;
> + struct gpgpu_shader *shader_write_instr;
> +
> + shader_preamble = gpgpu_shader_create(d->master_fd);
> + gpgpu_shader__write_dword(shader_preamble, SHADER_CANARY, 0);
> + gpgpu_shader__nop(shader_preamble);
> + gpgpu_shader__breakpoint(shader_preamble);
> +
> + shader_write_instr = gpgpu_shader_create(d->master_fd);
> + gpgpu_shader__common_target_write_u32(shader_write_instr, 0, 0);
> +
> + if (!kernel_in_bb) {
> + kernel = get_shader(d->master_fd, d->flags);
> + kernel_in_bb = find_kernel_in_bb(kernel, data);
> + gpgpu_shader_destroy(kernel);
> + }
> +
> + /* set breakpoint on next write instruction */
> + if (counter < caching_get_instruction_count(d->master_fd, s_dim.x, d->flags)) {
> + igt_assert_eq(pread(data->vm_fd, &instr_usdw, sizeof(instr_usdw),
> + data->bb_offset + kernel_in_bb + shader_preamble->size * 4 +
> + shader_write_instr->size * 4 * counter), sizeof(instr_usdw));
> + instr_usdw |= breakpoint_bit;
> + igt_assert_eq(pwrite(data->vm_fd, &instr_usdw, sizeof(instr_usdw),
> + data->bb_offset + kernel_in_bb + shader_preamble->size * 4 +
> + shader_write_instr->size * 4 * counter), sizeof(instr_usdw));
> + fsync(data->vm_fd);
> + }
> +
> + /* restore current instruction */
> + if (counter && counter <= caching_get_instruction_count(d->master_fd, s_dim.x, d->flags))
> + overwrite_immediate_value_in_common_target_write(data->vm_fd,
> + data->bb_offset + kernel_in_bb +
> + shader_preamble->size * 4 +
> + shader_write_instr->size * 4 * (counter - 1),
> + CACHING_POISON_VALUE,
> + CACHING_VALUE(counter - 1));
> +
> + /* poison next instruction */
> + if (counter < caching_get_instruction_count(d->master_fd, s_dim.x, d->flags))
> + overwrite_immediate_value_in_common_target_write(data->vm_fd,
> + data->bb_offset + kernel_in_bb +
> + shader_preamble->size * 4 +
> + shader_write_instr->size * 4 * counter,
> + CACHING_VALUE(counter),
> + CACHING_POISON_VALUE);
> +
> + gpgpu_shader_destroy(shader_write_instr);
> + gpgpu_shader_destroy(shader_preamble);
> +
> + for (int i = 0; i < data->target_size; i += sizeof(uint32_t)) {
> + igt_assert_eq(pread(data->vm_fd, &val, sizeof(val), data->target_offset + i),
> + sizeof(val));
> + igt_assert_f(val != CACHING_POISON_VALUE, "Poison value found at %04d!\n", i);
> + }
> +
> + eu_ctl_resume(d->master_fd, d->fd, att->client_handle,
> + att->exec_queue_handle, att->lrc_handle,
> + att->bitmask, att->bitmask_size);
> +
> + counter++;
> +}
> +
> +static struct intel_bb *xe_bb_create_on_offset(int fd, uint32_t exec_queue, uint32_t vm,
> + uint64_t offset, uint32_t size)
> +{
> + struct intel_bb *ibb;
> +
> + ibb = intel_bb_create_with_context(fd, exec_queue, vm, NULL, size);
> +
> + /* update intel bb offset */
> + intel_bb_remove_object(ibb, ibb->handle, ibb->batch_offset, ibb->size);
> + intel_bb_add_object(ibb, ibb->handle, ibb->size, offset, ibb->alignment, false);
> + ibb->batch_offset = offset;
> +
> + return ibb;
> +}
> +
> +static size_t get_bb_size(int flags)
> +{
> + if ((flags & SHADER_CACHING_SRAM) || (flags & SHADER_CACHING_VRAM))
> + return 32768;
> +
> + return 4096;
> +}
> +
> +static void run_online_client(struct xe_eudebug_client *c)
> +{
> + int threads = get_number_of_threads(c->flags);
> + const uint64_t target_offset = 0x1a000000;
> + const uint64_t bb_offset = 0x1b000000;
> + const size_t bb_size = get_bb_size(c->flags);
> + struct online_debug_data *data = c->ptr;
> + struct drm_xe_engine_class_instance hwe = data->hwe;
> + struct drm_xe_ext_set_property ext = {
> + .base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
> + .property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_EUDEBUG,
> + .value = DRM_XE_EXEC_QUEUE_EUDEBUG_FLAG_ENABLE,
> + };
> + struct drm_xe_exec_queue_create create = {
> + .instances = to_user_pointer(&hwe),
> + .width = 1,
> + .num_placements = 1,
> + .extensions = c->flags & DISABLE_DEBUG_MODE ? 0 : to_user_pointer(&ext)
> + };
> + struct dim_t w_dim = walker_dimensions(threads);
> + struct dim_t s_dim = surface_dimensions(threads);
> + struct timespec ts = { };
> + struct gpgpu_shader *sip, *shader;
> + uint32_t metadata_id[2];
> + uint64_t *metadata[2];
> + struct intel_bb *ibb;
> + struct intel_buf *buf;
> + uint32_t *ptr;
> + int fd;
> +
> + metadata[0] = calloc(2, sizeof(*metadata));
> + metadata[1] = calloc(2, sizeof(*metadata));
> + igt_assert(metadata[0]);
> + igt_assert(metadata[1]);
> +
> + fd = xe_eudebug_client_open_driver(c);
> + xe_device_get(fd);
Not necessary.
> +
> + /* Additional memory for steering control */
> + if (c->flags & SHADER_LOOP || c->flags & SHADER_SINGLE_STEP)
> + s_dim.y++;
> + /* Additional memory for caching check */
> + if ((c->flags & SHADER_CACHING_SRAM) || (c->flags & SHADER_CACHING_VRAM))
> + s_dim.y += caching_get_instruction_count(fd, s_dim.x, c->flags);
> + buf = create_uc_buf(fd, s_dim.x, s_dim.y);
> +
> + buf->addr.offset = target_offset;
> +
> + metadata[0][0] = bb_offset;
> + metadata[0][1] = bb_size;
> + metadata[1][0] = target_offset;
> + metadata[1][1] = buf->size;
> + metadata_id[0] = xe_eudebug_client_metadata_create(c, fd, DRM_XE_DEBUG_METADATA_ELF_BINARY,
> + 2 * sizeof(*metadata), metadata[0]);
> + metadata_id[1] = xe_eudebug_client_metadata_create(c, fd,
> + DRM_XE_DEBUG_METADATA_PROGRAM_MODULE,
> + 2 * sizeof(*metadata), metadata[1]);
> +
> + create.vm_id = xe_eudebug_client_vm_create(c, fd, DRM_XE_VM_CREATE_FLAG_LR_MODE, 0);
> + xe_eudebug_client_exec_queue_create(c, fd, &create);
> +
> + ibb = xe_bb_create_on_offset(fd, create.exec_queue_id, create.vm_id,
> + bb_offset, bb_size);
> + intel_bb_set_lr_mode(ibb, true);
> +
> + sip = get_sip(fd, c->flags);
> + shader = get_shader(fd, c->flags);
> +
> + igt_nsec_elapsed(&ts);
> + gpgpu_shader_exec(ibb, buf, w_dim.x, w_dim.y, shader, sip, 0, 0);
> +
> + gpgpu_shader_destroy(sip);
> + gpgpu_shader_destroy(shader);
> +
> + intel_bb_sync(ibb);
> +
> + if (c->flags & TRIGGER_RECONNECT)
> + xe_eudebug_client_wait_stage(c, DEBUGGER_REATTACHED);
> + else
> + /* Make sure it wasn't the timeout. */
> + igt_assert(igt_nsec_elapsed(&ts) < XE_EUDEBUG_DEFAULT_TIMEOUT_SEC * NSEC_PER_SEC);
> +
> + if (!(c->flags & DO_NOT_EXPECT_CANARIES)) {
> + ptr = xe_bo_mmap_ext(fd, buf->handle, buf->size, PROT_READ);
> + data->threads_count = count_canaries_neq(ptr, w_dim, 0);
> + igt_assert_f(data->threads_count, "No canaries found, nothing executed?\n");
> +
> + if ((c->flags & SHADER_BREAKPOINT || c->flags & TRIGGER_RESUME_SET_BP ||
> + c->flags & SHADER_N_NOOP_BREAKPOINT) && !(c->flags & DISABLE_DEBUG_MODE)) {
> + uint32_t aip = ptr[0];
> +
> + igt_assert_f(aip != SHADER_CANARY,
> + "Workload executed but breakpoint not hit!\n");
> + igt_assert_eq(count_canaries_eq(ptr, w_dim, aip), data->threads_count);
> + igt_debug("Breakpoint hit in %d threads, AIP=0x%08x\n", data->threads_count,
> + aip);
> + }
> +
> + munmap(ptr, buf->size);
> + }
> +
> + intel_bb_destroy(ibb);
> +
> + xe_eudebug_client_exec_queue_destroy(c, fd, &create);
> + xe_eudebug_client_vm_destroy(c, fd, create.vm_id);
> +
> + xe_eudebug_client_metadata_destroy(c, fd, metadata_id[0], DRM_XE_DEBUG_METADATA_ELF_BINARY,
> + 2 * sizeof(*metadata));
> + xe_eudebug_client_metadata_destroy(c, fd, metadata_id[1],
> + DRM_XE_DEBUG_METADATA_PROGRAM_MODULE,
> + 2 * sizeof(*metadata));
> +
> + xe_device_put(fd);
Same.
> + xe_eudebug_client_close_driver(c, fd);
> +}
> +
> +static bool intel_gen_has_lockstep_eus(int fd)
> +{
> + const uint32_t id = intel_get_drm_devid(fd);
> +
> + /*
> + * Lockstep (or, in some parlance, fused) EUs are pairs of EUs
> + * that work in sync, supposedly on the same clock and with the same
> + * control flow. Thus for attentions, if the control has a breakpoint,
> + * both will be excepted into SIP. At this level, the hardware has only
> + * one attention thread bit for such units. PVC is the first one
> + * without lockstepping.
> + */
> + return !(intel_graphics_ver(id) == IP_VER(12, 60) || intel_gen(id) >= 20);
> +}
> +
> +static int query_attention_bitmask_size(int fd, int gt)
> +{
> + const unsigned int threads = 8;
> + struct drm_xe_query_topology_mask *c_dss = NULL, *g_dss = NULL, *eu_per_dss = NULL;
> + struct drm_xe_query_topology_mask *topology;
> + struct drm_xe_device_query query = {
> + .extensions = 0,
> + .query = DRM_XE_DEVICE_QUERY_GT_TOPOLOGY,
> + .size = 0,
> + .data = 0,
> + };
> + int pos = 0, eus;
> + uint8_t *any_dss;
> +
> + igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
> + igt_assert_neq(query.size, 0);
> +
> + topology = malloc(query.size);
> + igt_assert(topology);
> +
> + query.data = to_user_pointer(topology);
> + igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
> +
> + while (query.size >= sizeof(struct drm_xe_query_topology_mask)) {
> + struct drm_xe_query_topology_mask *topo;
> + int sz;
> +
> + topo = (struct drm_xe_query_topology_mask *)((unsigned char *)topology + pos);
> + sz = sizeof(struct drm_xe_query_topology_mask) + topo->num_bytes;
> +
> + query.size -= sz;
> + pos += sz;
> +
> + if (topo->gt_id != gt)
> + continue;
> +
> + if (topo->type == DRM_XE_TOPO_DSS_GEOMETRY)
> + g_dss = topo;
> + else if (topo->type == DRM_XE_TOPO_DSS_COMPUTE)
> + c_dss = topo;
> + else if (topo->type == DRM_XE_TOPO_EU_PER_DSS ||
> + topo->type == DRM_XE_TOPO_SIMD16_EU_PER_DSS)
> + eu_per_dss = topo;
> + }
> +
> + igt_assert(g_dss && c_dss && eu_per_dss);
> + igt_assert_eq_u32(c_dss->num_bytes, g_dss->num_bytes);
> +
> + any_dss = malloc(c_dss->num_bytes);
Assert if NULL.
> +
> + for (int i = 0; i < c_dss->num_bytes; i++)
> + any_dss[i] = c_dss->mask[i] | g_dss->mask[i];
> +
> + eus = count_set_bits(any_dss, c_dss->num_bytes);
> + eus *= count_set_bits(eu_per_dss->mask, eu_per_dss->num_bytes);
> +
> + if (intel_gen_has_lockstep_eus(fd))
> + eus /= 2;
> +
> + free(any_dss);
> + free(topology);
> +
> + return eus * threads / 8;
> +}
> +
> +static struct drm_xe_eudebug_event_exec_queue *
> +match_attention_with_exec_queue(struct xe_eudebug_event_log *log,
> + struct drm_xe_eudebug_event_eu_attention *ea)
> +{
> + struct drm_xe_eudebug_event_exec_queue *ee;
> + struct drm_xe_eudebug_event *event = NULL, *current = NULL, *matching_destroy = NULL;
> + int lrc_idx;
> +
> + xe_eudebug_for_each_event(event, log) {
> + if (event->type == DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE &&
> + event->flags == DRM_XE_EUDEBUG_EVENT_CREATE) {
> + ee = (struct drm_xe_eudebug_event_exec_queue *)event;
> +
> + if (ee->exec_queue_handle != ea->exec_queue_handle)
> + continue;
> +
> + if (ee->client_handle != ea->client_handle)
> + continue;
> +
> + for (lrc_idx = 0; lrc_idx < ee->width; lrc_idx++) {
> + if (ee->lrc_handle[lrc_idx] == ea->lrc_handle)
> + break;
> + }
> +
> + if (lrc_idx >= ee->width) {
> + igt_debug("No matching lrc handle within matching exec_queue!");
> + continue;
> + }
> +
> + /* event logs are sorted - any exec_queue found further on would be too recent. */
> + if (ea->base.seqno < ee->base.seqno)
> + break;
> +
> + /* sanity check that the attention did not
> + * appear on an already destroyed exec_queue
> + */
> + current = event;
> + xe_eudebug_for_each_event(current, log) {
> + if (current->type == DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE &&
> + current->flags == DRM_XE_EUDEBUG_EVENT_DESTROY) {
> + uint8_t offset = sizeof(struct drm_xe_eudebug_event);
> +
> + if (memcmp((uint8_t *)current + offset,
> + (uint8_t *)event + offset,
> + current->len - offset) == 0) {
> + matching_destroy = current;
> + }
> + }
> + }
> +
> + if (!matching_destroy || ea->base.seqno > matching_destroy->seqno)
> + continue;
> +
> + return ee;
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static void online_session_check(struct xe_eudebug_session *s, int flags)
> +{
> + struct drm_xe_eudebug_event_eu_attention *ea = NULL;
> + struct drm_xe_eudebug_event *event = NULL;
> + struct online_debug_data *data = s->client->ptr;
> + bool expect_exception = flags & DISABLE_DEBUG_MODE ? false : true;
> + int sum = 0;
> + int bitmask_size;
> +
> + xe_eudebug_session_check(s, true, XE_EUDEBUG_FILTER_EVENT_VM_BIND |
> + XE_EUDEBUG_FILTER_EVENT_VM_BIND_OP |
> + XE_EUDEBUG_FILTER_EVENT_VM_BIND_UFENCE);
> +
> + bitmask_size = query_attention_bitmask_size(s->debugger->master_fd, data->hwe.gt_id);
> +
> + xe_eudebug_for_each_event(event, s->debugger->log) {
> + if (event->type == DRM_XE_EUDEBUG_EVENT_EU_ATTENTION) {
> + ea = (struct drm_xe_eudebug_event_eu_attention *)event;
> +
> + igt_assert(event->flags == DRM_XE_EUDEBUG_EVENT_STATE_CHANGE);
> + igt_assert_eq(ea->bitmask_size, bitmask_size);
> + sum += count_set_bits(ea->bitmask, bitmask_size);
> + igt_assert(match_attention_with_exec_queue(s->debugger->log, ea));
> + }
> + }
> +
> + /*
> + * We can expect the attention counts to sum up only if we have a
> + * breakpoint set and we always resume all threads.
> + */
> + if (flags == SHADER_BREAKPOINT || flags == TRIGGER_UFENCE_SET_BREAKPOINT)
> + igt_assert_eq(sum, data->threads_count);
> +
> + if (expect_exception)
> + igt_assert(sum > 0);
> + else
> + igt_assert(sum == 0);
> +}
> +
> +static void ufence_ack_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_vm_bind_ufence *ef = (void *)e;
> +
> + if (e->flags & DRM_XE_EUDEBUG_EVENT_CREATE)
> + xe_eudebug_ack_ufence(d->fd, ef);
> +}
> +
> +static void ufence_ack_set_bp_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *e)
> +{
> + struct drm_xe_eudebug_event_vm_bind_ufence *ef = (void *)e;
> + struct online_debug_data *data = d->ptr;
> +
> + set_breakpoint_once(d, data);
> +
> + if (e->flags & DRM_XE_EUDEBUG_EVENT_CREATE)
> + xe_eudebug_ack_ufence(d->fd, ef);
> +}
> +
> +/**
> + * SUBTEST: basic-breakpoint
> + * Description:
> + * Check whether KMD sends attention events
> + * for workload in debug mode stopped on breakpoint.
> + *
> + * SUBTEST: breakpoint-not-in-debug-mode
> + * Description:
> + * Check whether KMD resets the GPU when it spots an attention
> + * coming from workload not in debug mode.
> + *
> + * SUBTEST: stopped-thread
> + * Description:
> + * Hits a breakpoint on a runalone workload and
> + * reads attention for a fixed time.
> + *
> + * SUBTEST: resume-%s
> + * Description:
> + * Resumes a workload stopped on a breakpoint
> + * with the granularity of %arg[1].
> + *
> + *
> + * arg[1]:
> + *
> + * @one: one thread
> + * @dss: threads running on one subslice
> + */
> +static void test_basic_online(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
> +{
> + struct xe_eudebug_session *s;
> + struct online_debug_data *data;
> +
> + data = online_debug_data_create(hwe);
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debug_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_resume_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + xe_eudebug_session_run(s);
> + online_session_check(s, s->flags);
> +
> + xe_eudebug_session_destroy(s);
> + online_debug_data_destroy(data);
> +}
> +
> +/**
> + * SUBTEST: set-breakpoint
> + * Description:
> + * Checks for attention after setting a dynamic breakpoint in the ufence event.
> + */
> +
> +static void test_set_breakpoint_online(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
> +{
> + struct xe_eudebug_session *s;
> + struct online_debug_data *data;
> +
> + data = online_debug_data_create(hwe);
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_OPEN,
> + open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE,
> + exec_queue_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM, vm_open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_METADATA,
> + create_metadata_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_set_bp_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_resume_trigger);
> +
> + xe_eudebug_session_run(s);
> + online_session_check(s, s->flags);
> +
> + xe_eudebug_session_destroy(s);
> + online_debug_data_destroy(data);
> +}
> +
> +/**
> + * SUBTEST: preempt-breakpoint
> + * Description:
> + * Verify that eu debugger disables preemption timeout to
> + * prevent reset of workload stopped on breakpoint.
> + */
> +static void test_preemption(int fd, struct drm_xe_engine_class_instance *hwe)
> +{
> + int flags = SHADER_BREAKPOINT | TRIGGER_RESUME_DELAYED;
> + struct xe_eudebug_session *s;
> + struct online_debug_data *data;
> + struct xe_eudebug_client *other;
> +
> + data = online_debug_data_create(hwe);
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> + other = xe_eudebug_client_create(fd, run_online_client, SHADER_NOP, data);
> +
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debug_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_resume_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + igt_assert_eq(xe_eudebug_debugger_attach(s->debugger, s->client), 0);
> + xe_eudebug_debugger_start_worker(s->debugger);
> +
> + xe_eudebug_client_start(s->client);
> + sleep(1); /* make sure s->client starts first */
If the client wrote a token once it has started, this sleep wouldn't be
necessary. I mean: inside xe_eudebug_client_start(), do a
token_signal/wait_for_client handshake.
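E.g. a minimal sketch with a plain pipe(2) (the lib may already have a
better primitive for this):

	int started[2];
	char tok = 1;

	igt_assert_eq(pipe(started), 0);

	/* in the client, as the first thing in run_online_client(): */
	igt_assert_eq(write(started[1], &tok, 1), 1);

	/* in the parent, instead of the sleep(1): */
	igt_assert_eq(read(started[0], &tok, 1), 1);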
> + xe_eudebug_client_start(other);
> +
> + xe_eudebug_client_wait_done(s->client);
> + xe_eudebug_client_wait_done(other);
> +
> + xe_eudebug_debugger_stop_worker(s->debugger, 1);
> +
> + xe_eudebug_session_destroy(s);
> + xe_eudebug_client_destroy(other);
> +
> + igt_assert_f(data->last_eu_control_seqno != 0,
> + "Workload with breakpoint has ended without resume!\n");
> +
> + online_debug_data_destroy(data);
> +}
> +
> +/**
> + * SUBTEST: reset-with-attention
> + * Description:
> + * Check whether GPU is usable after resetting with attention raised
> + * (stopped on breakpoint) by running the same workload again.
> + */
> +static void test_reset_with_attention_online(int fd, struct drm_xe_engine_class_instance *hwe,
> + int flags)
> +{
> + struct xe_eudebug_session *s1, *s2;
> + struct online_debug_data *data;
> +
> + data = online_debug_data_create(hwe);
> + s1 = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> + xe_eudebug_debugger_add_trigger(s1->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_reset_trigger);
> + xe_eudebug_debugger_add_trigger(s1->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + xe_eudebug_session_run(s1);
> + xe_eudebug_session_destroy(s1);
> +
> + s2 = xe_eudebug_session_create(fd, run_online_client, flags, data);
> + xe_eudebug_debugger_add_trigger(s2->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_resume_trigger);
> + xe_eudebug_debugger_add_trigger(s2->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + xe_eudebug_session_run(s2);
> +
> + online_session_check(s2, s2->flags);
> +
> + xe_eudebug_session_destroy(s2);
> + online_debug_data_destroy(data);
> +}
> +
> +/**
> + * SUBTEST: interrupt-all
> + * Description:
> + * Schedules an EU workload which should last a few seconds, then
> + * interrupts all threads, checks whether an attention event came, and
> + * resumes the stopped threads.
> + *
> + * SUBTEST: interrupt-all-set-breakpoint
> + * Description:
> + * Schedules an EU workload which should last a few seconds, then
> + * interrupts all threads; once the attention event comes it sets a
> + * breakpoint on the very next instruction and resumes the stopped
> + * threads. It expects every thread to hit the breakpoint.
> + */
> +static void test_interrupt_all(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
> +{
> + struct xe_eudebug_session *s;
> + struct online_debug_data *data;
> + uint32_t val;
> +
> + data = online_debug_data_create(hwe);
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_OPEN,
> + open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE,
> + exec_queue_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debug_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_resume_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM, vm_open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_METADATA,
> + create_metadata_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + igt_assert_eq(xe_eudebug_debugger_attach(s->debugger, s->client), 0);
> + xe_eudebug_debugger_start_worker(s->debugger);
> + xe_eudebug_client_start(s->client);
> +
> + /* wait for workload to start */
> + igt_for_milliseconds(STARTUP_TIMEOUT_MS) {
> + /* collect needed data from triggers */
> + if (READ_ONCE(data->vm_fd) == -1 || READ_ONCE(data->target_size) == 0)
> + continue;
> +
> + if (pread(data->vm_fd, &val, sizeof(val), data->target_offset) == sizeof(val))
> + if (val != 0)
> + break;
> + }
> +
> + pthread_mutex_lock(&data->mutex);
> + igt_assert(data->client_handle != -1);
> + igt_assert(data->exec_queue_handle != -1);
> + eu_ctl_interrupt_all(s->debugger->fd, data->client_handle,
> + data->exec_queue_handle, data->lrc_handle);
> + pthread_mutex_unlock(&data->mutex);
> +
> + xe_eudebug_client_wait_done(s->client);
> +
> + xe_eudebug_debugger_stop_worker(s->debugger, 1);
> +
> + xe_eudebug_event_log_print(s->debugger->log, true);
> + xe_eudebug_event_log_print(s->client->log, true);
> +
> + online_session_check(s, s->flags);
> +
> + xe_eudebug_session_destroy(s);
> + online_debug_data_destroy(data);
> +}
> +
> +static void reset_debugger_log(struct xe_eudebug_debugger *d)
> +{
> + unsigned int max_size;
> + char log_name[80];
> +
> + /* Don't pull the rug out from under an active debugger */
> + igt_assert(d->target_pid == 0);
> +
> + max_size = d->log->max_size;
> + strncpy(log_name, d->log->name, sizeof(d->log->name) - 1);
> + log_name[79] = '\0';
> + xe_eudebug_event_log_destroy(d->log);
> + d->log = xe_eudebug_event_log_create(log_name, max_size);
> +}
> +
> +/**
> + * SUBTEST: interrupt-other-debuggable
> + * Description:
> + * Schedules an EU workload in runalone mode with a never-ending loop
> + * and, while it is not under debug, tries to interrupt all threads
> + * using a different client attached to the debugger.
> + *
> + * SUBTEST: interrupt-other
> + * Description:
> + * Schedules an EU workload with a never-ending loop and, while it is
> + * not configured for debugging, tries to interrupt all threads using
> + * the client attached to the debugger.
> + */
> +static void test_interrupt_other(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
> +{
> + struct online_debug_data *data;
> + struct online_debug_data *debugee_data;
> + struct xe_eudebug_session *s;
> + struct xe_eudebug_client *debugee;
> + int debugee_flags = SHADER_LOOP | DO_NOT_EXPECT_CANARIES;
> + int val;
> +
> + data = online_debug_data_create(hwe);
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_OPEN, open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE,
> + exec_queue_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM, vm_open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_METADATA,
> + create_metadata_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + igt_assert_eq(xe_eudebug_debugger_attach(s->debugger, s->client), 0);
> + xe_eudebug_debugger_start_worker(s->debugger);
> + xe_eudebug_client_start(s->client);
> +
> + /* wait for workload to start */
> + igt_for_milliseconds(STARTUP_TIMEOUT_MS) {
> + if (READ_ONCE(data->vm_fd) == -1 || READ_ONCE(data->target_size) == 0)
> + continue;
> +
> + if (pread(data->vm_fd, &val, sizeof(val), data->target_offset) == sizeof(val))
> + if (val != 0)
> + break;
> + }
> + igt_assert_f(val != 0, "Workload execution is not yet started\n");
> +
> + xe_eudebug_debugger_detach(s->debugger);
> + reset_debugger_log(s->debugger);
> +
> + debugee_data = online_debug_data_create(hwe);
> + s->debugger->ptr = debugee_data;
> + debugee = xe_eudebug_client_create(fd, run_online_client, debugee_flags, debugee_data);
> + igt_assert_eq(xe_eudebug_debugger_attach(s->debugger, debugee), 0);
> + xe_eudebug_client_start(debugee);
> +
> + igt_for_milliseconds(STARTUP_TIMEOUT_MS) {
> + if (READ_ONCE(debugee_data->vm_fd) == -1 || READ_ONCE(debugee_data->target_size) == 0)
> + continue;
> + }
> +
> + pthread_mutex_lock(&debugee_data->mutex);
> + igt_assert(debugee_data->client_handle != -1);
> + igt_assert(debugee_data->exec_queue_handle != -1);
> +
> + /*
> + * Interrupting the other client should return invalid state
> + * as it is running in runalone mode
> + */
> + igt_assert_eq(__eu_ctl(s->debugger->fd, debugee_data->client_handle,
> + debugee_data->exec_queue_handle, debugee_data->lrc_handle, NULL, 0,
> + DRM_XE_EUDEBUG_EU_CONTROL_CMD_INTERRUPT_ALL, NULL), -EINVAL);
> + pthread_mutex_unlock(&debugee_data->mutex);
> +
> + xe_force_gt_reset_async(s->debugger->master_fd, debugee_data->hwe.gt_id);
> +
> + xe_eudebug_client_wait_done(debugee);
> + xe_eudebug_debugger_stop_worker(s->debugger, 1);
> +
> + xe_eudebug_event_log_print(s->debugger->log, true);
> + xe_eudebug_event_log_print(debugee->log, true);
> +
> + xe_eudebug_session_check(s, true, XE_EUDEBUG_FILTER_EVENT_VM_BIND |
> + XE_EUDEBUG_FILTER_EVENT_VM_BIND_OP |
> + XE_EUDEBUG_FILTER_EVENT_VM_BIND_UFENCE);
> +
> + xe_eudebug_client_destroy(debugee);
> + xe_eudebug_session_destroy(s);
> + online_debug_data_destroy(data);
> + online_debug_data_destroy(debugee_data);
> +}
> +
> +/**
> + * SUBTEST: tdctl-parameters
> + * Description:
> + * Schedules an EU workload which should last a few seconds, then
> + * checks negative scenarios of EU_THREADS ioctl usage, interrupts all
> + * threads, checks whether an attention event came, and resumes the
> + * stopped threads.
> + */
> +static void test_tdctl_parameters(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
> +{
> + struct xe_eudebug_session *s;
> + struct online_debug_data *data;
> + uint32_t val;
> + uint32_t random_command;
> + uint32_t bitmask_size = query_attention_bitmask_size(fd, hwe->gt_id);
> + uint8_t *attention_bitmask = malloc(bitmask_size * sizeof(uint8_t));
> +
> + igt_assert(attention_bitmask);
> +
> + data = online_debug_data_create(hwe);
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_OPEN,
> + open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE,
> + exec_queue_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debug_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_resume_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM, vm_open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_METADATA,
> + create_metadata_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + igt_assert_eq(xe_eudebug_debugger_attach(s->debugger, s->client), 0);
> + xe_eudebug_debugger_start_worker(s->debugger);
> + xe_eudebug_client_start(s->client);
> +
> + /* wait for workload to start */
> + igt_for_milliseconds(STARTUP_TIMEOUT_MS) {
> + /* collect needed data from triggers */
> + if (READ_ONCE(data->vm_fd) == -1 || READ_ONCE(data->target_size) == 0)
> + continue;
> +
> + if (pread(data->vm_fd, &val, sizeof(val), data->target_offset) == sizeof(val))
> + if (val != 0)
> + break;
> + }
> +
> + pthread_mutex_lock(&data->mutex);
> + igt_assert(data->client_handle != -1);
> + igt_assert(data->exec_queue_handle != -1);
> + igt_assert(data->lrc_handle != -1);
> +
> + /* fail on invalid lrc_handle */
> + igt_assert(__eu_ctl(s->debugger->fd, data->client_handle,
> + data->exec_queue_handle, data->lrc_handle + 1,
> + attention_bitmask, &bitmask_size,
> + DRM_XE_EUDEBUG_EU_CONTROL_CMD_INTERRUPT_ALL, NULL) == -EINVAL);
> +
> + /* fail on invalid exec_queue_handle */
> + igt_assert(__eu_ctl(s->debugger->fd, data->client_handle,
> + data->exec_queue_handle + 1, data->lrc_handle,
> + attention_bitmask, &bitmask_size,
> + DRM_XE_EUDEBUG_EU_CONTROL_CMD_INTERRUPT_ALL, NULL) == -EINVAL);
> +
> + /* fail on invalid client */
> + igt_assert(__eu_ctl(s->debugger->fd, data->client_handle + 1,
> + data->exec_queue_handle, data->lrc_handle,
> + attention_bitmask, &bitmask_size,
> + DRM_XE_EUDEBUG_EU_CONTROL_CMD_INTERRUPT_ALL, NULL) == -EINVAL);
> +
> + /*
> + * bitmask size must be aligned to sizeof(u32) for all commands
> + * and be zero for interrupt all
> + */
> + bitmask_size = sizeof(uint32_t) - 1;
> + igt_assert(__eu_ctl(s->debugger->fd, data->client_handle,
> + data->exec_queue_handle, data->lrc_handle,
> + attention_bitmask, &bitmask_size,
> + DRM_XE_EUDEBUG_EU_CONTROL_CMD_STOPPED, NULL) == -EINVAL);
> + bitmask_size = 0;
> +
> + /* fail on invalid command */
> + random_command = random() | (DRM_XE_EUDEBUG_EU_CONTROL_CMD_RESUME + 1);
> + igt_assert(__eu_ctl(s->debugger->fd, data->client_handle,
> + data->exec_queue_handle, data->lrc_handle,
> + attention_bitmask, &bitmask_size, random_command, NULL) == -EINVAL);
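nit for the block of negative checks above: igt_assert(x == -EINVAL) swallows
the actual return value on failure, while igt_assert_eq() prints both sides.
A small helper would also dedup the four cases; untested sketch, with the
__eu_ctl() argument types guessed from the call sites:

static void assert_eu_ctl_einval(int debug_fd, uint64_t client, uint64_t exec_queue,
				 uint64_t lrc, uint8_t *bitmask, uint32_t *size,
				 uint32_t cmd)
{
	/* every negative scenario in this subtest expects -EINVAL */
	igt_assert_eq(__eu_ctl(debug_fd, client, exec_queue, lrc,
			       bitmask, size, cmd, NULL), -EINVAL);
}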
> +
> + free(attention_bitmask);
> +
> + eu_ctl_interrupt_all(s->debugger->fd, data->client_handle,
> + data->exec_queue_handle, data->lrc_handle);
> + pthread_mutex_unlock(&data->mutex);
> +
> + xe_eudebug_client_wait_done(s->client);
> +
> + xe_eudebug_debugger_stop_worker(s->debugger, 1);
> +
> + xe_eudebug_event_log_print(s->debugger->log, true);
> + xe_eudebug_event_log_print(s->client->log, true);
> +
> + online_session_check(s, s->flags);
> +
> + xe_eudebug_session_destroy(s);
> + online_debug_data_destroy(data);
> +}
> +
> +static void eu_attention_debugger_detach_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *event)
> +{
> + struct online_debug_data *data = d->ptr;
> + uint64_t c_pid;
> + int ret;
> +
> + c_pid = d->target_pid;
> +
> + /* Reset VM data so the re-triggered VM open handler works properly */
> + data->vm_fd = -1;
> +
> + xe_eudebug_debugger_detach(d);
> +
> + /* Let the KMD scan function notice unhandled EU attention */
> + if (!(d->flags & SHADER_N_NOOP_BREAKPOINT))
> + sleep(1);
> +
> + /*
> + * A new session created by the EU debugger on reconnect restarts
> + * the seqno, causing issues with log sorting. To avoid that, create
> + * a new event log.
> + */
> + reset_debugger_log(d);
> +
> + ret = xe_eudebug_connect(d->master_fd, c_pid, 0);
> + igt_assert(ret >= 0);
> + d->fd = ret;
> + d->target_pid = c_pid;
> +
> + /* Let the discovery worker discover resources */
> + sleep(2);
> +
> + if (!(d->flags & SHADER_N_NOOP_BREAKPOINT))
> + xe_eudebug_debugger_signal_stage(d, DEBUGGER_REATTACHED);
> +}
> +
> +/**
> + * SUBTEST: interrupt-reconnect
> + * Description:
> + * Schedules an EU workload which should last a few seconds,
> + * interrupts all threads and detaches the debugger when attention is
> + * raised. The test checks that KMD resets the workload when no
> + * debugger is attached and replays the events on discovery.
> + */
> +static void test_interrupt_reconnect(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
> +{
> + struct drm_xe_eudebug_event *e = NULL;
> + struct online_debug_data *data;
> + struct xe_eudebug_session *s;
> + uint32_t val;
> +
> + data = online_debug_data_create(hwe);
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_OPEN,
> + open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE,
> + exec_queue_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debug_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debugger_detach_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM, vm_open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_METADATA,
> + create_metadata_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + igt_assert_eq(xe_eudebug_debugger_attach(s->debugger, s->client), 0);
> + xe_eudebug_debugger_start_worker(s->debugger);
> + xe_eudebug_client_start(s->client);
> +
> + /* wait for workload to start */
> + igt_for_milliseconds(STARTUP_TIMEOUT_MS) {
> + /* collect needed data from triggers */
> + if (READ_ONCE(data->vm_fd) == -1 || READ_ONCE(data->target_size) == 0)
> + continue;
> +
> + if (pread(data->vm_fd, &val, sizeof(val), data->target_offset) == sizeof(val))
> + if (val != 0)
> + break;
> + }
> +
> + pthread_mutex_lock(&data->mutex);
> + igt_assert(data->client_handle != -1);
> + igt_assert(data->exec_queue_handle != -1);
> + eu_ctl_interrupt_all(s->debugger->fd, data->client_handle,
> + data->exec_queue_handle, data->lrc_handle);
> + pthread_mutex_unlock(&data->mutex);
> +
> + xe_eudebug_client_wait_done(s->client);
> +
> + xe_eudebug_debugger_stop_worker(s->debugger, 1);
I wondered where the log gets cleared, and noticed that
eu_attention_debugger_detach_trigger is responsible for it.
> +
> + xe_eudebug_event_log_print(s->debugger->log, true);
> + xe_eudebug_event_log_print(s->client->log, true);
> +
> + xe_eudebug_session_check(s, true, XE_EUDEBUG_FILTER_EVENT_VM_BIND |
> + XE_EUDEBUG_FILTER_EVENT_VM_BIND_OP |
> + XE_EUDEBUG_FILTER_EVENT_VM_BIND_UFENCE);
That's my question here - if the debugger's log is cleared and then filled
again on reconnect, will the vm-bind-ufence events still match?
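If it would help to answer that, counting them over the final (sorted) log
would make any mismatch explicit; sketch only, using the same iterator as the
no-attention check below:

	struct drm_xe_eudebug_event *ev = NULL;
	int ufences = 0;

	xe_eudebug_for_each_event(ev, s->debugger->log)
		if (ev->type == DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE)
			ufences++;

	igt_debug("debugger saw %d vm-bind-ufence events after reconnect\n", ufences);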
> +
> + /* We expect workload reset, so no attention should be raised */
> + xe_eudebug_for_each_event(e, s->debugger->log)
> + igt_assert(e->type != DRM_XE_EUDEBUG_EVENT_EU_ATTENTION);
> +
> + xe_eudebug_session_destroy(s);
> + online_debug_data_destroy(data);
> +}
> +
> +/**
> + * SUBTEST: single-step
> + * Description:
> + * Schedules an EU workload with 16 nops after a breakpoint, then single-steps
> + * through the shader, advancing all threads each step and checking that all
> + * threads advanced on every step.
> + *
> + * SUBTEST: single-step-one
> + * Description:
> + * Schedules an EU workload with 16 nops after a breakpoint, then single-steps
> + * through the shader, advancing one thread each step and checking that one
> + * thread advanced on every step. Due to the time constraint, only the first
> + * two shader instructions after the breakpoint are validated.
> + */
> +static void test_single_step(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
> +{
> + struct xe_eudebug_session *s;
> + struct online_debug_data *data;
> +
> + data = online_debug_data_create(hwe);
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_OPEN,
> + open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debug_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_resume_single_step_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM, vm_open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_METADATA,
> + create_metadata_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + xe_eudebug_session_run(s);
> + online_session_check(s, s->flags);
> + xe_eudebug_session_destroy(s);
> + online_debug_data_destroy(data);
> +}
> +
> +static void eu_attention_debugger_ndetach_trigger(struct xe_eudebug_debugger *d,
> + struct drm_xe_eudebug_event *event)
> +{
> + struct online_debug_data *data = d->ptr;
> + static int debugger_detach_count;
> +
> + if (debugger_detach_count < (SHADER_LOOP_N - 1)) {
> + /* Make sure the resume command was issued before detaching the debugger */
> + if (data->last_eu_control_seqno > event->seqno)
> + return;
> + eu_attention_debugger_detach_trigger(d, event);
> + debugger_detach_count++;
> + } else {
> + igt_debug("Reached Nth breakpoint hence preventing the debugger detach\n");
> + }
> +}
> +
> +/**
> + * SUBTEST: debugger-reopen
> + * Description:
> + * Check whether the debugger is able to reopen the connection and
> + * capture the events of already running client.
> + */
> +static void test_debugger_reopen(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
> +{
> + struct xe_eudebug_session *s;
> + struct online_debug_data *data;
> +
> + data = online_debug_data_create(hwe);
> +
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debug_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_resume_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debugger_ndetach_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + xe_eudebug_session_run(s);
> +
> + xe_eudebug_session_destroy(s);
> + online_debug_data_destroy(data);
> +}
> +
> +/**
> + * SUBTEST: writes-caching-%s
> + * Description:
> + * Write incrementing values to a 2-page-long target surface, poisoning the data one
> + * breakpoint before each write instruction and restoring it when the poisoned
> + * instruction's breakpoint is hit. Expect to never see poison values in the target
> + * surface.
> + *
> + * arg[1]:
> + *
> + * @sram: Use page size of SRAM
> + * @vram: Use page size of VRAM
> + */
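The poison/restore dance described above boils down to a debugger-side
read-modify-write through the vm fd. Hedged sketch of a single step, assuming
the CACHING_POISON_VALUE defined at the top of the test and the vm_fd/pread
access the other triggers already use (the real plumbing lives in
eu_attention_resume_caching_trigger):

	uint32_t poison = CACHING_POISON_VALUE, saved;

	/* remember the client's value, then poison it before resuming */
	igt_assert_eq(pread(data->vm_fd, &saved, sizeof(saved), data->target_offset),
		      sizeof(saved));
	igt_assert_eq(pwrite(data->vm_fd, &poison, sizeof(poison), data->target_offset),
		      sizeof(poison));
	/* ... and on the next breakpoint hit, pwrite() 'saved' back ... */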
> +static void test_caching(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
> +{
> + struct xe_eudebug_session *s;
> + struct online_debug_data *data;
> +
> + if (flags & SHADER_CACHING_VRAM)
> + igt_skip_on_f(!xe_has_vram(fd), "Device does not have VRAM.\n");
> +
> + data = online_debug_data_create(hwe);
> + s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_OPEN,
> + open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debug_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_resume_caching_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM, vm_open_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_METADATA,
> + create_metadata_trigger);
> + xe_eudebug_debugger_add_trigger(s->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + xe_eudebug_session_run(s);
> + online_session_check(s, s->flags);
> + xe_eudebug_session_destroy(s);
> + online_debug_data_destroy(data);
> +}
> +
> +static int wait_for_exception(struct online_debug_data *data, int timeout)
> +{
> + int ret = -ETIMEDOUT;
> +
> + igt_for_milliseconds(timeout) {
> + pthread_mutex_lock(&data->mutex);
> + if ((data->exception_arrived.tv_sec |
> + data->exception_arrived.tv_nsec) != 0)
> + ret = 0;
> + pthread_mutex_unlock(&data->mutex);
> +
> + if (!ret)
> + break;
> + usleep(1000);
> + }
> +
> + return ret;
> +}
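nit: this polls every 1 ms with the mutex bouncing between threads. Since the
exception trigger already fills exception_arrived under data->mutex, a
condition variable would remove the busy-wait; untested sketch, assuming a
'cond' field is added next to the mutex and the trigger calls
pthread_cond_broadcast():

static int wait_for_exception_cv(struct online_debug_data *data, int timeout_ms)
{
	struct timespec ts;
	int ret = 0;

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += timeout_ms / 1000;
	ts.tv_nsec += (timeout_ms % 1000) * 1000000L;
	if (ts.tv_nsec >= 1000000000L) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000000000L;
	}

	pthread_mutex_lock(&data->mutex);
	while (!ret && (data->exception_arrived.tv_sec |
			data->exception_arrived.tv_nsec) == 0)
		ret = pthread_cond_timedwait(&data->cond, &data->mutex, &ts);
	pthread_mutex_unlock(&data->mutex);

	return ret ? -ETIMEDOUT : 0;
}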
> +
> +#define is_compute_on_gt(__e, __gt) (((__e)->engine_class == DRM_XE_ENGINE_CLASS_RENDER || \
> + (__e)->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE) && \
> + (__e)->gt_id == (__gt))
> +
> +struct xe_engine_list_entry {
> + struct igt_list_head link;
> + struct drm_xe_engine_class_instance *hwe;
> +};
> +
> +#define MAX_TILES 2
> +static int find_suitable_engines(struct drm_xe_engine_class_instance *hwes[GEM_MAX_ENGINES],
> + int fd, bool many_tiles)
> +{
> + struct xe_device *xe_dev;
> + struct drm_xe_engine_class_instance *e;
> + struct xe_engine_list_entry *en, *tmp;
> + struct igt_list_head compute_engines[MAX_TILES];
> + int gt_id;
> + int tile_id, i, engine_count = 0, tile_count = 0;
> +
> + xe_dev = xe_device_get(fd);
> +
> + for (i = 0; i < MAX_TILES; i++)
> + IGT_INIT_LIST_HEAD(&compute_engines[i]);
> +
> + xe_for_each_gt(fd, gt_id) {
> + xe_for_each_engine(fd, e) {
> + if (is_compute_on_gt(e, gt_id)) {
> + tile_id = xe_dev->gt_list->gt_list[gt_id].tile_id;
> +
> + en = malloc(sizeof(*en));
> + igt_assert(en);
> + en->hwe = e;
> +
> + igt_list_add_tail(&en->link, &compute_engines[tile_id]);
> + }
> + }
> + }
> +
> + for (i = 0; i < MAX_TILES; i++) {
> + if (igt_list_empty(&compute_engines[i]))
> + continue;
> +
> + if (many_tiles) {
> + en = igt_list_first_entry(&compute_engines[i], en, link);
> + hwes[engine_count++] = en->hwe;
> + tile_count++;
> + } else {
> + if (igt_list_length(&compute_engines[i]) > 1) {
> + igt_list_for_each_entry(en, &compute_engines[i], link)
> + hwes[engine_count++] = en->hwe;
> + break;
> + }
> + }
> + }
> +
> + for (i = 0; i < MAX_TILES; i++) {
> + igt_list_for_each_entry_safe(en, tmp, &compute_engines[i], link) {
> + igt_list_del(&en->link);
> + free(en);
> + }
> + }
> +
> + if (many_tiles)
> + igt_require_f(tile_count > 1, "Multi-tile scenario requires at least two tiles\n");
> +
> + return engine_count;
> +}
> +
> +/**
> + * SUBTEST: breakpoint-many-sessions-single-tile
> + * Description:
> + * Schedules an EU workload with a preinstalled breakpoint on every compute
> + * engine available on the tile. Checks that the contexts hit the breakpoint
> + * in sequence and resumes them.
> + *
> + * SUBTEST: breakpoint-many-sessions-tiles
> + * Description:
> + * Schedules an EU workload with a preinstalled breakpoint on selected
> + * compute engines, one per tile. Checks that each context hits the
> + * breakpoint and resumes them.
> + */
> +static void test_many_sessions_on_tiles(int fd, bool multi_tile)
> +{
> + int n = 0, flags = SHADER_BREAKPOINT | SHADER_MIN_THREADS;
> + struct xe_eudebug_session *s[GEM_MAX_ENGINES] = {};
> + struct online_debug_data *data[GEM_MAX_ENGINES] = {};
> + struct drm_xe_engine_class_instance *hwe[GEM_MAX_ENGINES] = {};
GEM_MAX_ENGINES?
> + struct drm_xe_eudebug_event_eu_attention *eus;
> + uint64_t current_t, next_t, diff;
> + int i;
> +
> + n = find_suitable_engines(hwe, fd, multi_tile);
> +
> + igt_require_f(n > 1, "Test requires at least two parallel compute engines!\n");
> +
> + for (i = 0; i < n; i++) {
> + data[i] = online_debug_data_create(hwe[i]);
> + s[i] = xe_eudebug_session_create(fd, run_online_client, flags, data[i]);
> +
> + xe_eudebug_debugger_add_trigger(s[i]->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + eu_attention_debug_trigger);
> + xe_eudebug_debugger_add_trigger(s[i]->debugger, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> + save_first_exception_trigger);
> + xe_eudebug_debugger_add_trigger(s[i]->debugger, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> + ufence_ack_trigger);
> +
> + igt_assert_eq(xe_eudebug_debugger_attach(s[i]->debugger, s[i]->client), 0);
> +
> + xe_eudebug_debugger_start_worker(s[i]->debugger);
> + xe_eudebug_client_start(s[i]->client);
> + }
> +
> + for (i = 0; i < n; i++) {
> + /* XXX: Sometimes racy, expects clients to execute in sequence */
> + igt_assert(!wait_for_exception(data[i], STARTUP_TIMEOUT_MS));
> +
> + eus = (struct drm_xe_eudebug_event_eu_attention *)data[i]->exception_event;
> +
> + /* Delay all but the last workload to check serialization */
> + if (i < n - 1)
> + usleep(WORKLOAD_DELAY_US);
> +
> + eu_ctl_resume(s[i]->debugger->master_fd, s[i]->debugger->fd,
> + eus->client_handle, eus->exec_queue_handle,
> + eus->lrc_handle, eus->bitmask, eus->bitmask_size);
> + free(eus);
> + }
> +
> + for (i = 0; i < n - 1; i++) {
> + /* Convert timestamps to microseconds (whole seconds + ns / 1000) */
> + current_t = data[i]->exception_arrived.tv_sec * 1000000ull +
> + data[i]->exception_arrived.tv_nsec / 1000;
> + next_t = data[i + 1]->exception_arrived.tv_sec * 1000000ull +
> + data[i + 1]->exception_arrived.tv_nsec / 1000;
> + diff = current_t < next_t ? next_t - current_t : current_t - next_t;
> +
> + if (multi_tile)
> + igt_assert_f(diff < WORKLOAD_DELAY_US,
> + "Expected to execute workloads concurrently. Actual delay: %lu us\n",
> + diff);
> + else
> + igt_assert_f(diff >= WORKLOAD_DELAY_US,
> + "Expected a serialization of workloads. Actual delay: %lu us\n",
> + diff);
> + }
> +
> + for (i = 0; i < n; i++) {
> + xe_eudebug_client_wait_done(s[i]->client);
> + xe_eudebug_debugger_stop_worker(s[i]->debugger, 1);
> +
> + xe_eudebug_event_log_print(s[i]->debugger->log, true);
> + online_session_check(s[i], flags);
> +
> + xe_eudebug_session_destroy(s[i]);
> + online_debug_data_destroy(data[i]);
> + }
> +}
> +
> +static struct drm_xe_engine_class_instance *pick_compute(int fd, int gt)
> +{
> + struct drm_xe_engine_class_instance *hwe;
> + int count = 0;
> +
> + xe_for_each_engine(fd, hwe)
> + if (is_compute_on_gt(hwe, gt))
> + count++;
> +
> + xe_for_each_engine(fd, hwe)
> + if (is_compute_on_gt(hwe, gt) && rand() % count-- == 0)
> + return hwe;
> +
> + return NULL;
> +}
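For the record, the rand() % count-- walk above is uniform: the i-th matching
engine is accepted with probability 1/(count - i), and the rejection terms
before it telescope, so every engine ends up with P = 1/count. Standalone
version of the same idiom, in case it helps future readers:

/* visit candidates once (after a counting pass); accept the current one
 * with probability 1/remaining -- overall each index has P = 1/count */
static int pick_uniform_index(int count)
{
	for (int i = 0; i < count; i++)
		if (rand() % (count - i) == 0)
			return i;

	return -1; /* unreachable: the final draw is rand() % 1, always 0 */
}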
> +
> +#define test_gt_render_or_compute(t, i915, __hwe) \
> + igt_subtest_with_dynamic(t) \
> + for (int gt = 0; (__hwe = pick_compute(i915, gt)); gt++) \
i915?
I haven't spotted any other issues; apart from the bit operations, it
generally looks correct.
--
Zbigniew
> + igt_dynamic_f("%s%d", xe_engine_class_string((__hwe)->engine_class), \
> + (__hwe)->engine_instance)
> +
> +igt_main
> +{
> + struct drm_xe_engine_class_instance *hwe;
> + bool was_enabled;
> + int fd;
> +
> + igt_fixture {
> + fd = drm_open_driver(DRIVER_XE);
> + intel_allocator_multiprocess_start();
> + igt_srandom();
> + was_enabled = xe_eudebug_enable(fd, true);
> + }
> +
> + test_gt_render_or_compute("basic-breakpoint", fd, hwe)
> + test_basic_online(fd, hwe, SHADER_BREAKPOINT);
> +
> + test_gt_render_or_compute("preempt-breakpoint", fd, hwe)
> + test_preemption(fd, hwe);
> +
> + test_gt_render_or_compute("set-breakpoint", fd, hwe)
> + test_set_breakpoint_online(fd, hwe, SHADER_NOP | TRIGGER_UFENCE_SET_BREAKPOINT);
> +
> + test_gt_render_or_compute("breakpoint-not-in-debug-mode", fd, hwe)
> + test_basic_online(fd, hwe, SHADER_BREAKPOINT | DISABLE_DEBUG_MODE);
> +
> + test_gt_render_or_compute("stopped-thread", fd, hwe)
> + test_basic_online(fd, hwe, SHADER_BREAKPOINT | TRIGGER_RESUME_DELAYED);
> +
> + test_gt_render_or_compute("resume-one", fd, hwe)
> + test_basic_online(fd, hwe, SHADER_BREAKPOINT | TRIGGER_RESUME_ONE);
> +
> + test_gt_render_or_compute("resume-dss", fd, hwe)
> + test_basic_online(fd, hwe, SHADER_BREAKPOINT | TRIGGER_RESUME_DSS);
> +
> + test_gt_render_or_compute("interrupt-all", fd, hwe)
> + test_interrupt_all(fd, hwe, SHADER_LOOP);
> +
> + test_gt_render_or_compute("interrupt-other-debuggable", fd, hwe)
> + test_interrupt_other(fd, hwe, SHADER_LOOP);
> +
> + test_gt_render_or_compute("interrupt-other", fd, hwe)
> + test_interrupt_other(fd, hwe, SHADER_LOOP | DISABLE_DEBUG_MODE);
> +
> + test_gt_render_or_compute("interrupt-all-set-breakpoint", fd, hwe)
> + test_interrupt_all(fd, hwe, SHADER_LOOP | TRIGGER_RESUME_SET_BP);
> +
> + test_gt_render_or_compute("tdctl-parameters", fd, hwe)
> + test_tdctl_parameters(fd, hwe, SHADER_LOOP);
> +
> + test_gt_render_or_compute("reset-with-attention", fd, hwe)
> + test_reset_with_attention_online(fd, hwe, SHADER_BREAKPOINT);
> +
> + test_gt_render_or_compute("interrupt-reconnect", fd, hwe)
> + test_interrupt_reconnect(fd, hwe, SHADER_LOOP | TRIGGER_RECONNECT);
> +
> + test_gt_render_or_compute("single-step", fd, hwe)
> + test_single_step(fd, hwe, SHADER_SINGLE_STEP | SIP_SINGLE_STEP |
> + TRIGGER_RESUME_PARALLEL_WALK);
> +
> + test_gt_render_or_compute("single-step-one", fd, hwe)
> + test_single_step(fd, hwe, SHADER_SINGLE_STEP | SIP_SINGLE_STEP |
> + TRIGGER_RESUME_SINGLE_WALK);
> +
> + test_gt_render_or_compute("debugger-reopen", fd, hwe)
> + test_debugger_reopen(fd, hwe, SHADER_N_NOOP_BREAKPOINT);
> +
> + test_gt_render_or_compute("writes-caching-sram", fd, hwe)
> + test_caching(fd, hwe, SHADER_CACHING_SRAM);
> +
> + test_gt_render_or_compute("writes-caching-vram", fd, hwe)
> + test_caching(fd, hwe, SHADER_CACHING_VRAM);
> +
> + igt_subtest("breakpoint-many-sessions-single-tile")
> + test_many_sessions_on_tiles(fd, false);
> +
> + igt_subtest("breakpoint-many-sessions-tiles")
> + test_many_sessions_on_tiles(fd, true);
> +
> + igt_fixture {
> + xe_eudebug_enable(fd, was_enabled);
> +
> + intel_allocator_multiprocess_stop();
> + drm_close_driver(fd);
> + }
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 43e8516f4..e5d8852f3 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -321,6 +321,7 @@ intel_xe_progs = [
> intel_xe_eudebug_progs = [
> 'xe_eudebug',
> 'xe_exec_sip_eudebug',
> + 'xe_eudebug_online',
> ]
>
> if build_xe_eudebug
> --
> 2.34.1
>