[PATCH i-g-t v2 58/66] tests/xe_eudebug_online: Add interrupt-reconnect test

Piatkowski, Dominik Karol dominik.karol.piatkowski at intel.com
Mon Aug 5 07:53:36 UTC 2024


Reviewed-by: Dominik Karol Piątkowski <dominik.karol.piatkowski at intel.com>

> -----Original Message-----
> From: Manszewski, Christoph <christoph.manszewski at intel.com>
> Sent: Tuesday, July 30, 2024 1:45 PM
> To: igt-dev at lists.freedesktop.org
> Cc: Kempczynski, Zbigniew <zbigniew.kempczynski at intel.com>; Kamil
> Konieczny <kamil.konieczny at linux.intel.com>; Grzegorzek, Dominik
> <dominik.grzegorzek at intel.com>; Patelczyk, Maciej
> <maciej.patelczyk at intel.com>; Piatkowski, Dominik Karol
> <dominik.karol.piatkowski at intel.com>; Sikora, Pawel
> <pawel.sikora at intel.com>; Hajda, Andrzej <andrzej.hajda at intel.com>;
> Kolanupaka Naveena <kolanupaka.naveena at intel.com>; Kuoppala, Mika
> <mika.kuoppala at intel.com>; Mun, Gwan-gyeong <gwan-
> gyeong.mun at intel.com>
> Subject: [PATCH i-g-t v2 58/66] tests/xe_eudebug_online: Add interrupt-
> reconnect test
> 
> From: Karolina Stolarek <karolina.stolarek at intel.com>
> 
> Introduce an interrupt-reconnect test case in which the debugger is closed
> and reopened on an attention event. Check that the workload is reset when
> no active debugger is detected.
> 
> Signed-off-by: Karolina Stolarek <karolina.stolarek at intel.com>
> Cc: Christoph Manszewski <christoph.manszewski at intel.com>
> Cc: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> ---
>  tests/intel/xe_eudebug_online.c | 126
> +++++++++++++++++++++++++++++++-
>  1 file changed, 123 insertions(+), 3 deletions(-)
> 
> diff --git a/tests/intel/xe_eudebug_online.c b/tests/intel/xe_eudebug_online.c
> index c101bde2f..9f55cec74 100644
> --- a/tests/intel/xe_eudebug_online.c
> +++ b/tests/intel/xe_eudebug_online.c
> @@ -22,11 +22,14 @@
> 
>  #define SHADER_BREAKPOINT	(1 << 0)
>  #define SHADER_LOOP		(1 << 1)
> +#define TRIGGER_RECONNECT	(1 << 27)
>  #define TRIGGER_RESUME_SET_BP	(1 << 28)
>  #define TRIGGER_RESUME_DELAYED	(1 << 29)
>  #define TRIGGER_RESUME_DSS	(1 << 30)
>  #define TRIGGER_RESUME_ONE	(1 << 31)
> 
> +#define DEBUGGER_REATTACHED	1
> +
>  #define STEERING_END_LOOP	0xdeadca11
> 
>  #define SHADER_CANARY 0x01010101
> @@ -682,9 +685,12 @@ static void run_online_client(struct
> xe_eudebug_client *c)
> 
>  	intel_bb_sync(ibb);
> 
> -	/* Make sure it wasn't the timeout. */
> -	igt_assert(igt_nsec_elapsed(&ts) <
> -		   XE_EUDEBUG_DEFAULT_TIMEOUT_MS / MSEC_PER_SEC *
> NSEC_PER_SEC);
> +	if (c->flags & TRIGGER_RECONNECT)
> +		xe_eudebug_client_wait_stage(c, DEBUGGER_REATTACHED);
> +	else
> +		/* Make sure it wasn't the timeout. */
> +		igt_assert(igt_nsec_elapsed(&ts) <
> +			   XE_EUDEBUG_DEFAULT_TIMEOUT_MS /
> MSEC_PER_SEC * NSEC_PER_SEC);
> 
>  	ptr = xe_bo_mmap_ext(fd, buf->handle, buf->size, PROT_READ);
>  	data->threads_count = count_canaries_neq(ptr, w_dim, 0); @@ -
> 1158,6 +1164,117 @@ static void test_tdctl_parameters(int fd, struct
> drm_xe_engine_class_instance *h
>  	online_debug_data_destroy(data);
>  }
> 
> +static void eu_attention_debugger_detach_trigger(struct
> xe_eudebug_debugger *d,
> +						 struct
> drm_xe_eudebug_event *event) {
> +	struct online_debug_data *data = d->ptr;
> +	unsigned int max_size;
> +	uint64_t c_pid;
> +	int ret;
> +
> +	c_pid = d->target_pid;
> +
> +	/* Reset VM data so the re-triggered VM open handler works properly
> */
> +	data->vm_fd = -1;
> +
> +	xe_eudebug_debugger_dettach(d);
> +
> +	/* Let the KMD scan function notice unhandled EU attention */
> +	sleep(1);
> +
> +	/*
> +	 * New session that is created by EU debugger on reconnect restarts
> +	 * seqno, causing issues with log sorting. To avoid that, create
> +	 * a new event log.
> +	 */
> +	max_size = d->log->max_size;
> +	xe_eudebug_event_log_destroy(d->log);
> +	d->log = xe_eudebug_event_log_create("debugger-reconnect",
> max_size);
> +
> +	ret = xe_eudebug_connect(d->master_fd, c_pid, 0);
> +	igt_assert(ret >= 0);
> +	d->fd = ret;
> +	d->target_pid = c_pid;
> +
> +	/* Let the discovery worker discover resources */
> +	sleep(2);
> +
> +	xe_eudebug_debugger_signal_stage(d, DEBUGGER_REATTACHED); }
> +
> +/**
> + * SUBTEST: interrupt-reconnect
> + * Description:
> + *	Schedules an EU workload that should last a few seconds,
> + *	interrupts all threads and detaches the debugger when attention is
> + *	raised. The test checks if KMD resets the workload when there's
> + *	no debugger attached and does the event playback on discovery.
> + */
> +static void test_interrupt_reconnect(int fd, struct
> +drm_xe_engine_class_instance *hwe, int flags) {
> +	struct drm_xe_eudebug_event *e = NULL;
> +	struct online_debug_data *data;
> +	struct xe_eudebug_session *s;
> +	uint32_t val;
> +
> +	data = online_debug_data_create(hwe);
> +	s = xe_eudebug_session_create(fd, run_online_client, flags, data);
> +
> +	xe_eudebug_debugger_add_trigger(s->d,
> DRM_XE_EUDEBUG_EVENT_OPEN,
> +					open_trigger);
> +	xe_eudebug_debugger_add_trigger(s->d,
> DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE,
> +					exec_queue_trigger);
> +	xe_eudebug_debugger_add_trigger(s->d,
> DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> +					eu_attention_debug_trigger);
> +	xe_eudebug_debugger_add_trigger(s->d,
> DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
> +
> 	eu_attention_debugger_detach_trigger);
> +	xe_eudebug_debugger_add_trigger(s->d,
> DRM_XE_EUDEBUG_EVENT_VM, vm_open_trigger);
> +	xe_eudebug_debugger_add_trigger(s->d,
> DRM_XE_EUDEBUG_EVENT_METADATA,
> +					create_metadata_trigger);
> +	xe_eudebug_debugger_add_trigger(s->d,
> DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
> +					ufence_ack_trigger);
> +
> +	igt_assert_eq(xe_eudebug_debugger_attach(s->d, s->c), 0);
> +	xe_eudebug_debugger_start_worker(s->d);
> +	xe_eudebug_client_start(s->c);
> +
> +	/* wait for workload to start */
> +	igt_for_milliseconds(STARTUP_TIMEOUT_MS) {
> +		/* collect needed data from triggers */
> +		if (READ_ONCE(data->vm_fd) == -1 || READ_ONCE(data-
> >target_size) == 0)
> +			continue;
> +
> +		if (pread(data->vm_fd, &val, sizeof(val), data->target_offset)
> == sizeof(val))
> +			if (val != 0)
> +				break;
> +	}
> +
> +	pthread_mutex_lock(&data->mutex);
> +	igt_assert(data->client_handle != -1);
> +	igt_assert(data->exec_queue_handle != -1);
> +	eu_ctl_interrupt_all(s->d->fd, data->client_handle,
> +			     data->exec_queue_handle, data->lrc_handle);
> +	pthread_mutex_unlock(&data->mutex);
> +
> +	xe_eudebug_client_wait_done(s->c);
> +
> +	xe_eudebug_debugger_stop_worker(s->d, 1);
> +
> +	xe_eudebug_event_log_print(s->d->log, true);
> +	xe_eudebug_event_log_print(s->c->log, true);
> +
> +	xe_eudebug_session_check(s, true,
> XE_EUDEBUG_FILTER_EVENT_VM_BIND |
> +
> XE_EUDEBUG_FILTER_EVENT_VM_BIND_OP |
> +
> XE_EUDEBUG_FILTER_EVENT_VM_BIND_UFENCE);
> +
> +	/* We expect workload reset, so no attention should be raised */
> +	xe_eudebug_for_each_event(e, s->d->log)
> +		igt_assert(e->type !=
> DRM_XE_EUDEBUG_EVENT_EU_ATTENTION);
> +
> +	xe_eudebug_session_destroy(s);
> +	online_debug_data_destroy(data);
> +}
> +
>  static struct drm_xe_engine_class_instance *pick_compute(int fd, int gt)  {
>  	struct drm_xe_engine_class_instance *hwe; @@ -1220,6 +1337,9
> @@ igt_main
>  	test_gt_render_or_compute("reset-with-attention", fd, hwe)
>  		test_reset_with_attention_online(fd, hwe,
> SHADER_BREAKPOINT);
> 
> +	test_gt_render_or_compute("interrupt-reconnect", fd, hwe)
> +		test_interrupt_reconnect(fd, hwe, SHADER_LOOP |
> TRIGGER_RECONNECT);
> +
>  	igt_fixture {
>  		xe_eudebug_enable(fd, was_enabled);
> 
> --
> 2.34.1



More information about the igt-dev mailing list