[PATCH 59/66] tests/xe_eudebug_online: Add single-step and single-step-one tests

Christoph Manszewski christoph.manszewski at intel.com
Mon Jul 29 16:01:52 UTC 2024


From: Dominik Karol Piątkowski <dominik.karol.piatkowski at intel.com>

Add a single-step test that steps the debugger through the shader
instruction by instruction, advancing all threads in parallel.
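
On every EU attention event the debugger counts how many threads have
already moved past the breakpoint, then writes a steering dword that
tells the SIP whether to single-step once more or to run to EOT. A rough
sketch of that per-event logic for the parallel walk (simplified, not
literal code; helper and macro names are the ones introduced by this
patch):

	uint32_t val;

	if (data->steps_done < SINGLE_STEP_COUNT) {
		/* threads whose AIP advanced by one instruction (0x10) */
		data->stepped_threads_count =
			get_stepped_threads_count(data, threads);
		val = STEERING_SINGLE_STEP;	/* SIP steps one instruction */
	} else {
		val = STEERING_CONTINUE;	/* SIP lets the shader finish */
	}
	pwrite(data->vm_fd, &val, sizeof(val),
	       data->target_offset + steering_offset(threads));
	eu_ctl_resume(d->master_fd, d->fd, att->client_handle,
		      att->exec_queue_handle, att->lrc_handle,
		      att->bitmask, att->bitmask_size);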

Add a single-step-one test that steps the debugger through the shader
instruction by instruction, advancing one thread at a time.
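
The one-at-a-time variant narrows the attention bitmask down to a single
thread before resuming, roughly as follows (again only a sketch; on
platforms that need the resume workaround the bit is taken from a saved
copy of the first attention bitmask via copy_first_bit() instead):

	if (data->steps_done < 2) {
		data->stepped_threads_count =
			get_stepped_threads_count(data, threads);
		/* keep only the next not-yet-stepped thread in the bitmask */
		copy_nth_bit(att->bitmask, att->bitmask, att->bitmask_size,
			     data->stepped_threads_count + 1);
		val = STEERING_SINGLE_STEP;
	} else {
		val = STEERING_CONTINUE;
	}
	/* steering write and eu_ctl_resume() as in the parallel walk */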

Signed-off-by: Dominik Karol Piątkowski <dominik.karol.piatkowski at intel.com>
Cc: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
---
 tests/intel/xe_eudebug_online.c | 260 ++++++++++++++++++++++++++++++--
 1 file changed, 246 insertions(+), 14 deletions(-)

diff --git a/tests/intel/xe_eudebug_online.c b/tests/intel/xe_eudebug_online.c
index 9f55cec74..cabe2101e 100644
--- a/tests/intel/xe_eudebug_online.c
+++ b/tests/intel/xe_eudebug_online.c
@@ -20,16 +20,23 @@
 #include "intel_mocs.h"
 #include "gpgpu_shader.h"
 
-#define SHADER_BREAKPOINT	(1 << 0)
-#define SHADER_LOOP		(1 << 1)
-#define TRIGGER_RECONNECT	(1 << 27)
-#define TRIGGER_RESUME_SET_BP	(1 << 28)
-#define TRIGGER_RESUME_DELAYED	(1 << 29)
-#define TRIGGER_RESUME_DSS	(1 << 30)
-#define TRIGGER_RESUME_ONE	(1 << 31)
+#define SHADER_BREAKPOINT		(1 << 0)
+#define SHADER_LOOP			(1 << 1)
+#define SHADER_SINGLE_STEP		(1 << 2)
+#define SIP_SINGLE_STEP			(1 << 3)
+#define TRIGGER_RESUME_SINGLE_WALK	(1 << 25)
+#define TRIGGER_RESUME_PARALLEL_WALK	(1 << 26)
+#define TRIGGER_RECONNECT		(1 << 27)
+#define TRIGGER_RESUME_SET_BP		(1 << 28)
+#define TRIGGER_RESUME_DELAYED		(1 << 29)
+#define TRIGGER_RESUME_DSS		(1 << 30)
+#define TRIGGER_RESUME_ONE		(1 << 31)
 
 #define DEBUGGER_REATTACHED	1
 
+#define SINGLE_STEP_COUNT	16
+#define STEERING_SINGLE_STEP	0
+#define STEERING_CONTINUE	0x00c0ffee
 #define STEERING_END_LOOP	0xdeadca11
 
 #define SHADER_CANARY 0x01010101
@@ -92,7 +99,8 @@ static struct intel_buf *create_uc_buf(int fd, int width, int height)
 
 static int get_number_of_threads(uint64_t flags)
 {
-	if (flags & (TRIGGER_RESUME_ONE))
+	if (flags & (TRIGGER_RESUME_ONE | TRIGGER_RESUME_SINGLE_WALK |
+		     TRIGGER_RESUME_PARALLEL_WALK))
 		return 32;
 
 	return 512;
@@ -114,21 +122,30 @@ static struct gpgpu_shader *get_shader(int fd, const unsigned int flags)
 		gpgpu_shader__write_dword(shader, SHADER_CANARY, 0);
 		gpgpu_shader__jump_neq(shader, 0, w_dim.y, STEERING_END_LOOP);
 		gpgpu_shader__write_dword(shader, SHADER_CANARY, 0);
+	} else if (flags & SHADER_SINGLE_STEP) {
+		gpgpu_shader__nop(shader);
+		gpgpu_shader__breakpoint(shader);
+		for (int i = 0; i < SINGLE_STEP_COUNT; i++)
+			gpgpu_shader__nop(shader);
 	}
 
 	gpgpu_shader__eot(shader);
 	return shader;
 }
 
-static struct gpgpu_shader *get_sip(int fd)
+static struct gpgpu_shader *get_sip(int fd, const unsigned int flags)
 {
+	struct dim_t w_dim = walker_dimensions(get_number_of_threads(flags));
 	static struct gpgpu_shader *sip;
 
 	sip = gpgpu_shader_create(fd);
 	gpgpu_shader__write_aip(sip, 0);
 
 	gpgpu_shader__wait(sip);
-	gpgpu_shader__end_system_routine(sip, true);
+	if (flags & SIP_SINGLE_STEP)
+		gpgpu_shader__end_system_routine_step_if_eq(sip, w_dim.y, 0);
+	else
+		gpgpu_shader__end_system_routine(sip, true);
 	return sip;
 }
 
@@ -281,6 +298,10 @@ struct online_debug_data {
 	size_t bb_size;
 	int vm_fd;
 	uint32_t first_aip;
+	uint64_t *aips_offset_table;
+	uint32_t steps_done;
+	uint8_t *single_step_bitmask;
+	int stepped_threads_count;
 	struct timespec exception_arrived;
 	int last_eu_control_seqno;
 };
@@ -298,13 +319,14 @@ online_debug_data_create(struct drm_xe_engine_class_instance *hwe)
 	data->exec_queue_handle = -1ULL;
 	data->lrc_handle = -1ULL;
 	data->vm_fd = -1;
+	data->stepped_threads_count = -1;
 
 	return data;
 }
 
 static void online_debug_data_destroy(struct online_debug_data *data)
 {
-
+	free(data->aips_offset_table);
 	munmap(data, ALIGN(sizeof(*data), PAGE_SIZE));
 }
 
@@ -366,6 +388,25 @@ static void copy_first_bit(uint8_t *dst, uint8_t *src, int size)
 	}
 }
 
+static void copy_nth_bit(uint8_t *dst, uint8_t *src, int size, int n)
+{
+	int count = 0;
+
+	for (int i = 0; i < size; i++) {
+		uint32_t tmp = src[i];
+		for (int j = 7; j >= 0; j--) {
+			if (tmp & (1 << j)) {
+				count++;
+				if (count == n)
+					dst[i] |= (1 << j);
+				else
+					dst[i] &= ~(1 << j);
+			} else
+				dst[i] &= ~(1 << j);
+		}
+	}
+}
+
 /*
  * Searches for the first instruction. It stands on assumption,
  * that shader kernel is placed before sip within the bb.
@@ -431,6 +472,57 @@ static void set_breakpoint_once(struct xe_eudebug_debugger *d,
 	gpgpu_shader_destroy(kernel);
 }
 
+static void get_aips_offset_table(struct online_debug_data *data, int threads)
+{
+	size_t sz = sizeof(uint32_t);
+	uint32_t aip;
+	uint32_t first_aip;
+	int table_index = 0;
+
+	if (data->aips_offset_table)
+		return;
+
+	data->aips_offset_table = malloc(threads * sizeof(uint64_t));
+	igt_assert(data->aips_offset_table);
+
+	igt_assert_eq(pread(data->vm_fd, &first_aip, sz, data->target_offset), sz);
+	data->first_aip = first_aip;
+	data->aips_offset_table[table_index++] = 0;
+
+	fsync(data->vm_fd);
+	for (int i = 1; i < data->target_size; i++) {
+		igt_assert_eq(pread(data->vm_fd, &aip, sz, data->target_offset + i), sz);
+		if (aip == first_aip)
+			data->aips_offset_table[table_index++] = i;
+	}
+
+	igt_assert_eq(threads, table_index);
+
+	igt_debug("AIPs offset table:\n");
+	for (int i = 0; i < threads; i++) {
+		igt_debug("%lx\n", data->aips_offset_table[i]);
+	}
+}
+
+static int get_stepped_threads_count(struct online_debug_data *data, int threads)
+{
+	int count = 0;
+	size_t sz = sizeof(uint32_t);
+	uint32_t aip;
+
+	fsync(data->vm_fd);
+	for (int i = 0; i < threads; i++) {
+		igt_assert_eq(pread(data->vm_fd, &aip, sz,
+				    data->target_offset + data->aips_offset_table[i]), sz);
+		if (aip != data->first_aip) {
+			igt_assert(aip == data->first_aip + 0x10);
+			count++;
+		}
+	}
+
+	return count;
+}
+
 #define MAX_PREEMPT_TIMEOUT 10ull
 static void eu_attention_resume_trigger(struct xe_eudebug_debugger *d,
 					struct drm_xe_eudebug_event *e)
@@ -493,6 +585,98 @@ static void eu_attention_resume_trigger(struct xe_eudebug_debugger *d,
 	free(bitmask);
 }
 
+static void eu_attention_resume_single_step_trigger(struct xe_eudebug_debugger *d,
+						    struct drm_xe_eudebug_event *e)
+{
+	struct drm_xe_eudebug_event_eu_attention *att = (void *) e;
+	struct online_debug_data *data = d->ptr;
+	const int threads = get_number_of_threads(d->flags);
+	uint32_t val;
+	size_t sz = sizeof(uint32_t);
+
+	get_aips_offset_table(data, threads);
+
+	if (d->flags & TRIGGER_RESUME_PARALLEL_WALK) {
+		if (data->stepped_threads_count != -1)
+			if (data->steps_done < SINGLE_STEP_COUNT) {
+				int stepped_threads_count_after_resume =
+						get_stepped_threads_count(data, threads);
+				igt_debug("Stepped threads after: %d\n",
+					  stepped_threads_count_after_resume);
+
+				if (stepped_threads_count_after_resume == threads) {
+					data->first_aip += 0x10;
+					data->steps_done++;
+				}
+
+				igt_debug("Shader steps: %d\n", data->steps_done);
+				igt_assert(data->stepped_threads_count == 0);
+				igt_assert(stepped_threads_count_after_resume == threads);
+			}
+
+		if (data->steps_done < SINGLE_STEP_COUNT) {
+			data->stepped_threads_count = get_stepped_threads_count(data, threads);
+			igt_debug("Stepped threads before: %d\n", data->stepped_threads_count);
+		}
+
+		val = data->steps_done < SINGLE_STEP_COUNT ? STEERING_SINGLE_STEP :
+							     STEERING_CONTINUE;
+	} else if (d->flags & TRIGGER_RESUME_SINGLE_WALK) {
+		if (data->stepped_threads_count != -1)
+			if (data->steps_done < 2) {
+				int stepped_threads_count_after_resume =
+						get_stepped_threads_count(data, threads);
+				igt_debug("Stepped threads after: %d\n",
+					  stepped_threads_count_after_resume);
+
+				if (stepped_threads_count_after_resume == threads) {
+					data->first_aip += 0x10;
+					data->steps_done++;
+					free(data->single_step_bitmask);
+					data->single_step_bitmask = 0;
+				}
+
+				igt_debug("Shader steps: %d\n", data->steps_done);
+				igt_assert(data->stepped_threads_count +
+					   (intel_gen_needs_resume_wa(d->master_fd) ? 2 : 1) ==
+					   stepped_threads_count_after_resume);
+			}
+
+		if (data->steps_done < 2) {
+			data->stepped_threads_count = get_stepped_threads_count(data, threads);
+			igt_debug("Stepped threads before: %d\n", data->stepped_threads_count);
+			if (intel_gen_needs_resume_wa(d->master_fd)) {
+				if (!data->single_step_bitmask) {
+					data->single_step_bitmask = malloc(att->bitmask_size *
+									   sizeof(uint8_t));
+					igt_assert(data->single_step_bitmask);
+					memcpy(data->single_step_bitmask, att->bitmask,
+					       att->bitmask_size);
+				}
+
+				copy_first_bit(att->bitmask, data->single_step_bitmask,
+					       att->bitmask_size);
+			} else
+				copy_nth_bit(att->bitmask, att->bitmask, att->bitmask_size,
+					     data->stepped_threads_count + 1);
+		}
+
+		val = data->steps_done < 2 ? STEERING_SINGLE_STEP : STEERING_CONTINUE;
+	}
+
+	igt_assert_eq(pwrite(data->vm_fd, &val, sz,
+			     data->target_offset + steering_offset(threads)), sz);
+	fsync(data->vm_fd);
+
+	eu_ctl_resume(d->master_fd, d->fd, att->client_handle,
+		      att->exec_queue_handle, att->lrc_handle,
+		      att->bitmask, att->bitmask_size);
+
+	if (data->single_step_bitmask)
+		for (int i = 0; i < att->bitmask_size; i++)
+			data->single_step_bitmask[i] &= ~att->bitmask[i];
+}
+
 static void open_trigger(struct xe_eudebug_debugger *d,
 			 struct drm_xe_eudebug_event *e)
 {
@@ -614,7 +798,7 @@ static struct intel_bb *xe_bb_create_on_offset(int fd, uint32_t exec_queue, uint
 
 static void run_online_client(struct xe_eudebug_client *c)
 {
-	const int threads = c->flags & (TRIGGER_RESUME_ONE) ? 64 : 512;
+	int threads = get_number_of_threads(c->flags);
 	const uint64_t target_offset = 0x1a000000;
 	const uint64_t bb_offset = 0x1b000000;
 	const size_t bb_size = 4096;
@@ -651,7 +835,7 @@ static void run_online_client(struct xe_eudebug_client *c)
 	xe_device_get(fd);
 
 	/* Additional memory for steering control */
-	if (c->flags & SHADER_LOOP)
+	if (c->flags & SHADER_LOOP || c->flags & SHADER_SINGLE_STEP)
 		s_dim.y++;
 	buf = create_uc_buf(fd, s_dim.x, s_dim.y);
 
@@ -674,7 +858,7 @@ static void run_online_client(struct xe_eudebug_client *c)
 				     bb_offset, bb_size);
 	intel_bb_set_lr_mode(ibb, true);
 
-	sip = get_sip(fd);
+	sip = get_sip(fd, c->flags);
 	shader = get_shader(fd, c->flags);
 
 	igt_nsec_elapsed(&ts);
@@ -1275,6 +1459,46 @@ static void test_interrupt_reconnect(int fd, struct drm_xe_engine_class_instance
 	online_debug_data_destroy(data);
 }
 
+/**
+ * SUBTEST: single-step
+ * Description:
+ *	Schedules an EU workload with 16 nops after a breakpoint, then
+ *	single-steps through the shader, advancing all threads on each step
+ *	and checking that all threads advanced.
+ *
+ * SUBTEST: single-step-one
+ * Description:
+ *	Schedules an EU workload with 16 nops after a breakpoint, then
+ *	single-steps through the shader, advancing one thread per step and
+ *	checking that one thread advanced each step. Due to time constraints,
+ *	only the first two instructions after the breakpoint are validated.
+ */
+static void test_single_step(int fd, struct drm_xe_engine_class_instance *hwe, int flags)
+{
+	struct xe_eudebug_session *s;
+	struct online_debug_data *data;
+
+	data = online_debug_data_create(hwe);
+	s = xe_eudebug_session_create(fd, run_online_client, flags, data);
+
+	xe_eudebug_debugger_add_trigger(s->d, DRM_XE_EUDEBUG_EVENT_OPEN,
+					open_trigger);
+	xe_eudebug_debugger_add_trigger(s->d, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
+					eu_attention_debug_trigger);
+	xe_eudebug_debugger_add_trigger(s->d, DRM_XE_EUDEBUG_EVENT_EU_ATTENTION,
+					eu_attention_resume_single_step_trigger);
+	xe_eudebug_debugger_add_trigger(s->d, DRM_XE_EUDEBUG_EVENT_VM, vm_open_trigger);
+	xe_eudebug_debugger_add_trigger(s->d, DRM_XE_EUDEBUG_EVENT_METADATA,
+					create_metadata_trigger);
+	xe_eudebug_debugger_add_trigger(s->d, DRM_XE_EUDEBUG_EVENT_VM_BIND_UFENCE,
+					ufence_ack_trigger);
+
+	xe_eudebug_session_run(s);
+	online_session_check(s, s->flags);
+	xe_eudebug_session_destroy(s);
+	online_debug_data_destroy(data);
+}
+
 static struct drm_xe_engine_class_instance *pick_compute(int fd, int gt)
 {
 	struct drm_xe_engine_class_instance *hwe;
@@ -1340,6 +1564,14 @@ igt_main
 	test_gt_render_or_compute("interrupt-reconnect", fd, hwe)
 		test_interrupt_reconnect(fd, hwe, SHADER_LOOP | TRIGGER_RECONNECT);
 
+	test_gt_render_or_compute("single-step", fd, hwe)
+		test_single_step(fd, hwe, SHADER_SINGLE_STEP | SIP_SINGLE_STEP |
+				 TRIGGER_RESUME_PARALLEL_WALK);
+
+	test_gt_render_or_compute("single-step-one", fd, hwe)
+		test_single_step(fd, hwe, SHADER_SINGLE_STEP | SIP_SINGLE_STEP |
+				 TRIGGER_RESUME_SINGLE_WALK);
+
 	igt_fixture {
 		xe_eudebug_enable(fd, was_enabled);
 
-- 
2.34.1


