[PATCH i-g-t 3/3] tests/xe/xe_perf_pmu: Test for engine busyness

Riana Tauro riana.tauro at intel.com
Fri Dec 8 08:52:51 UTC 2023


Engine busyness percentage is calculated by reading busy-ticks and
total-active-ticks. Add a test that runs a spinner, reads busy-ticks
and total-active-ticks to compute the engine busyness percentage,
and checks that it is within tolerance.

% busyness = ((busy-ticks) * 100) / total-active-ticks

Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
 include/drm-uapi/xe_drm.h |  24 ++++++
 tests/intel/xe_perf_pmu.c | 151 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 175 insertions(+)

diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
index 590f7b7af..e60b14ec4 100644
--- a/include/drm-uapi/xe_drm.h
+++ b/include/drm-uapi/xe_drm.h
@@ -1100,6 +1100,10 @@ struct drm_xe_wait_user_fence {
  *	fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
  */
 
+enum drm_xe_pmu_engine_sample {
+	DRM_XE_PMU_SAMPLE_BUSY_TICKS = 0,
+};
+
 /*
  * Top bits of every counter are GT id.
  */
@@ -1108,10 +1112,30 @@ struct drm_xe_wait_user_fence {
 #define ___DRM_XE_PMU_OTHER(gt, x) \
 	(((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT))
 
+#define __DRM_XE_PMU_SAMPLE_BITS (4)
+#define __DRM_XE_PMU_SAMPLE_INSTANCE_BITS (8)
+#define __DRM_XE_PMU_CLASS_SHIFT \
+	(__DRM_XE_PMU_SAMPLE_BITS + __DRM_XE_PMU_SAMPLE_INSTANCE_BITS)
+
+/*
+ * Engine configs offset - 0x1000
+ */
+#define __DRM_XE_PMU_ENGINE_OFFSET(gt) \
+	(___DRM_XE_PMU_OTHER(gt, 0xfff) + 1)
+
+#define __DRM_XE_PMU_ENGINE(gt, class, instance, sample) \
+	(((class) << __DRM_XE_PMU_CLASS_SHIFT | \
+	(instance) << __DRM_XE_PMU_SAMPLE_BITS | \
+	(sample)) + __DRM_XE_PMU_ENGINE_OFFSET(gt))
+
+#define DRM_XE_PMU_ENGINE_BUSY_TICKS(gt, class, instance) \
+	__DRM_XE_PMU_ENGINE(gt, class, instance, DRM_XE_PMU_SAMPLE_BUSY_TICKS)
+
 #define DRM_XE_PMU_RENDER_GROUP_BUSY(gt)	___DRM_XE_PMU_OTHER(gt, 0)
 #define DRM_XE_PMU_COPY_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 1)
 #define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt)		___DRM_XE_PMU_OTHER(gt, 2)
 #define DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___DRM_XE_PMU_OTHER(gt, 3)
+#define DRM_XE_PMU_TOTAL_ACTIVE_TICKS(gt)	___DRM_XE_PMU_OTHER(gt, 4)
 
 #if defined(__cplusplus)
 }
diff --git a/tests/intel/xe_perf_pmu.c b/tests/intel/xe_perf_pmu.c
index 42cf62729..28122e7a7 100644
--- a/tests/intel/xe_perf_pmu.c
+++ b/tests/intel/xe_perf_pmu.c
@@ -45,6 +45,30 @@ static int open_pmu(int fd, uint64_t config)
 	return perf_fd;
 }
 
+/*
+ * Open a perf event for @config on the Xe PMU, attached to the event
+ * group led by @group (pass -1 to start a new group).  Skips the test
+ * when the event is not exposed by the kernel (ENODEV) and asserts on
+ * any other failure, so callers always get a valid fd back.
+ */
+static int open_group(int fd, uint64_t config, int group)
+{
+	int perf_fd;
+
+	perf_fd = igt_perf_open_group(xe_perf_type_id(fd), config, group);
+	igt_skip_on(perf_fd < 0 && errno == ENODEV);
+	igt_assert(perf_fd >= 0);
+
+	return perf_fd;
+}
+
+/*
+ * Read @num grouped counters from the perf group leader @fd in one
+ * syscall, copying the counter values into @val.
+ *
+ * Buffer layout assumes the group was opened with read_format
+ * PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED (presumably set
+ * up by igt_perf_open_group — verify against lib/igt_perf):
+ *   buf[0] = number of events, buf[1] = time enabled,
+ *   buf[2..2+num-1] = one value per event.
+ *
+ * Returns the time-enabled field (buf[1]).
+ */
+static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t buf[2 + num];
+	unsigned int i;
+
+	/* A short read would mean fewer events than expected in the group. */
+	igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf));
+
+	for (i = 0; i < num; i++)
+		val[i] = buf[2 + i];
+
+	return buf[1];
+}
+
 static uint64_t engine_group_get_config(int gt, int class)
 {
 	uint64_t config;
@@ -66,6 +90,123 @@ static uint64_t engine_group_get_config(int gt, int class)
 	return config;
 }
 
+/*
+ * Sleep for at least @usec microseconds, retrying after early wakeups
+ * (signals) until the full interval has really elapsed on the
+ * monotonic clock.  Returns the actual elapsed time in nanoseconds,
+ * which gives callers the true measurement window length.
+ */
+static unsigned int measured_usleep(unsigned int usec)
+{
+	struct timespec ts = { };
+	unsigned int slept;
+
+	/* First call primes @ts; elapsed time must be zero at this point. */
+	slept = igt_nsec_elapsed(&ts);
+	igt_assert(slept == 0);
+	do {
+		usleep(usec - slept);
+		/* igt_nsec_elapsed() returns ns; convert to us for the loop. */
+		slept = igt_nsec_elapsed(&ts) / 1000;
+	} while (slept < usec);
+
+	return igt_nsec_elapsed(&ts);
+}
+
+/**
+ * SUBTEST: engine-busy-ticks
+ * Description:
+ *      Measure the busy ticks for each engine and the total active
+ *      ticks while running a spinner, compute the busy percentage
+ *      from the two, and check that the engine busyness percentage
+ *      is within tolerance.
+ * Run type: FULL
+ */
+/* Verify per-engine busy-ticks against total-active-ticks for a spinner. */
+static void test_engine_busy_ticks(int fd, struct drm_xe_engine_class_instance *eci)
+{
+	uint64_t addr = 0x1a0000;
+	struct drm_xe_sync sync[2] = {
+		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
+		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
+	};
+	struct drm_xe_exec exec = {
+		.num_batch_buffer = 1,
+		.num_syncs = 2,
+		.syncs = to_user_pointer(sync),
+	};
+	struct xe_spin_opts spin_opts = { .addr = addr, .preempt = false };
+	struct xe_spin *spin;
+	uint64_t busy_ticks, total_active_ticks;
+	uint64_t before[2], after[2];
+	double busy_percent;
+	uint32_t exec_queue;
+	uint32_t syncobj;
+	uint32_t bo = 0;
+	size_t bo_size;
+	int pmu_fd[2];
+	uint32_t vm;
+
+	/* Group both events under one leader so they are read atomically. */
+	pmu_fd[0] = open_group(fd, DRM_XE_PMU_ENGINE_BUSY_TICKS(eci->gt_id,
+								eci->engine_class,
+								eci->engine_instance), -1);
+	pmu_fd[1] = open_group(fd, DRM_XE_PMU_TOTAL_ACTIVE_TICKS(eci->gt_id), pmu_fd[0]);
+
+	vm = xe_vm_create(fd, DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT, 0);
+	bo_size = sizeof(*spin);
+	bo_size = ALIGN(bo_size + xe_cs_prefetch_size(fd),
+			xe_get_default_alignment(fd));
+
+	bo = xe_bo_create(fd, vm, bo_size, vram_if_possible(fd, eci->gt_id), 0);
+	spin = xe_bo_map(fd, bo, bo_size);
+
+	exec_queue = xe_exec_queue_create(fd, vm, eci, 0);
+	syncobj = syncobj_create(fd, 0);
+
+	sync[0].handle = syncobj_create(fd, 0);
+	xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
+
+	xe_spin_init(spin, &spin_opts);
+
+	/* sync[0] now waits on the bind; sync[1] signals exec completion. */
+	sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
+	sync[1].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
+	sync[1].handle = syncobj;
+
+	exec.exec_queue_id = exec_queue;
+	exec.address = addr;
+	xe_exec(fd, &exec);
+
+	xe_spin_wait_started(spin);
+
+	/* Sample both counters around a fixed fully-busy window. */
+	pmu_read_multi(pmu_fd[0], 2, before);
+	measured_usleep(500000); /* 500ms */
+	pmu_read_multi(pmu_fd[0], 2, after);
+
+	/* The spinner must still be running when the window closes. */
+	igt_assert(!syncobj_wait(fd, &syncobj, 1, 1, 0, NULL));
+	xe_spin_end(spin);
+
+	igt_assert(syncobj_wait(fd, &syncobj, 1, INT64_MAX, 0, NULL));
+	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	busy_ticks = after[0] - before[0];
+	total_active_ticks = after[1] - before[1];
+
+	busy_percent = (double)(busy_ticks * 100) / (double)total_active_ticks;
+
+	/*
+	 * Print with %llu and explicit casts: uint64_t is not "long" on
+	 * every platform, so the previous %ld specifiers were a
+	 * -Wformat/portability bug on 32-bit builds.
+	 */
+	igt_debug("busy-ticks:  after %llu, before %llu delta %llu\n",
+		  (unsigned long long)after[0], (unsigned long long)before[0],
+		  (unsigned long long)busy_ticks);
+	igt_debug("total-active-ticks: after %llu, before %llu delta %llu\n",
+		  (unsigned long long)after[1], (unsigned long long)before[1],
+		  (unsigned long long)total_active_ticks);
+	igt_debug("busyness percent %f\n", busy_percent);
+
+	/* Unbind and wait for completion before tearing everything down. */
+	sync[0].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
+	xe_vm_unbind_async(fd, vm, 0, 0, addr, bo_size, sync, 1);
+	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	syncobj_destroy(fd, sync[0].handle);
+	syncobj_destroy(fd, syncobj);
+
+	xe_exec_queue_destroy(fd, exec_queue);
+	munmap(spin, bo_size);
+	gem_close(fd, bo);
+	xe_vm_destroy(fd, vm);
+	close(pmu_fd[0]);
+	close(pmu_fd[1]);
+
+	/* A fully busy engine should land close to 100%, within 10%. */
+	igt_assert(busy_percent >= 90 && busy_percent <= 110);
+}
+
 /**
  * Test: Basic test for measure the active time when engine of any class active
  *
@@ -324,6 +465,16 @@ igt_main
 		xe_for_each_engine(fd, hwe)
 			test_any_engine_busyness(fd, hwe);
 
+	igt_describe("Validate engine busyness");
+	igt_subtest("engine-busy-ticks") {
+		char buf[80];
+		bool has_busy_ticks = igt_perf_event_found(xe_perf_device
+							  (fd, buf, sizeof(buf)), "busy-ticks");
+		igt_require(has_busy_ticks);
+		xe_for_each_engine(fd, hwe)
+			test_engine_busy_ticks(fd, hwe);
+	}
+
 	igt_fixture {
 		xe_device_put(fd);
 		close(fd);
-- 
2.40.0



More information about the igt-dev mailing list