[PATCH i-g-t 3/3] tests/intel/xe_compute: Added eu-busy-10s test
nishit.sharma at intel.com
nishit.sharma at intel.com
Mon Aug 18 07:54:46 UTC 2025
From: Nishit Sharma <nishit.sharma at intel.com>
Added multi threading to utilize available CCS instances as per num_slices
available in /sys/class/drm/cardX/device/tileX/gtX/num_cslices.
Depending upon num_slices, number of threads will be created and
schedule compute workload in different CCS instances. Sleep is
called via loop_kernel_duration so that HW engine runs for sleep duration.
During execution, the per-EU utilization percentage is grabbed and displayed
on console.
Signed-off-by: Nishit Sharma <nishit.sharma at intel.com>
---
tests/intel/igt_eu_perf.h | 255 ++++++++++++++++++++++++++++++++++++++
tests/intel/xe_compute.c | 222 +++++++++++++++++++++++++++++++++
tests/meson.build | 1 +
3 files changed, 478 insertions(+)
create mode 100644 tests/intel/igt_eu_perf.h
diff --git a/tests/intel/igt_eu_perf.h b/tests/intel/igt_eu_perf.h
new file mode 100644
index 000000000..b2758a941
--- /dev/null
+++ b/tests/intel/igt_eu_perf.h
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef IGT_EU_PERF_H
+#define IGT_EU_PERF_H
+
+#include "igt_perf.h"
+
+/*
+ * engine_ptr() indexes the xe_engine array allocated immediately after
+ * struct xe_pmu_device (see the over-sized calloc() in xe_init_engines()).
+ */
+#define engine_ptr(pmu_device, n) (&(pmu_device)->engine + (n))
+
+/*
+ * NOTE(review): these are object *definitions* in a header; including this
+ * header from more than one translation unit will cause duplicate-symbol
+ * link errors.  They are also read/written by different threads without
+ * atomics -- consider atomic_bool and moving the definitions into a .c file.
+ */
+bool eu_busy, stop_top;		/* monitor-thread control/result flags */
+double eu_perc;			/* last sampled EU busyness percentage */
+
+/* Previous and current raw readings of one PMU counter. */
+struct xe_pmu_pair {
+	uint64_t cur;
+	uint64_t prev;
+};
+
+/* One perf event: type/config used to open it, plus its group state. */
+struct xe_pmu_counter {
+	uint64_t type;
+	uint64_t config;
+	unsigned int idx;	/* slot in the group read buffer */
+	struct xe_pmu_pair val;
+	int fd;
+	bool present;		/* true once the event opened successfully */
+};
+
+/* One compute engine: identity plus its two busyness counters. */
+struct xe_engine {
+	const char *name;
+	struct drm_xe_engine_class_instance drm_xe_engine;
+	struct xe_pmu_counter engine_active_ticks;
+	struct xe_pmu_counter engine_total_ticks;
+};
+
+/* PMU device with a trailing array of engines, accessed via engine_ptr(). */
+struct xe_pmu_device {
+	unsigned int num_engines;
+	unsigned int num_counters;
+	int fd;			/* perf group-leader fd, -1 until first open */
+	char *device;		/* strdup'd PMU name, owned by this struct */
+	struct xe_engine engine;	/* first element of the trailing array */
+};
+
+/* Arguments handed to each intel_compute_thread() worker. */
+struct thread_data {
+	pthread_t thread;
+	pthread_mutex_t *mutex;
+	pthread_cond_t *cond;
+	int class;
+	int fd;
+	int gt;
+	struct user_execenv *execenv;
+	struct drm_xe_engine_class_instance *eci;
+	bool *go;		/* start gate, flipped under mutex by the spawner */
+};
+
+/* Resolve the xe PMU device name for @fd and return a heap-owned copy. */
+static char *pmu_name(int fd)
+{
+	char name[30];
+
+	xe_perf_device(fd, name, sizeof(name));
+	return strdup(name);
+}
+
+/*
+ * Open one perf event, grouped under *fd (the group leader).  On the first
+ * successful open *fd is still -1, so the new fd becomes the leader.  On
+ * success the counter is marked present and assigned the next slot in the
+ * group read buffer.  Returns the event fd, or a negative value on failure.
+ */
+static int
+_open_pmu(uint64_t type, unsigned int *cnt,
+	  struct xe_pmu_counter *pmu, int *fd)
+{
+	int ret = igt_perf_open_group(type, pmu->config, *fd);
+
+	if (ret < 0)
+		return ret;
+
+	if (*fd == -1)
+		*fd = ret;
+	pmu->present = true;
+	pmu->idx = (*cnt)++;
+	pmu->fd = ret;
+
+	return ret;
+}
+
+/*
+ * Enumerate the device's compute engines and open the engine-active-ticks /
+ * engine-total-ticks PMU events for each of them, grouped under one leader fd.
+ * Returns a heap-allocated xe_pmu_device (free with eu_util_free()), or NULL
+ * on failure.  Unlike the original, all error paths now release the partially
+ * built state (was: engines, engines->device and opened fds leaked).
+ */
+static struct xe_pmu_device
+*xe_init_engines(int dev_fd)
+{
+	struct xe_pmu_device *engines;
+	struct drm_xe_engine_class_instance *hwe;
+	int engine_count = 0;
+	char device[30];
+	int fd;
+	uint32_t engine_class, engine_instance, gt_shift;
+	uint64_t engine_active_config, engine_total_config;
+	uint64_t type = igt_perf_type_id(xe_perf_device(dev_fd, device, sizeof(device)));
+
+	xe_device_get(dev_fd);
+
+	/* Count compute engines first so the trailing array can be sized. */
+	xe_for_each_engine(dev_fd, hwe)
+		if (hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
+			engine_count++;
+
+	/* One xe_pmu_device plus engine_count trailing xe_engine slots. */
+	engines = calloc(1, sizeof(struct xe_pmu_device) +
+			 engine_count * sizeof(struct xe_engine));
+	if (!engines)
+		return NULL;
+
+	engines->num_engines = 0;
+	engines->num_counters = 0;
+	engines->fd = -1;
+	engines->device = pmu_name(dev_fd);
+	if (!engines->device)	/* was: strdup failure left NULL unchecked */
+		goto err_free;
+
+	perf_event_format(engines->device, "gt", &gt_shift);
+	perf_event_format(engines->device, "engine_class", &engine_class);
+	perf_event_format(engines->device, "engine_instance", &engine_instance);
+	if (perf_event_config(engines->device, "engine-active-ticks",
+			      &engine_active_config) < 0)
+		goto err_free;
+	if (perf_event_config(engines->device, "engine-total-ticks",
+			      &engine_total_config) < 0)
+		goto err_free;
+
+	xe_for_each_engine(dev_fd, hwe) {
+		uint64_t param_config;
+		struct xe_engine *engine;
+
+		if (hwe->engine_class != DRM_XE_ENGINE_CLASS_COMPUTE)
+			continue;
+
+		engine = engine_ptr(engines, engines->num_engines);
+		param_config = (uint64_t)hwe->gt_id << gt_shift |
+			       hwe->engine_class << engine_class |
+			       hwe->engine_instance << engine_instance;
+		engine->drm_xe_engine = *hwe;
+		engine->engine_active_ticks.config = engine_active_config | param_config;
+		engine->engine_total_ticks.config = engine_total_config | param_config;
+
+		fd = _open_pmu(type, &engines->num_counters,
+			       &engine->engine_active_ticks, &engines->fd);
+		if (fd < 0)
+			goto err_close;
+
+		fd = _open_pmu(type, &engines->num_counters,
+			       &engine->engine_total_ticks, &engines->fd);
+		if (fd < 0)
+			goto err_close;
+
+		engines->num_engines++;
+	}
+
+	return engines;
+
+err_close:
+	/* num_engines + 1 covers the partially opened current engine too. */
+	for (unsigned int i = 0; i < engines->num_engines + 1; i++) {
+		struct xe_engine *engine = engine_ptr(engines, i);
+
+		if (engine->engine_active_ticks.present)
+			close(engine->engine_active_ticks.fd);
+		if (engine->engine_total_ticks.present)
+			close(engine->engine_total_ticks.fd);
+	}
+err_free:
+	free(engines->device);
+	free(engines);
+	return NULL;
+}
+
+/*
+ * Close every opened PMU fd and release the xe_pmu_device allocated by
+ * xe_init_engines().  Safe to call with NULL.  Now also frees the
+ * strdup'd device name (was: leaked).
+ */
+static void
+eu_util_free(struct xe_pmu_device *pmu_device)
+{
+	igt_info("EU cleanup process\n");
+
+	if (!pmu_device)
+		return;
+
+	for (unsigned int j = 0; j < pmu_device->num_engines; j++) {
+		struct xe_engine *eng = engine_ptr(pmu_device, j);
+
+		if (eng->engine_active_ticks.present)
+			close(eng->engine_active_ticks.fd);
+		if (eng->engine_total_ticks.present)
+			close(eng->engine_total_ticks.fd);
+	}
+	free(pmu_device->device);	/* strdup'd in pmu_name() */
+	free(pmu_device);
+}
+
+/* Shift cur into prev and latch the counter's new raw value from val[]. */
+static void
+update_sample(struct xe_pmu_counter *counter, uint64_t *val)
+{
+	if (!counter->present)
+		return;
+
+	counter->val.prev = counter->val.cur;
+	counter->val.cur = val[counter->idx];
+}
+
+/*
+ * Read all grouped counters in one go and update each engine's prev/cur
+ * sample pair.  NOTE(review): the leading "2" slots assume the group read
+ * returns two header u64s (nr plus one timing field) before the per-counter
+ * values -- confirm against igt_perf_open_group()'s read_format.
+ */
+static void xe_pmu_device_sample(const void *obj)
+{
+	struct xe_pmu_device *engines = ((struct xe_pmu_device *)obj);
+	const unsigned int num_val = engines->num_counters;	/* was: int vs unsigned i */
+	uint64_t val[2 + num_val];
+	uint64_t buf[2 + num_val];
+	unsigned int i;
+	ssize_t len;
+
+	memset(buf, 0, sizeof(buf));
+	len = read(engines->fd, buf, sizeof(buf));
+	assert(len == sizeof(buf));
+
+	for (i = 0; i < num_val; i++)
+		val[i] = buf[2 + i];
+
+	for (i = 0; i < engines->num_engines; i++) {
+		struct xe_engine *engine = engine_ptr(engines, i);
+
+		update_sample(&engine->engine_active_ticks, val);
+		update_sample(&engine->engine_total_ticks, val);
+	}
+}
+
+/*
+ * Busyness over the last sampling interval: active ticks as a percentage of
+ * total ticks.  Returns 0.0 when no total ticks elapsed between samples
+ * (was: 0/0 division producing NaN, which defeated every later comparison).
+ */
+static double
+pmu_active_percentage(struct xe_engine *engine)
+{
+	double active = engine->engine_active_ticks.val.cur -
+			engine->engine_active_ticks.val.prev;
+	double total = engine->engine_total_ticks.val.cur -
+		       engine->engine_total_ticks.val.prev;
+
+	if (total == 0.0)
+		return 0.0;
+
+	return active * 100 / total;
+}
+
+/*
+ * Log per-engine busyness.  Side effects: stores the last computed value in
+ * the global eu_perc, and clears the global eu_busy flag once any engine
+ * exceeds 10% (i.e. a workload is observably running).
+ */
+static void xe_print_perc(const void *obj)
+{
+	struct xe_pmu_device *pmu_device = ((struct xe_pmu_device *)obj);
+
+	for (unsigned int i = 0; i < pmu_device->num_engines; i++) {
+		struct xe_engine *engine = engine_ptr(pmu_device, i);
+		double percentage;
+
+		igt_assert(engine);
+
+		percentage = pmu_active_percentage(engine);
+		eu_perc = percentage;
+
+		igt_debug("Engine_instance :%d EU busyness :%5.1f\n",
+			  engine->drm_xe_engine.engine_instance, percentage);
+		if (percentage == 0.0) {	/* was: float tested with '!' */
+			/* was: "BU busyness" typo in the message */
+			igt_debug("No workload scheduled, EU busyness :%5.1f expected\n",
+				  percentage);
+		} else if (percentage > 10.0) {
+			eu_busy = false;
+			igt_info("Workload scheduled, ccs engine :%d utilized :%5.1f\n",
+				 engine->drm_xe_engine.engine_instance, percentage);
+		}
+	}
+}
+#endif
+
diff --git a/tests/intel/xe_compute.c b/tests/intel/xe_compute.c
index eec5fe37f..97cd62036 100644
--- a/tests/intel/xe_compute.c
+++ b/tests/intel/xe_compute.c
@@ -19,6 +19,8 @@
#include "xe/xe_ioctl.h"
#include "xe/xe_query.h"
+#include "igt_eu_perf.h"
+
#define LOOP_DURATION (1000000ull)
static int gt_sysfs_open(int gt)
@@ -241,6 +243,222 @@ test_compute_kernel_loop(uint64_t loop_duration)
drm_close_driver(fd);
}
+/*
+ * Worker thread: wait until the spawner opens the start gate, then run the
+ * compute kernel on the engine instance described by @data.
+ */
+static void
+*intel_compute_thread(void *data)
+{
+	struct thread_data *t = data;
+
+	/* Block until *t->go is flipped under the shared mutex. */
+	pthread_mutex_lock(t->mutex);
+	while (!*t->go)
+		pthread_cond_wait(t->cond, t->mutex);
+	pthread_mutex_unlock(t->mutex);
+
+	igt_info("Compute kernel executing on engine class :%s instance :%d gt: GT-%d\n",
+		 xe_engine_class_string(t->eci->engine_class), t->eci->engine_instance,
+		 t->eci->gt_id);
+
+	igt_assert_f(xe_run_intel_compute_kernel_on_engine(t->fd, t->eci, t->execenv,
+							   EXECENV_PREF_VRAM_IF_POSSIBLE),
+		     "Unable to run compute kernel successfully\n");
+
+	return NULL;
+}
+
+/*
+ * Monitor thread: sample and print EU busyness every 2 * LOOP_DURATION
+ * microseconds until the main thread sets stop_top.
+ * was: a loop variable n fixed at -1 made "(n != 0) &&" a dead condition.
+ * NOTE(review): stop_top is written from another thread without atomics;
+ * consider atomic_bool.
+ */
+static void *show_eu_util(void *data)
+{
+	struct xe_pmu_device *pmu_device;
+	int dev_fd;
+
+	dev_fd = drm_open_driver(DRIVER_XE);
+
+	pmu_device = xe_init_engines(dev_fd);
+	if (!pmu_device) {
+		fprintf(stderr,
+			"Failed to initialize engines! (%s)\n",
+			strerror(errno));
+		drm_close_driver(dev_fd);
+		return NULL;
+	}
+
+	/* Prime prev/cur so the first printed delta is meaningful. */
+	xe_pmu_device_sample(pmu_device);
+
+	while (!stop_top) {
+		xe_pmu_device_sample(pmu_device);
+		xe_print_perc(pmu_device);
+		usleep(2 * LOOP_DURATION);
+	}
+
+	eu_util_free(pmu_device);
+	drm_close_driver(dev_fd);
+	return NULL;
+}
+
+/*
+ * Spawn the EU-utilization monitor thread (show_eu_util).  The thread is
+ * never joined; it exits on its own once stop_top is set.
+ */
+static void
+thread_init_eu_utils(void)
+{
+	pthread_t eu_utils;
+
+	/* was: pthread_create() return value ignored */
+	igt_assert(pthread_create(&eu_utils, NULL, show_eu_util, NULL) == 0);
+}
+
+/*
+ * Run the loop kernel once per compute engine from the main thread, so any
+ * igt_skip fires here rather than inside worker threads (igt_skip uses
+ * longjmp and is not thread-safe).
+ */
+static void
+igt_check_supported_pipeline(void)
+{
+	int fd;
+	unsigned int ip_ver;
+	const struct intel_compute_kernels *kernels;
+	struct user_execenv execenv = { 0 };
+	struct drm_xe_engine_class_instance *hwe;
+
+	fd = drm_open_driver(DRIVER_XE);
+	ip_ver = intel_graphics_ver(intel_get_drm_devid(fd));
+
+	/* Find the kernel table entry matching this device's graphics IP. */
+	for (kernels = intel_compute_square_kernels; kernels->kernel; kernels++)
+		if (ip_ver == kernels->ip_ver)
+			break;
+
+	/* was: on no match the table sentinel was used, propagating a NULL
+	 * loop_kernel into the execution path
+	 */
+	if (!kernels->kernel) {
+		drm_close_driver(fd);
+		igt_skip("No loop kernel for IP version %u\n", ip_ver);
+	}
+
+	/* loop_kernel_duration used as sleep to make EU busy for loop_duration */
+	execenv.loop_kernel_duration = 1000000;
+	execenv.kernel = kernels->loop_kernel;
+	execenv.kernel_size = kernels->loop_kernel_size;
+
+	xe_for_each_engine(fd, hwe) {
+		if (hwe->engine_class != DRM_XE_ENGINE_CLASS_COMPUTE)
+			continue;
+
+		igt_assert_f(xe_run_intel_compute_kernel_on_engine(fd, hwe, &execenv,
+								   EXECENV_PREF_SYSTEM),
+			     "Unable to run compute kernel successfully\n");
+	}
+	drm_close_driver(fd);
+}
+
+/**
+ * SUBTEST: eu-busy-10s
+ * Functionality: OpenCL kernel
+ * Description: Run loop_kernel for 10 seconds and observe EU busyness
+ */
+/*
+ * Enable all compute slices via ccs_mode, start the busyness monitor, then
+ * spawn one worker thread per compute engine (per gt) running the loop
+ * kernel, and join them.  NOTE(review): duration_sec is passed in
+ * LOOP_DURATION (microsecond) units despite its name -- confirm intent.
+ */
+static void
+test_eu_busy(int fd, uint64_t duration_sec)
+{
+	struct user_execenv execenv = { 0 };
+	struct thread_data *threads_data;
+	struct drm_xe_engine_class_instance *hwe;
+	const struct intel_compute_kernels *kernels;
+	pthread_mutex_t mutex;
+	pthread_cond_t cond;
+	u32 gt, n_instances = 0, i = 0;	/* was: i used uninitialized as index (UB) */
+	bool go = false;
+	int ccs_mode = 0;	/* was: read uninitialized when no gt has cslices */
+	int gt_fd;
+	u32 num_slices, ip_ver;
+	uint64_t gt_mask = get_gt_mask();
+
+	/* Enable as many CCS instances as there are compute slices per gt. */
+	for_each_bit(gt_mask, gt) {
+		if (!get_num_cslices(gt, &num_slices))
+			continue;
+
+		gt_fd = gt_sysfs_open(gt);
+		igt_assert(igt_sysfs_printf(gt_fd, "ccs_mode", "%u", num_slices) > 0);
+		igt_assert(igt_sysfs_scanf(gt_fd, "ccs_mode", "%u", &ccs_mode) > 0);
+		close(gt_fd);
+	}
+
+	igt_skip_on_f(ccs_mode <= 1, "Skipping test as ccs_mode <=1 not matching criteria :%d\n",
+		      ccs_mode);
+
+	/* Start the monitor thread and give it time for a first sample. */
+	thread_init_eu_utils();
+	usleep(2 * LOOP_DURATION);
+
+	fd = drm_open_driver(DRIVER_XE);
+	ip_ver = intel_graphics_ver(intel_get_drm_devid(fd));
+	kernels = intel_compute_square_kernels;
+	while (kernels->kernel) {
+		if (ip_ver == kernels->ip_ver)
+			break;
+		kernels++;
+	}
+
+	/*
+	 * User should use different kernel if loop_kernel_duration not set
+	 * With loop kernel and loop duration it assumes we stop it via memory write
+	 */
+	execenv.loop_kernel_duration = duration_sec;
+	execenv.kernel = kernels->loop_kernel;
+	execenv.kernel_size = kernels->loop_kernel_size;
+	drm_close_driver(fd);
+
+	/*
+	 * Calling igt_skip in threads can cause issue during execution
+	 * To avoid this check igt_check_supported_pipeline function called
+	 * to make sure igt_skip happens before creating threads
+	 */
+	if (ip_ver < IP_VER(20, 0))
+		igt_check_supported_pipeline();
+
+	/* Count compute engine instances across all gts to size threads_data. */
+	gt_mask = get_gt_mask();
+	for_each_bit(gt_mask, gt) {
+		fd = drm_open_driver(DRIVER_XE);
+		xe_for_each_engine(fd, hwe) {
+			igt_assert(hwe);
+			if (hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
+				++n_instances;
+		}
+		drm_close_driver(fd);
+	}
+
+	threads_data = calloc(n_instances, sizeof(*threads_data));
+	igt_assert(threads_data);
+
+	pthread_mutex_init(&mutex, 0);
+	pthread_cond_init(&cond, 0);
+
+	gt_mask = get_gt_mask();
+	fd = drm_open_driver(DRIVER_XE);
+	for_each_bit(gt_mask, gt) {
+		/* One worker thread per compute engine on this gt. */
+		xe_for_each_engine(fd, hwe) {
+			if (hwe->gt_id != gt ||
+			    hwe->engine_class != DRM_XE_ENGINE_CLASS_COMPUTE)
+				continue;
+
+			threads_data[i].mutex = &mutex;
+			threads_data[i].cond = &cond;
+			threads_data[i].fd = fd;
+			threads_data[i].eci = hwe;
+			threads_data[i].go = &go;
+			threads_data[i].execenv = &execenv;
+			pthread_create(&threads_data[i].thread, 0, intel_compute_thread,
+				       &threads_data[i]);
+			++i;
+		}
+		/* Release all workers for this gt at once. */
+		pthread_mutex_lock(&mutex);
+		go = true;
+		pthread_cond_broadcast(&cond);
+		pthread_mutex_unlock(&mutex);
+
+		for (u32 val = 0; val < i; ++val)	/* was: signed vs unsigned */
+			pthread_join(threads_data[val].thread, NULL);
+
+		i = 0;
+		/* NOTE(review): set after the first gt, matching the original
+		 * flow; on multi-gt parts the monitor stops early -- confirm.
+		 */
+		stop_top = true;
+		eu_busy = true;
+	}
+	free(threads_data);
+	drm_close_driver(fd);
+}
+
/**
* SUBTEST: compute-square
* Mega feature: WMTP
@@ -282,4 +500,8 @@ igt_main
/* To test compute function stops after loop_kernel_duration */
igt_subtest("loop-duration-2s")
test_compute_kernel_loop(2 * LOOP_DURATION);
+
+ /* test to check available EU utilisation in multi-ccs case */
+ igt_subtest("eu-busy-10s")
+ test_eu_busy(xe, 10 * LOOP_DURATION);
}
diff --git a/tests/meson.build b/tests/meson.build
index 5c01c64e9..a4f3f3dc0 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -408,6 +408,7 @@ extra_dependencies = {
'sw_sync': [ libatomic ],
'xe_fault_injection': [ lib_igt_xe_oa ],
'xe_oa': [ lib_igt_xe_oa ],
+ 'xe_compute': [ igt_deps,lib_igt_perf,lib_igt_profiling,math ],
}
test_executables = []
--
2.43.0
More information about the igt-dev
mailing list