[PATCH i-g-t 2/2] tests/intel/xe_compute: Add Compute workload Scheduling and Display EU busyness
nishit.sharma at intel.com
nishit.sharma at intel.com
Tue Jun 10 13:53:18 UTC 2025
From: Nishit Sharma <nishit.sharma at intel.com>
Adds compute workload scheduling and execution on multiple CCS engines, when
available. This also adds a graphical representation of per-engine busyness
while the workload is running on multiple CCS engine instances.
Signed-off-by: Nishit Sharma <nishit.sharma at intel.com>
---
lib/intel_compute.c | 29 +-
lib/intel_compute.h | 2 +
tests/intel/xe_compute.c | 755 +++++++++++++++++++++++++++++++++++++++
tests/meson.build | 1 +
4 files changed, 784 insertions(+), 3 deletions(-)
diff --git a/lib/intel_compute.c b/lib/intel_compute.c
index bfb9024ba..252fa2f81 100644
--- a/lib/intel_compute.c
+++ b/lib/intel_compute.c
@@ -249,8 +249,14 @@ static void bo_execenv_bind(struct bo_execenv *execenv,
break;
}
- bo_dict[i].handle = xe_bo_create(fd, execenv->vm, bo_dict[i].size,
- placement, flags);
+ if (!execenv->user)
+ bo_dict[i].handle = xe_bo_create(fd, execenv->vm, bo_dict[i].size,
+ placement, flags);
+ else
+ bo_dict[i].handle = xe_bo_create_caching(fd, execenv->vm,
+ bo_dict[i].size,
+ placement, flags,
+ DRM_XE_GEM_CPU_CACHING_WC);
bo_dict[i].data = xe_bo_map(fd, bo_dict[i].handle, bo_dict[i].size);
xe_vm_bind_async(fd, vm, 0, bo_dict[i].handle, 0, bo_dict[i].addr,
bo_dict[i].size, &sync, 1);
@@ -1788,6 +1794,15 @@ static void xelpg_compute_exec(int fd, const unsigned char *kernel,
bo_execenv_destroy(&execenv);
}
+static void bo_execenv_exec_async_wait(struct bo_execenv *execenv)
+{
+ xe_wait_ufence(execenv->fd, &execenv->bo_sync->sync, USER_FENCE_VALUE,
+ execenv->exec_queue, INT64_MAX);
+
+ munmap(execenv->bo_sync, sizeof(*execenv->bo_sync));
+ gem_close(execenv->fd, execenv->bo);
+}
+
/**
* xe2lpg_compute_exec - run a pipeline compatible with XE2
*
@@ -1867,7 +1882,15 @@ static void xe2lpg_compute_exec(int fd, const unsigned char *kernel,
OFFSET_KERNEL, 0, false,
execenv.array_size);
- bo_execenv_exec(&execenv, ADDR_BATCH);
+ if (!user)
+ bo_execenv_exec(&execenv, ADDR_BATCH);
+ else if (user->loop_kernel_duration) {
+ bo_execenv_exec_async(&execenv, ADDR_BATCH);
+ igt_measured_usleep(user->loop_kernel_duration);
+ ((int *)bo_dict[4].data)[0] = MAGIC_LOOP_STOP;
+ bo_execenv_exec_async_wait(&execenv);
+ user->skip_results_check = 1;
+ }
for (int i = 0; i < execenv.array_size; i++) {
float input = input_data[i];
diff --git a/lib/intel_compute.h b/lib/intel_compute.h
index 412791d07..19977933f 100644
--- a/lib/intel_compute.h
+++ b/lib/intel_compute.h
@@ -63,6 +63,8 @@ struct user_execenv {
uint64_t input_addr;
/** @output_addr: override default address of the output array if provided */
uint64_t output_addr;
+	/** @loop_kernel_duration: duration, in microseconds, for which the kernel should keep executing on the GPU **/
+ uint32_t loop_kernel_duration;
};
enum execenv_alloc_prefs {
diff --git a/tests/intel/xe_compute.c b/tests/intel/xe_compute.c
index 955edf082..31ad13a5d 100644
--- a/tests/intel/xe_compute.c
+++ b/tests/intel/xe_compute.c
@@ -12,6 +12,7 @@
*/
#include <string.h>
+#include <sys/ioctl.h>
#include "igt.h"
#include "igt_sysfs.h"
@@ -19,6 +20,51 @@
#include "xe/xe_ioctl.h"
#include "xe/xe_query.h"
+#include "tools/gputop/utils.h"
+#include "tools/gputop/xe_gputop.h"
+#include "igt_drm_clients.h"
+
+static const char * const drivers[] = {
+ "xe",
+ /* Keep the last one as NULL */
+ NULL
+};
+
+/**
+ * Number of supported drivers needs to be adjusted as per the length of
+ * the drivers[] array.
+ */
+#define NUM_DRIVER 1
+#define LOOP_DURATION (1000000ull)
+#define engine_ptr(engines, n) (&(engines)->engine + (n))
+
+static const char * const bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
+
+/*
+ * devices[] array of type struct gputop_device
+ */
+struct gputop_device devices[] = {
+ {false, 0, NULL}
+};
+
+enum utilization_type {
+ UTILIZATION_TYPE_ENGINE_TIME,
+ UTILIZATION_TYPE_TOTAL_CYCLES,
+};
+
+pthread_barrier_t barrier;
+struct thread_data {
+ pthread_t thread;
+ pthread_mutex_t *mutex;
+ pthread_cond_t *cond;
+ int class;
+ int fd;
+ int gt;
+ struct user_execenv *execenv;
+ struct drm_xe_engine_class_instance *eci;
+ bool *go;
+};
+
static int gt_sysfs_open(int gt)
{
int fd, gt_fd;
@@ -178,6 +224,711 @@ test_compute_square(int fd)
"GPU not supported\n");
}
+static void
+*intel_compute_thread(void *data)
+{
+ struct thread_data *t = (struct thread_data *)data;
+
+ igt_info("Compute kernel executing on engine class :%s instance :%d gt: GT-%d\n",
+ xe_engine_class_string(t->eci->engine_class), t->eci->engine_instance,
+ t->eci->gt_id);
+
+ pthread_mutex_lock(t->mutex);
+ while (*t->go == 0)
+ pthread_cond_wait(t->cond, t->mutex);
+ pthread_mutex_unlock(t->mutex);
+
+ igt_assert_f(xe_run_intel_compute_kernel_on_engine(t->fd,
+ t->eci,
+ t->execenv,
+ EXECENV_PREF_VRAM_IF_POSSIBLE),
+ "Unable to run compute kernel successfully\n");
+ return NULL;
+}
+
+static volatile bool stop_top;
+
+static void
+update_console_size(int *w, int *h)
+{
+ struct winsize ws = {};
+
+ if (ioctl(0, TIOCGWINSZ, &ws) == -1)
+ return;
+
+ *w = ws.ws_col;
+ *h = ws.ws_row;
+
+ if (*w == 0 && *h == 0) {
+ /* Serial console. */
+ *w = 80;
+ *h = 24;
+ }
+}
+
+static int
+__client_id_cmp(const struct igt_drm_client *a,
+ const struct igt_drm_client *b)
+{
+ if (a->id > b->id)
+ return 1;
+ else if (a->id < b->id)
+ return -1;
+ else
+ return 0;
+}
+
+static int
+client_cmp(const void *_a, const void *_b, void *unused)
+{
+ const struct igt_drm_client *a = _a;
+ const struct igt_drm_client *b = _b;
+ long val_a, val_b;
+
+ /* DRM cards into consecutive buckets first. */
+ val_a = a->drm_minor;
+ val_b = b->drm_minor;
+ if (val_a > val_b)
+ return 1;
+ else if (val_b > val_a)
+ return -1;
+
+ /*
+ * Within buckets sort by last sampling period aggregated runtime, with
+ * client id as a tie-breaker.
+ */
+ val_a = a->agg_delta_engine_time;
+ val_b = b->agg_delta_engine_time;
+ if (val_a == val_b)
+ return __client_id_cmp(a, b);
+ else if (val_b > val_a)
+ return 1;
+ else
+ return -1;
+
+}
+
+static void clrscr(void)
+{
+ printf("\033[H\033[J");
+}
+
+static int
+pmu_format_shift(int xe, const char *name)
+{
+ uint32_t start;
+ int format;
+ char device[80];
+
+ format = perf_event_format(xe_perf_device(xe, device, sizeof(device)),
+ name, &start);
+ if (format)
+ return 0;
+
+ return start;
+}
+
+static const char
+*class_display_name(unsigned int class)
+{
+ switch (class) {
+ case DRM_XE_ENGINE_CLASS_RENDER:
+ return "Render/3D";
+ case DRM_XE_ENGINE_CLASS_COPY:
+ return "Blitter";
+ case DRM_XE_ENGINE_CLASS_VIDEO_DECODE:
+ return "Video";
+ case DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ return "VideoEnhance";
+ case DRM_XE_ENGINE_CLASS_COMPUTE:
+ return "Compute";
+ default:
+ return "[unknown]";
+ }
+}
+
+static int
+engine_cmp(const void *__a, const void *__b)
+{
+ const struct xe_engine *a = (struct xe_engine *)__a;
+ const struct xe_engine *b = (struct xe_engine *)__b;
+
+ if (a->drm_xe_engine.engine_class != b->drm_xe_engine.engine_class)
+ return a->drm_xe_engine.engine_class - b->drm_xe_engine.engine_class;
+ else
+ return a->drm_xe_engine.engine_instance - b->drm_xe_engine.engine_instance;
+}
+
+static void
+*xe_init_engines(const void *obj)
+{
+	struct igt_device_card *card = ((struct xe_gputop *)obj)->card;
+	struct xe_pmu_device *engines;
+	int ret = 0;
+	char device[30];
+	struct drm_xe_engine_class_instance *hwe;
+	int card_fd;
+	uint64_t engine_class, engine_instance, gt_shift;
+	uint64_t engine_active_config, engine_total_config;
+
+	if (!card || !strlen(card->card) || !strlen(card->render))
+		return NULL;
+
+	if (strlen(card->card)) {
+		card_fd = igt_open_card(card);
+	} else if (strlen(card->render)) {
+		card_fd = igt_open_render(card);
+	} else {
+		fprintf(stderr, "Failed to detect device!\n");
+		return NULL;
+	}
+	xe_device_get(card_fd);
+
+	/* One struct xe_engine slot per engine reported by the kernel. */
+	engines = calloc(1, sizeof(struct xe_pmu_device) +
+			 xe_number_engines(card_fd) * sizeof(struct xe_engine));
+	if (!engines)
+		return NULL;
+
+	engines->num_engines = 0;
+	engines->device = ((struct xe_gputop *)obj)->pmu_device;
+	gt_shift = pmu_format_shift(card_fd, "gt");
+	engine_class = pmu_format_shift(card_fd, "engine_class");
+	engine_instance = pmu_format_shift(card_fd, "engine_instance");
+	xe_perf_device(card_fd, device, sizeof(device));
+	ret = perf_event_config(device,
+				"engine-active-ticks",
+				&engine_active_config);
+	if (ret < 0) {
+		free(engines);
+		return NULL;
+	}
+	ret = perf_event_config(device,
+				"engine-total-ticks",
+				&engine_total_config);
+	if (ret < 0) {
+		free(engines);
+		return NULL;
+	}
+	xe_for_each_engine(card_fd, hwe) {
+		uint64_t param_config;
+		struct xe_engine *engine;
+
+		engine = engine_ptr(engines, engines->num_engines);
+		param_config = (uint64_t)hwe->gt_id << gt_shift | hwe->engine_class << engine_class
+			       | hwe->engine_instance << engine_instance;
+		engine->drm_xe_engine = *hwe;
+		engine->engine_active_ticks.config = engine_active_config | param_config;
+		engine->engine_total_ticks.config = engine_total_config | param_config;
+
+		if (engine->engine_active_ticks.config == -1 ||
+		    engine->engine_total_ticks.config == -1) {
+			ret = ENOENT;
+			break;
+		}
+
+		ret = asprintf(&engine->display_name, "%s/%u",
+			       class_display_name(engine->drm_xe_engine.engine_class),
+			       engine->drm_xe_engine.engine_instance);
+
+		if (ret <= 0) {
+			/* asprintf() returns -1 and leaves errno unreliable */
+			ret = ENOMEM;
+			break;
+		}
+		/*
+		 * asprintf() returned the (positive) string length; reset to 0
+		 * so the post-loop error check only fires on real failures.
+		 */
+		ret = 0;
+
+		engines->num_engines++;
+	}
+
+	/*
+	 * Fix inverted error check: the previous "if (!ret)" treated success
+	 * as failure and let genuine errors (ENOENT above) fall through.
+	 */
+	if (ret) {
+		free(engines);
+		errno = ret;
+		return NULL;
+	}
+
+	qsort(engine_ptr(engines, 0), engines->num_engines,
+	      sizeof(struct xe_engine), engine_cmp);
+
+	((struct xe_gputop *)obj)->eng_obj = engines;
+
+	return engines;
+}
+
+static int
+_open_pmu(uint64_t type, unsigned int *cnt,
+ struct xe_pmu_counter *pmu, int *fd)
+{
+ int fd__ = igt_perf_open_group(type, pmu->config, *fd);
+
+ if (fd__ >= 0) {
+ if (*fd == -1)
+ *fd = fd__;
+ pmu->present = true;
+ pmu->idx = (*cnt)++;
+ pmu->fd = fd__;
+ }
+
+ return fd__;
+}
+
+int xe_pmu_init(const void *obj)
+{
+ struct xe_pmu_device *engines = ((struct xe_gputop *)obj)->eng_obj;
+ unsigned int i;
+ int fd;
+ struct xe_engine *engine;
+ uint64_t type = igt_perf_type_id(engines->device);
+
+ engines->fd = -1;
+ engines->num_counters = 0;
+
+ for (i = 0; i < engines->num_engines; i++) {
+ engine = engine_ptr(engines, i);
+ fd = _open_pmu(type, &engines->num_counters, &engine->engine_active_ticks,
+ &engines->fd);
+ if (fd < 0)
+ return -1;
+ fd = _open_pmu(type, &engines->num_counters, &engine->engine_total_ticks,
+ &engines->fd);
+ if (fd < 0)
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Release PMU fds, engine display names and per-instance state for an
+ * array of `len` xe_gputop instances.
+ */
+void xe_clean_up(void *obj, int len)
+{
+	struct xe_engine *eng;
+	struct xe_pmu_counter pmu;
+	struct xe_gputop *dev = (struct xe_gputop *)obj;
+
+	for (int i = 0; i < len; i++) {
+		/* free(NULL) is a no-op, no need to guard */
+		free((dev + i)->card);
+		if ((dev + i)->eng_obj) {
+			for (int j = 0; j < ((struct xe_pmu_device *)(dev + i)->eng_obj)->num_engines ; j++) {
+				eng = engine_ptr((struct xe_pmu_device *)(dev + i)->eng_obj, j);
+				free(eng->display_name);
+
+				pmu = eng->engine_active_ticks;
+				if (pmu.present)
+					close(pmu.fd);
+
+				pmu = eng->engine_total_ticks;
+				if (pmu.present)
+					close(pmu.fd);
+			}
+			/*
+			 * Fix: free the i-th instance's data, not always
+			 * instance 0 (the old code leaked every i > 0 and
+			 * double-freed instance 0 on multi-device setups).
+			 */
+			free((dev + i)->eng_obj);
+		}
+		free((dev + i)->pmu_device);
+	}
+}
+
+static void
+eu_util_free(void)
+{
+ for (int i = 0; drivers[i]; i++) {
+ xe_clean_up(devices[i].instances, devices[i].len);
+ free(devices[i].instances);
+ devices[i].driver_present = false;
+ devices[i].len = 0;
+ }
+}
+
+static int
+find_driver(struct igt_device_card *card)
+{
+ for (int i = 0; drivers[i]; i++) {
+ if (strcmp(drivers[i], card->driver) == 0)
+ return i;
+ }
+ return -1;
+}
+
+/*
+ * Return a strdup()ed perf device path for @card, or NULL on failure.
+ * Caller owns the returned string.
+ */
+static char
+*pmu_name(struct igt_device_card *card)
+{
+	/* Initialize to -1: if neither node name is set, neither branch
+	 * below runs and the old code read card_fd uninitialized (UB).
+	 */
+	int card_fd = -1;
+	char device[30];
+	char *path;
+
+	if (strlen(card->card))
+		card_fd = igt_open_card(card);
+	else if (strlen(card->render))
+		card_fd = igt_open_render(card);
+
+	if (card_fd == -1)
+		return NULL;
+
+	xe_perf_device(card_fd, device, sizeof(device));
+	path = strdup(device);
+	close(card_fd);
+	return path;
+}
+
+void xe_gputop_init(void *ptr, struct igt_device_card *card)
+{
+ struct xe_gputop *obj = (struct xe_gputop *)ptr;
+
+ obj->pmu_device = pmu_name(card);
+ if (!obj->pmu_device) {
+ fprintf(stderr, "%s : pmu_device path returned NULL", card->pci_slot_name);
+ exit(EXIT_FAILURE);
+ }
+ obj->card = card;
+}
+
+static int populate_device_instances(const char *filter)
+{
+ struct igt_device_card *cards = NULL;
+ struct igt_device_card *card_inplace = NULL;
+ struct gputop_device *dev = NULL;
+ int driver_no;
+ int count, final_count = 0;
+
+ count = igt_device_card_match_all(filter, &cards);
+ for (int j = 0; j < count; j++) {
+ if (strcmp((cards + j)->subsystem, "pci") != 0)
+ continue;
+
+ driver_no = find_driver(cards + j);
+ if (driver_no < 0)
+ continue;
+
+ dev = devices + driver_no;
+ if (!dev->driver_present)
+ dev->driver_present = true;
+ dev->len++;
+ dev->instances = realloc(dev->instances,
+ dev->len * sizeof(struct xe_gputop));
+ if (!dev->instances) {
+ fprintf(stderr,
+ "Device instance realloc failed (%s)\n",
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ card_inplace = (struct igt_device_card *)
+ calloc(1, sizeof(struct igt_device_card));
+ memcpy(card_inplace, cards + j, sizeof(struct igt_device_card));
+ xe_gputop_init((struct xe_gputop *)(dev->instances + dev->len - 1),
+ card_inplace);
+ final_count++;
+ }
+ if (count)
+ free(cards);
+ return final_count;
+}
+
+static uint64_t
+pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+ uint64_t buf[2 + num];
+ unsigned int i;
+ ssize_t len;
+
+ memset(buf, 0, sizeof(buf));
+
+ len = read(fd, buf, sizeof(buf));
+ assert(len == sizeof(buf));
+
+ for (i = 0; i < num; i++)
+ val[i] = buf[2 + i];
+
+ return buf[1];
+}
+
+static void
+__update_sample(struct xe_pmu_counter *counter, uint64_t val)
+{
+ counter->val.prev = counter->val.cur;
+ counter->val.cur = val;
+}
+
+static void
+update_sample(struct xe_pmu_counter *counter, uint64_t *val)
+{
+ if (counter->present)
+ __update_sample(counter, val[counter->idx]);
+}
+
+void xe_pmu_sample(const void *obj)
+{
+ struct xe_pmu_device *engines = ((struct xe_gputop *)obj)->eng_obj;
+ const int num_val = engines->num_counters;
+ uint64_t val[2 + num_val];
+ unsigned int i;
+
+ pmu_read_multi(engines->fd, num_val, val);
+
+ for (i = 0; i < engines->num_engines; i++) {
+ struct xe_engine *engine = engine_ptr(engines, i);
+
+ update_sample(&engine->engine_active_ticks, val);
+ update_sample(&engine->engine_total_ticks, val);
+ }
+}
+
+static double
+pmu_active_percentage(struct xe_engine *engine)
+{
+ double pmu_active_ticks = engine->engine_active_ticks.val.cur -
+ engine->engine_active_ticks.val.prev;
+ double pmu_total_ticks = engine->engine_total_ticks.val.cur -
+ engine->engine_total_ticks.val.prev;
+ double percentage;
+
+ percentage = (pmu_active_ticks * 100) / pmu_total_ticks;
+ return percentage;
+}
+
+void n_spaces(const unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ putchar(' ');
+}
+
+void print_percentage_bar(double percent, int max_len)
+{
+ int bar_len, i, len = max_len - 1;
+ const int w = PERCLIENT_ENGINE_WIDTH;
+
+ len -= printf("|%5.1f%% ", percent);
+
+ /* no space left for bars, do what we can */
+ if (len < 0)
+ len = 0;
+
+ bar_len = ceil(w * percent * len / 100.0);
+ if (bar_len > w * len)
+ bar_len = w * len;
+
+ for (i = bar_len; i >= w; i -= w)
+ printf("%s", bars[w]);
+ if (i)
+ printf("%s", bars[i]);
+
+ len -= (bar_len + (w - 1)) / w;
+ n_spaces(len);
+
+ putchar('|');
+}
+
+static int
+print_engine(struct xe_pmu_device *engines, unsigned int i,
+ int lines, int con_w, int con_h)
+{
+ struct xe_engine *engine = engine_ptr(engines, i);
+ double percentage = pmu_active_percentage(engine);
+
+ printf("%*s", (int)(strlen(" ENGINES")), engine->display_name);
+ print_percentage_bar(percentage, con_w - strlen(" ENGINES"));
+ printf("\n");
+
+ return ++lines;
+}
+
+int xe_print_engines(const void *obj, int lines, int w, int h)
+{
+ struct xe_pmu_device *show = ((struct xe_gputop *)obj)->eng_obj;
+
+ for (unsigned int i = 0; i < show->num_engines && lines < h; i++)
+ lines = print_engine(show, i, lines, w, h);
+
+ return lines;
+}
+
+static void *show_eu_util(void *data)
+{
+ struct igt_drm_clients *clients = NULL;
+ int con_w = -1, con_h = -1;
+ int ret;
+ long n;
+
+ n = -1;
+ if (!populate_device_instances("device:subsystem=pci,card=all")) {
+ printf("No device found.\n");
+ eu_util_free();
+ exit(1);
+ }
+
+ for (int i = 0; drivers[i]; i++) {
+ if (devices[i].driver_present) {
+ for (int j = 0; j < devices[i].len; j++) {
+ if (!xe_init_engines(devices[i].instances + j)) {
+ fprintf(stderr,
+ "Failed to initialize engines! (%s)\n",
+ strerror(errno));
+ eu_util_free();
+ return NULL;
+ }
+ ret = xe_pmu_init(devices[i].instances + j);
+
+ if (ret) {
+ fprintf(stderr,
+ "Failed to initialize PMU! (%s)\n",
+ strerror(errno));
+ if (errno == EACCES && geteuid())
+ fprintf(stderr,
+ "\n"
+ "When running as a normal user CAP_PERFMON is required to access performance\n"
+ "monitoring. See \"man 7 capabilities\", \"man 8 setcap\", or contact your\n"
+ "distribution vendor for assistance.\n"
+ "\n"
+ "More information can be found at 'Perf events and tool security' document:\n"
+ "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n");
+
+ igt_devices_free();
+ eu_util_free();
+ return NULL;
+ }
+ }
+ }
+ }
+ for (int i = 0; drivers[i]; i++) {
+ for (int j = 0; devices[i].driver_present && j < devices[i].len; j++)
+ xe_pmu_sample(devices[i].instances + j);
+ }
+
+ clients = igt_drm_clients_init(NULL);
+ if (!clients)
+ exit(1);
+
+ sleep(2);
+ igt_drm_clients_scan(clients, NULL, NULL, 0, NULL, 0);
+ while ((n != 0) && !stop_top) {
+ int lines = 0;
+
+ igt_drm_clients_scan(clients, NULL, NULL, 0, NULL, 0);
+
+ for (int i = 0; drivers[i]; i++) {
+ for (int j = 0; devices[i].driver_present && j < devices[i].len; j++)
+ xe_pmu_sample(devices[i].instances + j);
+ }
+
+ igt_drm_clients_sort(clients, client_cmp);
+
+ update_console_size(&con_w, &con_h);
+ clrscr();
+
+ for (int i = 0; drivers[i]; i++) {
+ for (int j = 0; devices[i].driver_present && j < devices[i].len; j++) {
+ lines = xe_print_engines(devices[i].instances + j,
+ lines, con_w, con_h);
+ }
+ }
+
+ if (!clients->num_clients) {
+ const char *msg = " (No GPU clients yet. Start workload to see stats)";
+
+ printf(ANSI_HEADER "%-*s" ANSI_RESET "\n",
+ (int)(con_w - strlen(msg) - 1), msg);
+ }
+ }
+ igt_drm_clients_free(clients);
+ eu_util_free();
+
+ return NULL;
+}
+
+static void
+thread_init_eu_utils(void)
+{
+ pthread_t eu_utils;
+ /* Creating thread to display EU utilization */
+ pthread_create(&eu_utils, NULL, show_eu_util, NULL);
+}
+
+/**
+ * SUBTEST: eu-busy-10-sec
+ * Functionality: OpenCL kernel
+ * Description:
+ *	Run a long-running OpenCL kernel that returns output[i] = input[i] * input[i],
+ */
+static void
+test_eu_busy(int fd, int num_gt, u32 duration_sec)
+{
+	struct user_execenv execenv = { 0 };
+	struct thread_data *threads_data;
+	struct drm_xe_engine_class_instance *hwe;
+	pthread_mutex_t mutex;
+	pthread_cond_t cond;
+	/* i indexes threads_data below; must start at 0 (was uninitialized) */
+	u32 gt, n_threads = 0, iterations = 0, n_instances = 0, i = 0;
+	bool go = false;
+	/* ccs_mode_set = 0 so the assert below fails cleanly if no GT
+	 * exposes a ccs_mode entry (was read uninitialized in that case).
+	 */
+	int ccs_mode, gt_fd, ccs_mode_set = 0;
+
+	unsigned int ip_ver = intel_graphics_ver(intel_get_drm_devid(fd));
+	const struct intel_compute_kernels *kernels = intel_compute_square_kernels;
+
+	/* Remember the CCS mode of the last GT that reports one. */
+	for (gt = 0; gt < num_gt; gt++) {
+		gt_fd = gt_sysfs_open(gt);
+		if (igt_sysfs_scanf(gt_fd, "ccs_mode", "%u", &ccs_mode) > 0)
+			ccs_mode_set = ccs_mode;
+		close(gt_fd);	/* was leaked once per GT */
+	}
+
+	igt_assert(ccs_mode_set > 1);
+	thread_init_eu_utils();
+
+	/* Pick the loop kernel matching this device's graphics IP version. */
+	while (kernels->kernel) {
+		if (ip_ver == kernels->ip_ver)
+			break;
+		kernels++;
+	}
+
+	execenv.loop_kernel_duration = duration_sec;
+	execenv.kernel = kernels->loop_kernel;
+	execenv.kernel_size = kernels->loop_kernel_size;
+
+	/* Count compute engine instances across all GTs to size the thread array. */
+	for (gt = 0; gt < num_gt; gt++) {
+		xe_for_each_engine(fd, hwe) {
+			igt_assert(hwe);
+			if (hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
+				++n_instances;
+		}
+	}
+
+	threads_data = calloc(n_instances, sizeof(*threads_data));
+	igt_assert(threads_data);
+
+	pthread_mutex_init(&mutex, 0);
+	pthread_cond_init(&cond, 0);
+
+	for (gt = 0; gt < num_gt; gt++) {
+		/* One worker thread per compute engine instance on this GT. */
+		xe_for_each_engine(fd, hwe) {
+			if (hwe->gt_id != gt ||
+			    hwe->engine_class != DRM_XE_ENGINE_CLASS_COMPUTE)
+				continue;
+
+			threads_data[i].mutex = &mutex;
+			threads_data[i].cond = &cond;
+			threads_data[i].fd = fd;
+			threads_data[i].eci = hwe;
+			threads_data[i].go = &go;
+			threads_data[i].execenv = &execenv;
+			++n_threads;
+			pthread_create(&threads_data[i].thread, 0, intel_compute_thread,
+				       &threads_data[i]);
+			++i;
+			++iterations;
+		}
+
+		/* Release all workers at once. */
+		pthread_mutex_lock(&mutex);
+		go = true;
+		pthread_cond_broadcast(&cond);
+		pthread_mutex_unlock(&mutex);
+
+		for (u32 val = 0; val < i; ++val)
+			pthread_join(threads_data[val].thread, NULL);
+
+		/* Reset per-GT state; joined slots can be reused safely. */
+		i = 0;
+		n_threads = 0;
+		iterations = 0;
+		stop_top = true;
+	}
+	free(threads_data);
+}
+
igt_main
{
int xe, num_gt;
@@ -190,6 +941,10 @@ igt_main
igt_subtest("compute-square")
test_compute_square(xe);
+ /* test to check available EU utilisation for multi_ccs */
+ igt_subtest("eu-busy-10-sec")
+ test_eu_busy(xe, num_gt, 10 * LOOP_DURATION);
+
igt_fixture
drm_close_driver(xe);
diff --git a/tests/meson.build b/tests/meson.build
index 55bcf57ec..3340e137d 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -404,6 +404,7 @@ extra_dependencies = {
'sw_sync': [ libatomic ],
'xe_fault_injection': [ lib_igt_xe_oa ],
'xe_oa': [ lib_igt_xe_oa ],
+ 'xe_compute': [ igt_deps,lib_igt_perf,lib_igt_drm_clients,lib_igt_drm_fdinfo,lib_igt_profiling,math ],
}
test_executables = []
--
2.43.0
More information about the igt-dev
mailing list