[igt-dev] [PATCH] tests/xe/perf_pmu: Tests for the XE pmu interface
Venkata Ramana Nayana
venkata.ramana.nayana at intel.com
Tue Jun 27 16:31:53 UTC 2023
There is a set of engine group busyness counters provided by HW which are
exposed via PMU events. Add basic unit tests to read those counters.
Signed-off-by: Venkata Ramana Nayana <venkata.ramana.nayana at intel.com>
---
include/drm-uapi/xe_drm.h | 21 +++
lib/igt_perf.c | 36 ++++
lib/igt_perf.h | 5 +
tests/meson.build | 1 +
tests/xe/xe_perf_pmu.c | 335 ++++++++++++++++++++++++++++++++++++++
5 files changed, 398 insertions(+)
create mode 100644 tests/xe/xe_perf_pmu.c
diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
index 432bd87ca..7bfd46c02 100644
--- a/include/drm-uapi/xe_drm.h
+++ b/include/drm-uapi/xe_drm.h
@@ -732,6 +732,27 @@ struct drm_xe_engine_create {
__u64 reserved[2];
};
+/**
+ * DOC: perf_events exposed by xe through /sys/bus/event_sources/drivers/xe
+ *
+ */
+
+
+/* PMU event config IDs */
+
+/*
+ * Top 4 bits of every counter are GT id.
+ */
+#define __XE_PMU_GT_SHIFT (60)
+
+/*
+ * Build a PMU event config value: the event id @x is OR'ed with the GT id
+ * @gt shifted left by __XE_PMU_GT_SHIFT. Both operands are widened to __u64
+ * before shifting.
+ */
+#define ___XE_PMU_OTHER(gt, x) \
+ (((__u64)(x)) | ((__u64)(gt) << __XE_PMU_GT_SHIFT))
+
+/* Per-GT engine group busyness events; event ids 1 to 4. */
+#define XE_PMU_RENDER_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 1)
+#define XE_PMU_COPY_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 2)
+#define XE_PMU_MEDIA_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 3)
+#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 4)
+
struct drm_xe_engine_get_property {
/** @extensions: Pointer to the first extension struct, if any */
__u64 extensions;
diff --git a/lib/igt_perf.c b/lib/igt_perf.c
index ffe078adc..3866c6d77 100644
--- a/lib/igt_perf.c
+++ b/lib/igt_perf.c
@@ -69,6 +69,36 @@ const char *i915_perf_device(int i915, char *buf, int buflen)
return buf;
}
+/**
+ * xe_perf_device:
+ * @xe: xe device file descriptor, passed on to bus_address()
+ * @buf: caller-provided buffer that receives the device name
+ * @buflen: size of @buf in bytes
+ *
+ * Builds the perf device name for an xe device: the "xe_" prefix followed by
+ * whatever bus_address() writes for @xe, with every ':' turned into '_'.
+ * When bus_address() returns zero the name is cut down to plain "xe".
+ *
+ * Returns: @buf, or the string literal "xe" when @buf is NULL or @buflen is
+ * smaller than the prefix length.
+ */
+const char *xe_perf_device(int xe, char *buf, int buflen)
+{
+	static const char prefix[] = "xe_";
+	const int plen = sizeof(prefix) - 1;
+	char *p;
+
+	if (!buf || buflen < plen)
+		return "xe";
+
+	memcpy(buf, prefix, plen);
+
+	/* No bus address: overwrite the trailing '_' so the name ends at "xe". */
+	if (!bus_address(xe, buf + plen, buflen - plen))
+		buf[plen - 1] = '\0';
+
+	/* Convert all colons in the address to '_', thanks perf! */
+	p = buf;
+	while (*p) {
+		if (*p == ':')
+			*p = '_';
+		p++;
+	}
+
+	return buf;
+}
+
+/**
+ * xe_perf_type_id:
+ * @xe: xe device file descriptor
+ *
+ * Returns: the value igt_perf_type_id() reports for the device name that
+ * xe_perf_device() builds for @xe.
+ */
+uint64_t xe_perf_type_id(int xe)
+{
+	char name[80];
+	const char *device = xe_perf_device(xe, name, sizeof(name));
+
+	return igt_perf_type_id(device);
+}
+
uint64_t i915_perf_type_id(int i915)
{
char buf[80];
@@ -147,6 +177,12 @@ int perf_igfx_open_group(uint64_t config, int group)
PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
}
+/**
+ * perf_xe_open:
+ * @xe: xe device file descriptor
+ * @config: PMU event config to open
+ *
+ * Opens @config on the PMU type resolved by xe_perf_type_id(), passing -1 in
+ * the group argument (presumably "no group leader") and requesting
+ * PERF_FORMAT_TOTAL_TIME_ENABLED as the read format.
+ *
+ * Returns: whatever _perf_open() returns.
+ */
+int perf_xe_open(int xe, uint64_t config)
+{
+	const uint64_t type = xe_perf_type_id(xe);
+
+	return _perf_open(type, config, -1, PERF_FORMAT_TOTAL_TIME_ENABLED);
+}
+
int perf_i915_open(int i915, uint64_t config)
{
return _perf_open(i915_perf_type_id(i915), config, -1,
diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index 4d86e31ae..3d9ba2917 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -61,10 +61,15 @@ int igt_perf_open_group(uint64_t type, uint64_t config, int group);
const char *i915_perf_device(int i915, char *buf, int buflen);
uint64_t i915_perf_type_id(int i915);
+/* xe counterparts of the i915 helpers above; both take the xe device fd. */
+const char *xe_perf_device(int xe, char *buf, int buflen);
+uint64_t xe_perf_type_id(int xe);
+
int perf_igfx_open(uint64_t config);
int perf_igfx_open_group(uint64_t config, int group);
int perf_i915_open(int i915, uint64_t config);
int perf_i915_open_group(int i915, uint64_t config, int group);
+/* Open the PMU event @config on the xe device @xe. */
+int perf_xe_open(int xe, uint64_t config);
+
#endif /* I915_PERF_H */
diff --git a/tests/meson.build b/tests/meson.build
index 85ea7e74e..e64d8232e 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -270,6 +270,7 @@ xe_progs = [
'xe_vm',
'xe_waitfence',
'xe_spin_batch',
+ 'xe_perf_pmu',
]
msm_progs = [
diff --git a/tests/xe/xe_perf_pmu.c b/tests/xe/xe_perf_pmu.c
new file mode 100644
index 000000000..97c2f84a3
--- /dev/null
+++ b/tests/xe/xe_perf_pmu.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+/**
+ * TEST: Basic tests to verify the pmu perf interface
+ * Category: Hardware building block
+ * Sub-category: pmu interface
+ * Functionality: pmu
+ * Test category: functionality test
+ */
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+
+#include "igt.h"
+#include "lib/igt_syncobj.h"
+#include "lib/intel_reg.h"
+#include "lib/igt_perf.h"
+#include "xe_drm.h"
+
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+#include "xe/xe_spin.h"
+
+#define MAX_INSTANCE 9
+
+/*
+ * Read the current value of the perf event @fd.
+ *
+ * perf_xe_open() requests PERF_FORMAT_TOTAL_TIME_ENABLED, so one read()
+ * yields two u64 values; per perf_event_open(2) these are the counter value
+ * followed by the enabled time. A read that returns anything other than the
+ * full 16 bytes fails the test.
+ *
+ * Returns the first value (the counter).
+ */
+static uint64_t pmu_read(int fd)
+{
+ uint64_t data[2];
+
+ igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
+
+ return data[0];
+}
+
+/*
+ * Open the xe PMU event @config for the device @fd.
+ *
+ * Skips the test when perf_xe_open() fails with ENODEV; any other failure
+ * trips the assertion. Returns the perf event fd, which the caller closes.
+ */
+static int open_pmu(int fd, uint64_t config)
+{
+ int perf_fd;
+
+ perf_fd = perf_xe_open(fd, config);
+ /* errno is checked straight after the open, before anything can clobber it */
+ igt_skip_on(perf_fd < 0 && errno == ENODEV);
+ igt_assert(perf_fd >= 0);
+
+ return perf_fd;
+}
+
+/*
+ * Map an engine class to the PMU config of its engine group busyness event
+ * on GT @gt.
+ *
+ * Render and compute both use the render group event; video decode and video
+ * enhance both use the media group event. A class without a group event
+ * fails the test instead of returning an uninitialized config.
+ */
+static uint64_t engine_group_get_config(int gt, int class)
+{
+	uint64_t config = 0;
+
+	switch (class) {
+	case DRM_XE_ENGINE_CLASS_COPY:
+		config = XE_PMU_COPY_GROUP_BUSY(gt);
+		break;
+	case DRM_XE_ENGINE_CLASS_RENDER:
+	case DRM_XE_ENGINE_CLASS_COMPUTE:
+		config = XE_PMU_RENDER_GROUP_BUSY(gt);
+		break;
+	case DRM_XE_ENGINE_CLASS_VIDEO_DECODE:
+	case DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		config = XE_PMU_MEDIA_GROUP_BUSY(gt);
+		break;
+	default:
+		igt_assert_f(false, "No group busyness event for engine class %d\n",
+			     class);
+	}
+
+	return config;
+}
+
+/**
+ * Test: Basic test to measure the active time when an engine of any class is active
+ *
+ * SUBTEST: any-engine-group-busy
+ * Description:
+ * Run a test to measure the global activity time by submitting
+ * the WL to all existing engines.
+ * Run type: FULL
+ * TODO: change ``'Run type' == FULL`` to a better category
+ */
+/*
+ * Run a spinner on the engine @eci and check that the any-engine-group-busy
+ * counter of that engine's GT is non-zero afterwards.
+ *
+ * @fd: xe device file descriptor
+ * @eci: engine the spinner is submitted to; its gt_id selects the counter
+ */
+static void test_any_engine_busyness(int fd, struct drm_xe_engine_class_instance *eci)
+{
+	uint32_t vm;
+	uint64_t addr = 0x1a0000;
+	struct drm_xe_sync sync[2] = {
+		{ .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
+		{ .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
+	};
+	struct drm_xe_exec exec = {
+		.num_batch_buffer = 1,
+		.num_syncs = 2,
+		.syncs = to_user_pointer(sync),
+	};
+	uint32_t engine;
+	uint32_t syncobj;
+	size_t bo_size;
+	uint32_t bo = 0;
+	struct xe_spin *spin;
+	int pmu_fd;	/* perf fd: a plain int, as returned by open_pmu() */
+	uint64_t count, idle = 0;
+
+	vm = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
+	bo_size = sizeof(*spin);
+	bo_size = ALIGN(bo_size + xe_cs_prefetch_size(fd),
+			xe_get_default_alignment(fd));
+
+	bo = xe_bo_create(fd, eci->gt_id, vm, bo_size);
+	spin = xe_bo_map(fd, bo, bo_size);
+
+	engine = xe_engine_create(fd, vm, eci, 0);
+	syncobj = syncobj_create(fd, 0);
+
+	/* sync[0] is signalled by the bind and then waited on by the exec. */
+	sync[0].handle = syncobj_create(fd, 0);
+	xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
+
+	pmu_fd = open_pmu(fd, XE_PMU_ANY_ENGINE_GROUP_BUSY(eci->gt_id));
+
+	xe_spin_init(spin, addr, false);
+
+	sync[0].flags &= ~DRM_XE_SYNC_SIGNAL;
+	sync[1].flags |= DRM_XE_SYNC_SIGNAL;
+	sync[1].handle = syncobj;
+
+	exec.engine_id = engine;
+	exec.address = addr;
+	xe_exec(fd, &exec);
+
+	/* Keep the engine busy for a while once the spinner is running. */
+	xe_spin_wait_started(spin);
+	usleep(50000);
+
+	/* The exec's out-syncobj must not have signalled before the spinner ends. */
+	igt_assert(!syncobj_wait(fd, &syncobj, 1, 1, 0, NULL));
+	xe_spin_end(spin);
+
+	igt_assert(syncobj_wait(fd, &syncobj, 1, INT64_MAX, 0, NULL));
+	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	sync[0].flags |= DRM_XE_SYNC_SIGNAL;
+	xe_vm_unbind_async(fd, vm, 0, 0, addr, bo_size, sync, 1);
+	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	syncobj_destroy(fd, sync[0].handle);
+	syncobj_destroy(fd, syncobj);
+
+	/* The counter must have moved off zero while the spinner ran. */
+	count = pmu_read(pmu_fd);
+	igt_assert_lt_u64(idle, count);
+	igt_debug("Incrementing counter all-busy-group %" PRIu64 " ns\n", count);
+
+	xe_engine_destroy(fd, engine);
+	munmap(spin, bo_size);
+	gem_close(fd, bo);
+	xe_vm_destroy(fd, vm);
+	close(pmu_fd);
+}
+
+/**
+ * Test: Basic test to measure the active time across an engine class
+ *
+ * SUBTEST: render-busy
+ * Description:
+ * Run a test to measure the active engine class time by submitting the
+ * WL to all instances of a class
+ * Run type: FULL
+ * TODO: change ``'Run type' == FULL`` to a better category
+ *
+ * SUBTEST: compute-busy
+ * Description: Run render-group-busy test on the compute engine class
+ * Run type: FULL
+ * TODO: change ``'Run type' == FULL`` to a better category
+ *
+ * SUBTEST: copy-busy
+ * Description: Run copy-group-busy test
+ * Run type: FULL
+ * TODO: change ``'Run type' == FULL`` to a better category
+ *
+ * SUBTEST: vcs-busy
+ * Description: Run media-group-busy test on the video decode engine class
+ * Run type: FULL
+ * TODO: change ``'Run type' == FULL`` to a better category
+ *
+ * SUBTEST: vecs-busy
+ * Description: Run media-group-busy test on the video enhance engine class
+ * Run type: FULL
+ * TODO: change ``'Run type' == FULL`` to a better category
+ *
+ */
+
+/*
+ * Run one spinner per instance of engine class @class on GT @gt and check
+ * that the group busyness counter for that class is non-zero afterwards.
+ *
+ * @fd: xe device file descriptor
+ * @gt: GT whose engines and counter are exercised
+ * @class: engine class to load; selects the counter through
+ *         engine_group_get_config()
+ * @name: subtest name, only used in the debug message
+ *
+ * Returns without testing (after an info message) when the GT has no engine
+ * of the requested class.
+ */
+static void test_engine_group_busyness(int fd, int gt, int class, const char *name)
+{
+	uint32_t vm;
+	uint64_t addr = 0x1a0000;
+	struct drm_xe_sync sync[2] = {
+		{ .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
+		{ .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
+	};
+	struct drm_xe_exec exec = {
+		.num_batch_buffer = 1,
+		.num_syncs = 2,
+		.syncs = to_user_pointer(sync),
+	};
+	uint32_t engines[MAX_INSTANCE];
+	uint32_t syncobjs[MAX_INSTANCE];
+	int pmu_fd;
+	size_t bo_size;
+	uint32_t bo = 0;
+	int i;
+	struct {
+		struct xe_spin spin;
+	} *data;
+	struct drm_xe_engine_class_instance *hwe;
+	struct drm_xe_engine_class_instance eci[MAX_INSTANCE];
+	int num_placements = 0;
+	uint64_t config, count, idle = 0;
+
+	config = engine_group_get_config(gt, class);
+
+	xe_for_each_hw_engine(fd, hwe) {
+		if (hwe->engine_class != class || hwe->gt_id != gt)
+			continue;
+
+		/* eci[], engines[] and syncobjs[] hold MAX_INSTANCE entries each. */
+		igt_assert_lt(num_placements, MAX_INSTANCE);
+		eci[num_placements++] = *hwe;
+	}
+
+	if (!num_placements) {
+		igt_info("Engine class:%d gt:%d not enabled on this platform\n", class, gt);
+		return;
+	}
+
+	vm = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
+	/* One spinner per engine instance, all packed into a single BO. */
+	bo_size = sizeof(*data) * num_placements;
+	bo_size = ALIGN(bo_size + xe_cs_prefetch_size(fd), xe_get_default_alignment(fd));
+
+	bo = xe_bo_create(fd, gt, vm, bo_size);
+	data = xe_bo_map(fd, bo, bo_size);
+
+	/* Each engine is created with the full list of matching placements. */
+	for (i = 0; i < num_placements; i++) {
+		struct drm_xe_engine_create create = {
+			.vm_id = vm,
+			.width = 1,
+			.num_placements = num_placements,
+			.instances = to_user_pointer(eci),
+		};
+
+		igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_ENGINE_CREATE,
+					&create), 0);
+		engines[i] = create.engine_id;
+		syncobjs[i] = syncobj_create(fd, 0);
+	}
+
+	/* sync[0] is signalled by the bind and then waited on by every exec. */
+	sync[0].handle = syncobj_create(fd, 0);
+	xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
+
+	pmu_fd = open_pmu(fd, config);
+
+	for (i = 0; i < num_placements; i++) {
+		uint64_t spin_offset = (char *)&data[i].spin - (char *)data;
+		uint64_t spin_addr = addr + spin_offset;
+
+		xe_spin_init(&data[i].spin, spin_addr, false);
+		sync[0].flags &= ~DRM_XE_SYNC_SIGNAL;
+		sync[1].flags |= DRM_XE_SYNC_SIGNAL;
+		sync[1].handle = syncobjs[i];
+
+		exec.engine_id = engines[i];
+		exec.address = spin_addr;
+		xe_exec(fd, &exec);
+		xe_spin_wait_started(&data[i].spin);
+	}
+
+	for (i = 0; i < num_placements; i++) {
+		xe_spin_end(&data[i].spin);
+		igt_assert(syncobj_wait(fd, &syncobjs[i], 1, INT64_MAX, 0,
+					NULL));
+	}
+
+	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	sync[0].flags |= DRM_XE_SYNC_SIGNAL;
+	xe_vm_unbind_async(fd, vm, 0, 0, addr, bo_size, sync, 1);
+	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	syncobj_destroy(fd, sync[0].handle);
+	for (i = 0; i < num_placements; i++) {
+		syncobj_destroy(fd, syncobjs[i]);
+		xe_engine_destroy(fd, engines[i]);
+	}
+
+	/* The counter must have moved off zero while the spinners ran. */
+	count = pmu_read(pmu_fd);
+	igt_assert_lt_u64(idle, count);
+	igt_debug("Incrementing counter %s-gt-%d %" PRIu64 " ns\n", name, gt, count);
+
+	munmap(data, bo_size);
+	gem_close(fd, bo);
+	xe_vm_destroy(fd, vm);
+	close(pmu_fd);
+}
+
+/*
+ * Test entry point: opens the xe device in a fixture, registers one subtest
+ * per entry of sections[] plus "any-engine-group-busy", and releases the
+ * device in a closing fixture.
+ */
+igt_main
+{
+ struct drm_xe_engine_class_instance *hwe;
+ /* Subtest name -> engine class under test; terminated by a NULL name. */
+ const struct section {
+ const char *name;
+ int class;
+ } sections[] = {
+ { "render-busy", DRM_XE_ENGINE_CLASS_RENDER },
+ { "compute-busy", DRM_XE_ENGINE_CLASS_COMPUTE },
+ { "copy-busy", DRM_XE_ENGINE_CLASS_COPY },
+ { "vcs-busy", DRM_XE_ENGINE_CLASS_VIDEO_DECODE },
+ { "vecs-busy", DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE },
+ { NULL },
+ };
+ int gt;
+ int class;
+ int fd;
+
+ igt_fixture {
+ fd = drm_open_driver(DRIVER_XE);
+ xe_device_get(fd);
+ }
+
+ /* Per-class subtests: run the group busyness test on every GT. */
+ for (const struct section *s = sections; s->name; s++) {
+ igt_subtest_f("%s", s->name)
+ xe_for_each_gt(fd, gt)
+ xe_for_each_hw_engine_class(class)
+ /* only the class named by this section is exercised */
+ if (class == s->class)
+ test_engine_group_busyness(fd, gt, class, s->name);
+ }
+
+ /* Run the any-engine busyness test once for each hardware engine. */
+ igt_subtest("any-engine-group-busy")
+ xe_for_each_hw_engine(fd, hwe)
+ test_any_engine_busyness(fd, hwe);
+
+ igt_fixture {
+ xe_device_put(fd);
+ close(fd);
+ }
+}
--
2.25.1
More information about the igt-dev
mailing list