[PATCH i-g-t 06/18] lib/xe: Complete xe_oa lib changes
Ashutosh Dixit
ashutosh.dixit at intel.com
Fri Feb 16 23:16:51 UTC 2024
Add various functionality in lib/xe for OA. This includes:
* Support for OA metrics generation
* intel_perf_for_devinfo and intel_perf_for_fd support
* intel_perf_load_perf_configs
* xe_perf_ioctl
* drm_xe_query_oa_units
Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
---
lib/intel_device_info.c | 1 +
lib/meson.build | 74 +++++
lib/xe/xe_oa.c | 699 ++++++++++++++++++++++++++++++++++++++++
lib/xe/xe_oa.h | 328 +++++++++++++++++++
lib/xe/xe_query.c | 38 +++
lib/xe/xe_query.h | 5 +
6 files changed, 1145 insertions(+)
create mode 100644 lib/xe/xe_oa.c
create mode 100644 lib/xe/xe_oa.h
diff --git a/lib/intel_device_info.c b/lib/intel_device_info.c
index 64b5246b7783..83ca0a5ed149 100644
--- a/lib/intel_device_info.c
+++ b/lib/intel_device_info.c
@@ -510,6 +510,7 @@ static const struct intel_device_info intel_lunarlake_info = {
.display_ver = 20,
.has_4tile = true,
.has_flatccs = true,
+ .has_oam = true,
.is_lunarlake = true,
.codename = "lunarlake",
.cmds_info = &xe2_cmds_info,
diff --git a/lib/meson.build b/lib/meson.build
index 6122861d8b7a..34de0e1b6ae9 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -374,6 +374,79 @@ install_headers(
subdir : 'i915-perf'
)
+# Sources of the standalone xe OA library; generated files are appended below.
+xe_oa_files = [
+	'igt_list.c',
+	'xe/xe_oa.c',
+]
+
+# Platforms for which an xe/oa-configs/oa-<hw>.xml description exists.
+xe_oa_hardware = [
+	'lnl',
+]
+
+xe_xml_files = []
+foreach hw : xe_oa_hardware
+	xe_xml_files += files('xe/oa-configs/oa-@0@.xml'.format(hw))
+endforeach
+
+# The equations are generated once from all platform XML files.
+xe_oa_files += custom_target(
+	'xe-oa-equations',
+	input : [ 'xe/oa-configs/oa-equations-codegen.py' ] + xe_xml_files,
+	output : [ 'xe_oa_equations.c', 'xe_oa_equations.h' ],
+	command : [
+		python3, '@INPUT0@',
+		'--code', '@OUTPUT0@',
+		'--header', '@OUTPUT1@',
+		xe_xml_files,
+	])
+
+# Register programming and metric sets are generated per platform.
+foreach hw : xe_oa_hardware
+	xe_oa_files += custom_target(
+		'xe-oa-registers-@0@'.format(hw),
+		input : [ 'xe/oa-configs/oa-registers-codegen.py',
+			  'xe/oa-configs/oa-@0@.xml'.format(hw) ],
+		output : [ 'xe_oa_registers_@0@.c'.format(hw),
+			   'xe_oa_registers_@0@.h'.format(hw), ],
+		command : [
+			python3, '@INPUT0@',
+			'--code', '@OUTPUT0@',
+			'--header', '@OUTPUT1@',
+			'--xml-file', '@INPUT1@'
+		])
+	xe_oa_files += custom_target(
+		'xe-oa-metrics-@0@'.format(hw),
+		input : [ 'xe/oa-configs/oa-metricset-codegen.py',
+			  'xe/oa-configs/oa-@0@.xml'.format(hw) ],
+		output : [ 'xe_oa_metrics_@0@.c'.format(hw),
+			   'xe_oa_metrics_@0@.h'.format(hw), ],
+		command : [
+			python3, '@INPUT0@',
+			'--code', '@OUTPUT0@',
+			'--header', '@OUTPUT1@',
+			'--equations-include', 'xe_oa_equations.h',
+			'--registers-include', 'xe_oa_registers_@0@.h'.format(hw),
+			'--xml-file', '@INPUT1@',
+		])
+endforeach
+
+lib_igt_xe_oa_build = shared_library(
+	'xe_oa',
+	xe_oa_files,
+	dependencies: [lib_igt_chipset,lib_igt,pciaccess],
+	include_directories : inc,
+	install: true,
+	soversion: '1.5')
+
+lib_igt_xe_oa = declare_dependency(
+	link_with : lib_igt_xe_oa_build,
+	include_directories : inc)
+
+install_headers(
+	'igt_list.h',
+	'intel_chipset.h',
+	'xe/xe_oa.h',
+	subdir : 'xe-oa'
+)
+
+
pkgconf = configuration_data()
pkgconf.set('prefix', get_option('prefix'))
@@ -381,6 +454,7 @@ pkgconf.set('exec_prefix', '${prefix}')
pkgconf.set('libdir', '${prefix}/@0@'.format(get_option('libdir')))
pkgconf.set('includedir', '${prefix}/@0@'.format(get_option('includedir')))
pkgconf.set('i915_perf_version', '1.5.1')
+pkgconf.set('xe_oa_version', '1.5.1')
configure_file(
input : 'i915-perf.pc.in',
diff --git a/lib/xe/xe_oa.c b/lib/xe/xe_oa.c
new file mode 100644
index 000000000000..346b23349b4f
--- /dev/null
+++ b/lib/xe/xe_oa.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "drmtest.h"
+#include "i915_pciids.h"
+#include "i915_pciids_local.h"
+#include "intel_chipset.h"
+#include "intel_hwconfig_types.h"
+#include "ioctl_wrappers.h"
+#include "linux_scaffold.h"
+#include "xe_ioctl.h"
+#include "xe_oa.h"
+#include "xe_query.h"
+
+#include "xe_oa_metrics_lnl.h"
+
+/*
+ * Allocate a counter group called @name and, when @parent is given, append
+ * it to @parent's list of sub-groups.
+ *
+ * @perf is not used; it is kept so existing callers stay unchanged.
+ * The group owns a private copy of @name and is released with
+ * intel_perf_logical_counter_group_free().
+ */
+static struct intel_perf_logical_counter_group *
+intel_perf_logical_counter_group_new(struct intel_perf *perf,
+				     struct intel_perf_logical_counter_group *parent,
+				     const char *name)
+{
+	struct intel_perf_logical_counter_group *group = calloc(1, sizeof(*group));
+
+	/* Fail loudly on allocation failure instead of dereferencing NULL. */
+	igt_assert(group);
+
+	group->name = strdup(name);
+	igt_assert(group->name);
+
+	IGT_INIT_LIST_HEAD(&group->counters);
+	IGT_INIT_LIST_HEAD(&group->groups);
+
+	/* A parentless (root) group gets a self-linked, empty list node. */
+	if (parent)
+		igt_list_add_tail(&group->link, &parent->groups);
+	else
+		IGT_INIT_LIST_HEAD(&group->link);
+
+	return group;
+}
+
+/*
+ * Recursively release @group and every sub-group linked below it.
+ *
+ * Only the group nodes and their names are freed here; entries on the
+ * counters lists are not touched.
+ */
+static void
+intel_perf_logical_counter_group_free(struct intel_perf_logical_counter_group *group)
+{
+	struct intel_perf_logical_counter_group *sub, *next;
+
+	/* Unlink each sub-group before recursing into it. */
+	igt_list_for_each_entry_safe(sub, next, &group->groups, link) {
+		igt_list_del(&sub->link);
+		intel_perf_logical_counter_group_free(sub);
+	}
+
+	free(group->name);
+	free(group);
+}
+
+/* Free @metric_set together with its counters array. */
+static void
+intel_perf_metric_set_free(struct intel_perf_metric_set *metric_set)
+{
+	struct intel_perf_logical_counter *counters = metric_set->counters;
+
+	free(counters);
+	free(metric_set);
+}
+
+/* Test bit @s of the slice mask stored at the start of topo->data. */
+static bool
+slice_available(const struct drm_i915_query_topology_info *topo,
+		int s)
+{
+	const int byte = s / 8;
+	const int bit = s % 8;
+
+	return (topo->data[byte] >> bit) & 1;
+}
+
+/* Test bit @ss in the subslice mask of slice @s inside topo->data. */
+static bool
+subslice_available(const struct drm_i915_query_topology_info *topo,
+		   int s, int ss)
+{
+	const int byte = topo->subslice_offset +
+			 s * topo->subslice_stride +
+			 ss / 8;
+	const int bit = ss % 8;
+
+	return (topo->data[byte] >> bit) & 1;
+}
+
+/* Test bit @eu in the EU mask of subslice @ss of slice @s inside topo->data. */
+static bool
+eu_available(const struct drm_i915_query_topology_info *topo,
+	     int s, int ss, int eu)
+{
+	const int byte = topo->eu_offset +
+			 (s * topo->max_subslices + ss) * topo->eu_stride +
+			 eu / 8;
+	const int bit = eu % 8;
+
+	return (topo->data[byte] >> bit) & 1;
+}
+
+/*
+ * Failure path for platforms without xe OA metrics: release @perf and hand
+ * back NULL so the caller can return the result directly.
+ */
+static struct intel_perf *
+unsupported_xe_oa_platform(struct intel_perf *perf)
+{
+	intel_perf_free(perf);
+
+	return NULL;
+}
+
+/*
+ * Build an intel_perf object for the device identified by @device_id.
+ *
+ * The device characteristics (ids, frequencies and the i915-style
+ * @topology) are stored in perf->devinfo before the platform's metric sets
+ * are loaded.
+ *
+ * Returns NULL when @device_id is unknown or the platform has no xe OA
+ * metrics; otherwise the result is released with intel_perf_free().
+ */
+struct intel_perf *
+intel_perf_for_devinfo(uint32_t device_id,
+		       uint32_t revision,
+		       uint64_t timestamp_frequency,
+		       uint64_t gt_min_freq,
+		       uint64_t gt_max_freq,
+		       const struct drm_i915_query_topology_info *topology)
+{
+	const struct intel_device_info *devinfo = intel_get_device_info(device_id);
+	struct intel_perf *perf;
+	uint32_t subslice_mask_len;
+	uint32_t eu_mask_len;
+	uint32_t half_max_subslices;
+	uint64_t half_subslices_mask;
+	int bits_per_subslice;
+
+	if (!devinfo)
+		return NULL;
+
+	perf = calloc(1, sizeof(*perf));
+	igt_assert(perf);
+	perf->root_group = intel_perf_logical_counter_group_new(perf, NULL, "");
+
+	IGT_INIT_LIST_HEAD(&perf->metric_sets);
+
+	/* Initialize the device characteristics first. Loading the
+	 * metrics uses that information to detect whether some
+	 * counters are available on a given device (for example BXT
+	 * 2x6 does not have 2 samplers).
+	 */
+	perf->devinfo.devid = device_id;
+	perf->devinfo.graphics_ver = devinfo->graphics_ver;
+	perf->devinfo.revision = revision;
+	perf->devinfo.timestamp_frequency = timestamp_frequency;
+	perf->devinfo.gt_min_freq = gt_min_freq;
+	perf->devinfo.gt_max_freq = gt_max_freq;
+
+	if (devinfo->codename) {
+		snprintf(perf->devinfo.devname, sizeof(perf->devinfo.devname),
+			 "%s", devinfo->codename);
+	}
+
+	/* Store i915 topology. */
+	perf->devinfo.max_slices = topology->max_slices;
+	perf->devinfo.max_subslices_per_slice = topology->max_subslices;
+	perf->devinfo.max_eu_per_subslice = topology->max_eus_per_subslice;
+
+	/* Strides matching the layout of the mask arrays copied below. */
+	perf->devinfo.subslice_slice_stride = topology->subslice_stride;
+	perf->devinfo.eu_subslice_stride = topology->eu_stride;
+	perf->devinfo.eu_slice_stride = topology->max_subslices * topology->eu_stride;
+
+	subslice_mask_len =
+		topology->max_slices * topology->subslice_stride;
+	igt_assert(sizeof(perf->devinfo.subslice_masks) >= subslice_mask_len);
+	memcpy(perf->devinfo.subslice_masks,
+	       &topology->data[topology->subslice_offset],
+	       subslice_mask_len);
+
+	eu_mask_len = topology->eu_stride *
+		topology->max_subslices * topology->max_slices;
+	igt_assert(sizeof(perf->devinfo.eu_masks) >= eu_mask_len);
+	memcpy(perf->devinfo.eu_masks,
+	       &topology->data[topology->eu_offset],
+	       eu_mask_len);
+
+	bits_per_subslice = 8;
+	for (uint32_t s = 0; s < topology->max_slices; s++) {
+		if (!slice_available(topology, s))
+			continue;
+
+		perf->devinfo.slice_mask |= 1ULL << s;
+		for (uint32_t ss = 0; ss < topology->max_subslices; ss++) {
+			if (!subslice_available(topology, s, ss))
+				continue;
+
+			perf->devinfo.subslice_mask |= 1ULL << (s * bits_per_subslice + ss);
+
+			for (uint32_t eu = 0; eu < topology->max_eus_per_subslice; eu++) {
+				if (eu_available(topology, s, ss, eu))
+					perf->devinfo.n_eus++;
+			}
+		}
+	}
+
+	/* The masks are 64 bits wide, so count with the long long variant. */
+	perf->devinfo.n_eu_slices = __builtin_popcountll(perf->devinfo.slice_mask);
+	perf->devinfo.n_eu_sub_slices = __builtin_popcountll(perf->devinfo.subslice_mask);
+
+	/* Compute number of subslices/dualsubslices in first half of
+	 * the GPU.
+	 */
+	half_max_subslices = topology->max_subslices / 2;
+	/* Build the mask in 64 bits; shifting by the full width is undefined. */
+	half_subslices_mask = perf->devinfo.subslice_mask &
+		(half_max_subslices >= 64 ? ~0ULL :
+		 (1ULL << half_max_subslices) - 1);
+	perf->devinfo.n_eu_sub_slices_half_slices = __builtin_popcountll(half_subslices_mask);
+
+	/* Valid on most generations except Gen9LP. */
+	perf->devinfo.eu_threads_count = 7;
+
+	/* Most platforms have full 32bit timestamps. */
+	perf->devinfo.oa_timestamp_mask = 0xffffffff;
+	perf->devinfo.oa_timestamp_shift = 0;
+
+	if (devinfo->is_lunarlake) {
+		intel_perf_load_metrics_lnl(perf);
+	} else {
+		return unsupported_xe_oa_platform(perf);
+	}
+
+	return perf;
+}
+
+/*
+ * Read text from @fd and parse it as an unsigned integer, letting
+ * strtoull() detect the base.
+ *
+ * Returns false when read() fails; otherwise stores the parsed value in
+ * @out_value and returns true.
+ */
+static bool
+read_fd_uint64(int fd, uint64_t *out_value)
+{
+	char text[32];
+	int len = read(fd, text, sizeof(text) - 1);
+
+	if (len < 0)
+		return false;
+
+	/* One byte was held back above for the terminator. */
+	text[len] = '\0';
+	*out_value = strtoull(text, 0, 0);
+
+	return true;
+}
+
+/*
+ * Open @file_path relative to @sysfs_dir_fd and parse its content as an
+ * unsigned integer into @out_value.
+ *
+ * Returns false if the file cannot be opened or read.
+ */
+static bool
+read_sysfs(int sysfs_dir_fd, const char *file_path, uint64_t *out_value)
+{
+	bool ok = false;
+	int fd;
+
+	fd = openat(sysfs_dir_fd, file_path, O_RDONLY);
+	if (fd >= 0) {
+		ok = read_fd_uint64(fd, out_value);
+		close(fd);
+	}
+
+	return ok;
+}
+
+/*
+ * Open the /sys/dev/char/<major>:<minor> directory of the DRM device behind
+ * @drm_fd.
+ *
+ * For minors >= 128 (treated as renderD* nodes) minors 0..63 are scanned for
+ * the node whose "device" link has the same target, and that directory is
+ * opened instead.
+ *
+ * Returns an open directory fd, or a negative value on failure.
+ */
+static int
+open_master_sysfs_dir(int drm_fd)
+{
+	char node_path[128];
+	char target[100], candidate[100];
+	int target_len, candidate_len;
+	struct stat st;
+	int dir_fd;
+
+	if (fstat(drm_fd, &st) || !S_ISCHR(st.st_mode))
+		return -1;
+
+	snprintf(node_path, sizeof(node_path), "/sys/dev/char/%d:%d", major(st.st_rdev), minor(st.st_rdev));
+	dir_fd = open(node_path, O_DIRECTORY);
+	if (dir_fd < 0)
+		return dir_fd;
+
+	/* Only a renderD* node has to be mapped to its cardX node. */
+	if (minor(st.st_rdev) < 128)
+		return dir_fd;
+
+	target_len = readlinkat(dir_fd, "device", target, sizeof(target));
+	close(dir_fd);
+	if (target_len < 0)
+		return target_len;
+
+	for (int card = 0; card < 64; card++) {
+		snprintf(node_path, sizeof(node_path), "/sys/dev/char/%d:%d", major(st.st_rdev), card);
+		dir_fd = open(node_path, O_DIRECTORY);
+		if (dir_fd < 0)
+			continue;
+
+		/* Same link target means same underlying device. */
+		candidate_len = readlinkat(dir_fd, "device", candidate, sizeof(candidate));
+		if (candidate_len == target_len && !memcmp(candidate, target, candidate_len))
+			return dir_fd;
+
+		close(dir_fd);
+		dir_fd = -1;
+	}
+
+	return dir_fd;
+}
+
+/*
+ * Walk the hwconfig blob in @data (@len bytes) as a sequence of
+ * { key, number of values, values... } 32-bit records and copy the
+ * single-value slice/subslice/EU limits into @topinfo.
+ */
+static void process_hwconfig(void *data, uint32_t len,
+			     struct drm_i915_query_topology_info *topinfo)
+{
+	const uint32_t *words = (uint32_t *)data;
+	const uint32_t n_words = len / 4;
+	uint32_t pos;
+
+	/* Each step skips the two header words plus the record's values. */
+	for (pos = 0; pos + 2 < n_words; pos += 2 + words[pos + 1]) {
+		if (words[pos + 1] != 1)
+			continue;
+
+		switch (words[pos]) {
+		case INTEL_HWCONFIG_MAX_SLICES_SUPPORTED:
+			topinfo->max_slices = words[pos + 2];
+			igt_debug("hwconfig: max_slices %d\n", topinfo->max_slices);
+			break;
+		case INTEL_HWCONFIG_MAX_SUBSLICE:
+		case INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED:
+			topinfo->max_subslices = words[pos + 2];
+			igt_debug("hwconfig: max_subslices %d\n", topinfo->max_subslices);
+			break;
+		case INTEL_HWCONFIG_MAX_EU_PER_SUBSLICE:
+		case INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS:
+			topinfo->max_eus_per_subslice = words[pos + 2];
+			igt_debug("hwconfig: max_eus_per_subslice %d\n",
+				  topinfo->max_eus_per_subslice);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Fetch the hwconfig blob of xe device @fd and extract the topology limits
+ * from it into @topinfo. Any query or allocation failure asserts.
+ */
+static void query_hwconfig(int fd, struct drm_i915_query_topology_info *topinfo)
+{
+	void *hwconfig;
+	struct drm_xe_device_query query = {
+		.query = DRM_XE_DEVICE_QUERY_HWCONFIG,
+	};
+
+	/* First call, with no buffer, only reports the required size. */
+	igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
+	igt_assert(query.size);
+
+	hwconfig = malloc(query.size);
+	igt_assert(hwconfig);
+
+	/* Second call fills the buffer. */
+	query.data = to_user_pointer(hwconfig);
+	igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
+
+	process_hwconfig(hwconfig, query.size, topinfo);
+
+	free(hwconfig);
+}
+
+/*
+ * Translate the xe GT topology masks of @drm_fd into a freshly allocated
+ * i915-style drm_i915_query_topology_info.
+ *
+ * The limits come from the hwconfig blob; a single slice is reported, the
+ * subslice mask is the OR of the geometry and compute DSS masks of GT 0, and
+ * the per-DSS EU mask is replicated for every subslice.
+ *
+ * The caller owns the returned buffer and releases it with free(). Query and
+ * allocation failures assert.
+ */
+struct drm_i915_query_topology_info *xe_fill_i915_topology_info(int drm_fd)
+{
+	struct drm_i915_query_topology_info i915_topinfo = {};
+	struct drm_i915_query_topology_info *i915_topo;
+	struct drm_xe_query_topology_mask *xe_topo;
+	int total_size, pos = 0;
+	/*
+	 * The geometry mask is filled by one loop iteration and consumed by a
+	 * later one, so it has to live outside the loop body. Both masks start
+	 * zeroed because only subslice_stride bytes are ever copied into them.
+	 */
+	u64 geom_mask = 0, compute_mask = 0;
+	u8 *ptr;
+	struct drm_xe_device_query query = {
+		.extensions = 0,
+		.query = DRM_XE_DEVICE_QUERY_GT_TOPOLOGY,
+		.size = 0,
+		.data = 0,
+	};
+
+	query_hwconfig(drm_fd, &i915_topinfo);
+
+	i915_topinfo.subslice_offset = 1;		/* always 1 */
+	i915_topinfo.subslice_stride = DIV_ROUND_UP(i915_topinfo.max_subslices, 8);
+	i915_topinfo.eu_offset = i915_topinfo.subslice_offset + i915_topinfo.subslice_stride;
+	i915_topinfo.eu_stride = DIV_ROUND_UP(i915_topinfo.max_eus_per_subslice, 8);
+
+	/* Allocate and start filling the struct to return */
+	total_size = sizeof(i915_topinfo) + i915_topinfo.eu_offset +
+			i915_topinfo.max_subslices * i915_topinfo.eu_stride;
+	i915_topo = malloc(total_size);
+	igt_assert(i915_topo);
+
+	memcpy(i915_topo, &i915_topinfo, sizeof(i915_topinfo));
+	ptr = (u8 *)i915_topo + sizeof(i915_topinfo);
+	*ptr++ = 0x1;					/* slice mask */
+
+	/* Get xe topology masks */
+	igt_assert_eq(igt_ioctl(drm_fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
+	igt_assert_neq(query.size, 0);
+
+	xe_topo = malloc(query.size);
+	igt_assert(xe_topo);
+
+	query.data = to_user_pointer(xe_topo);
+	igt_assert_eq(igt_ioctl(drm_fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
+	igt_debug("Topology size: %d\n", query.size);
+
+	while (query.size >= sizeof(struct drm_xe_query_topology_mask)) {
+		struct drm_xe_query_topology_mask *topo =
+			(struct drm_xe_query_topology_mask*)((unsigned char*)xe_topo + pos);
+		int i, sz = sizeof(struct drm_xe_query_topology_mask) + topo->num_bytes;
+
+		/* Stop at a truncated entry rather than underflowing query.size. */
+		if ((uint32_t)sz > query.size)
+			break;
+
+		igt_debug(" gt_id: %d type: %d n:%d [%d] ", topo->gt_id, topo->type, topo->num_bytes, sz);
+		for (int j=0; j< topo->num_bytes; j++)
+			igt_debug(" %02x", topo->mask[j]);
+		igt_debug("\n");
+
+		/* i915 only returns topology for gt 0, do the same here */
+		if (topo->gt_id)
+			goto next;
+
+		/* Follow the same order as in xe query_gt_topology() */
+		switch (topo->type) {
+		case DRM_XE_TOPO_DSS_GEOMETRY:
+			igt_assert_lte(i915_topo->subslice_stride, 8);	/* Fit in u64 mask */
+			memcpy(&geom_mask, topo->mask, i915_topo->subslice_stride);
+			break;
+		case DRM_XE_TOPO_DSS_COMPUTE:
+			igt_assert_lte(i915_topo->subslice_stride, 8);	/* Fit in u64 mask */
+			memcpy(&compute_mask, topo->mask, i915_topo->subslice_stride);
+			geom_mask |= compute_mask;
+			memcpy(ptr, &geom_mask, i915_topo->subslice_stride);
+			ptr += i915_topo->subslice_stride;
+			break;
+		case DRM_XE_TOPO_EU_PER_DSS:
+			/* The same EU mask is repeated for each subslice. */
+			for (i = 0; i < i915_topo->max_subslices; i++) {
+				memcpy(ptr, topo->mask, i915_topo->eu_stride);
+				ptr += i915_topo->eu_stride;
+			}
+			break;
+		default:
+			igt_assert(0);
+		}
+next:
+		query.size -= sz;
+		pos += sz;
+	}
+
+	free(xe_topo);
+
+	return i915_topo;
+}
+
+/*
+ * Gather everything intel_perf_for_devinfo() needs for GT @gt of xe device
+ * @drm_fd: min/max GT frequencies from sysfs, the device id, the OA
+ * timestamp frequency of the first OA unit and an i915-style topology.
+ *
+ * Returns NULL (after a warning) if any of that cannot be obtained.
+ */
+static struct intel_perf *
+xe_perf_for_fd(int drm_fd, int gt)
+{
+	uint32_t device_id;
+	uint32_t device_revision = 0;
+	/* Same width as the parameter of intel_perf_for_devinfo(). */
+	uint64_t timestamp_frequency;
+	uint64_t gt_min_freq = 0;
+	uint64_t gt_max_freq = 0;
+	struct drm_i915_query_topology_info *topology;
+	struct intel_perf *ret;
+	int sysfs_dir_fd = open_master_sysfs_dir(drm_fd);
+	char path_min[64], path_max[64];
+
+	if (sysfs_dir_fd < 0) {
+		igt_warn("open_master_sysfs_dir failed\n");
+		return NULL;
+	}
+
+	if (IS_PONTEVECCHIO(xe_dev_id(drm_fd))) {
+		snprintf(path_min, sizeof(path_min),
+			 "device/tile%d/gt%d/freq%d/min_freq", gt, gt, gt);
+		snprintf(path_max, sizeof(path_max),
+			 "device/tile%d/gt%d/freq%d/max_freq", gt, gt, gt);
+	} else {
+		snprintf(path_min, sizeof(path_min),
+			 "device/tile0/gt%d/freq%d/min_freq", gt, gt);
+		snprintf(path_max, sizeof(path_max),
+			 "device/tile0/gt%d/freq%d/max_freq", gt, gt);
+	}
+
+	if (!read_sysfs(sysfs_dir_fd, path_min, &gt_min_freq) ||
+	    !read_sysfs(sysfs_dir_fd, path_max, &gt_max_freq)) {
+		igt_warn("Unable to read freqs from sysfs\n");
+		close(sysfs_dir_fd);
+		return NULL;
+	}
+	close(sysfs_dir_fd);
+
+	device_id = intel_get_drm_devid(drm_fd);
+	timestamp_frequency = xe_oa_units(drm_fd)->oa_units[0].oa_timestamp_freq;
+
+	topology = xe_fill_i915_topology_info(drm_fd);
+	if (!topology) {
+		igt_warn("xe_fill_i915_topology_info failed\n");
+		return NULL;
+	}
+
+	/* The sysfs values are multiplied by 1000000 before being passed on. */
+	ret = intel_perf_for_devinfo(device_id,
+				     device_revision,
+				     timestamp_frequency,
+				     gt_min_freq * 1000000,
+				     gt_max_freq * 1000000,
+				     topology);
+	if (!ret)
+		igt_warn("intel_perf_for_devinfo failed\n");
+
+	free(topology);
+
+	return ret;
+}
+
+/*
+ * Create the intel_perf object for GT @gt of @drm_fd.
+ *
+ * Returns NULL when @drm_fd is not an xe device or the object cannot be
+ * built.
+ */
+struct intel_perf *
+intel_perf_for_fd(int drm_fd, int gt)
+{
+	return is_xe_device(drm_fd) ? xe_perf_for_fd(drm_fd, gt) : NULL;
+}
+
+/*
+ * Release @perf, its tree of counter groups and all of its metric sets.
+ *
+ * Passing NULL is a no-op, so the result of a failed intel_perf_for_fd()
+ * or intel_perf_for_devinfo() can be handed straight to this function.
+ */
+void
+intel_perf_free(struct intel_perf *perf)
+{
+	struct intel_perf_metric_set *metric_set, *tmp;
+
+	if (!perf)
+		return;
+
+	intel_perf_logical_counter_group_free(perf->root_group);
+
+	igt_list_for_each_entry_safe(metric_set, tmp, &perf->metric_sets, link) {
+		igt_list_del(&metric_set->link);
+		intel_perf_metric_set_free(metric_set);
+	}
+
+	free(perf);
+}
+
+/*
+ * Attach @counter to the group designated by @group_path, a '/'-separated
+ * list of group names relative to perf->root_group. Missing groups are
+ * created on the way.
+ *
+ * An empty @group_path attaches the counter to the root group. Each path
+ * component must be shorter than 128 bytes.
+ */
+void
+intel_perf_add_logical_counter(struct intel_perf *perf,
+			       struct intel_perf_logical_counter *counter,
+			       const char *group_path)
+{
+	const char *group_path_end = group_path + strlen(group_path);
+	struct intel_perf_logical_counter_group *group = perf->root_group;
+	const char *name = group_path;
+
+	while (name < group_path_end) {
+		const char *name_end = strchr(name, '/');
+		char group_name[128] = { 0, };
+		struct intel_perf_logical_counter_group *iter_group, *child_group = NULL;
+		size_t name_len;
+
+		if (!name_end)
+			name_end = group_path_end;
+
+		/* Keep room for the terminator already present in the buffer. */
+		name_len = name_end - name;
+		igt_assert(name_len < sizeof(group_name));
+		memcpy(group_name, name, name_len);
+
+		igt_list_for_each_entry(iter_group, &group->groups, link) {
+			if (!strcmp(iter_group->name, group_name)) {
+				child_group = iter_group;
+				break;
+			}
+		}
+
+		if (!child_group)
+			child_group = intel_perf_logical_counter_group_new(perf, group, group_name);
+
+		/* Skip the separator, but never step past the end of the string. */
+		name = (name_end < group_path_end) ? name_end + 1 : name_end;
+		group = child_group;
+	}
+
+	/* group is the deepest group reached, or the root for an empty path. */
+	igt_list_add_tail(&counter->link, &group->counters);
+}
+
+/* Append @metric_set to the end of the list of metric sets held by @perf. */
+void
+intel_perf_add_metric_set(struct intel_perf *perf,
+			  struct intel_perf_metric_set *metric_set)
+{
+	struct igt_list_head *sets = &perf->metric_sets;
+
+	igt_list_add_tail(&metric_set->link, sets);
+}
+
+/*
+ * Register the register programming of @metric_set through
+ * DRM_XE_PERF_OP_ADD_CONFIG on @drm_fd.
+ *
+ * The mux, b-counter and flex register lists are concatenated, in that
+ * order, into one temporary buffer. On success the value returned by the
+ * ioctl is stored in metric_set->perf_oa_metrics_set; a failure is ignored.
+ */
+static void
+load_metric_set_config(struct intel_perf_metric_set *metric_set, int drm_fd)
+{
+	struct drm_xe_oa_config config;
+	void *buffer;
+	u8 *regs;
+	int ret;
+
+	memset(&config, 0, sizeof(config));
+
+	memcpy(config.uuid, metric_set->hw_config_guid, sizeof(config.uuid));
+
+	config.n_regs = metric_set->n_mux_regs +
+		metric_set->n_b_counter_regs +
+		metric_set->n_flex_regs;
+
+	/* Each register entry is two u32 values. */
+	buffer = malloc(2 * config.n_regs * sizeof(u32));
+	config.regs_ptr = to_user_pointer(buffer);
+	igt_assert(config.regs_ptr);
+	regs = buffer;
+
+	memcpy(regs, metric_set->mux_regs, 2 * metric_set->n_mux_regs * sizeof(u32));
+	regs += 2 * metric_set->n_mux_regs * sizeof(u32);
+
+	memcpy(regs, metric_set->b_counter_regs, 2 * metric_set->n_b_counter_regs * sizeof(u32));
+	regs += 2 * metric_set->n_b_counter_regs * sizeof(u32);
+
+	memcpy(regs, metric_set->flex_regs, 2 * metric_set->n_flex_regs * sizeof(u32));
+	regs += 2 * metric_set->n_flex_regs * sizeof(u32);
+
+	ret = xe_perf_ioctl(drm_fd, DRM_XE_PERF_OP_ADD_CONFIG, &config);
+	if (ret >= 0)
+		metric_set->perf_oa_metrics_set = ret;
+
+	free(buffer);
+}
+
+/*
+ * Resolve the metric set id of every metric set in @perf.
+ *
+ * Each sub-directory of the device's sysfs "metrics" directory is named
+ * after a config guid and holds an "id" file; ids found there are stored in
+ * the matching metric sets. Metric sets still without an id afterwards are
+ * registered through load_metric_set_config().
+ *
+ * Returns silently if the sysfs directories cannot be opened.
+ */
+void
+intel_perf_load_perf_configs(struct intel_perf *perf, int drm_fd)
+{
+	int sysfs_dir_fd = open_master_sysfs_dir(drm_fd);
+	struct dirent *entry;
+	int metrics_dir_fd;
+	DIR *metrics_dir;
+	struct intel_perf_metric_set *metric_set;
+
+	if (sysfs_dir_fd < 0)
+		return;
+
+	metrics_dir_fd = openat(sysfs_dir_fd, "metrics", O_DIRECTORY);
+	close(sysfs_dir_fd);
+	/* openat() reports failure with -1, so test for any negative value. */
+	if (metrics_dir_fd < 0)
+		return;
+
+	metrics_dir = fdopendir(metrics_dir_fd);
+	if (!metrics_dir) {
+		close(metrics_dir_fd);
+		return;
+	}
+
+	while ((entry = readdir(metrics_dir))) {
+		bool metric_id_read;
+		uint64_t metric_id;
+		char path[256 + 4];
+		int id_fd;
+
+		if (entry->d_type != DT_DIR)
+			continue;
+
+		snprintf(path, sizeof(path), "%s/id", entry->d_name);
+
+		id_fd = openat(metrics_dir_fd, path, O_RDONLY);
+		if (id_fd < 0)
+			continue;
+
+		metric_id_read = read_fd_uint64(id_fd, &metric_id);
+		close(id_fd);
+
+		if (!metric_id_read)
+			continue;
+
+		igt_list_for_each_entry(metric_set, &perf->metric_sets, link) {
+			if (!strcmp(metric_set->hw_config_guid, entry->d_name)) {
+				metric_set->perf_oa_metrics_set = metric_id;
+				break;
+			}
+		}
+	}
+
+	/* closedir() also closes metrics_dir_fd, which the stream now owns. */
+	closedir(metrics_dir);
+
+	igt_list_for_each_entry(metric_set, &perf->metric_sets, link) {
+		if (metric_set->perf_oa_metrics_set)
+			continue;
+
+		load_metric_set_config(metric_set, drm_fd);
+	}
+}
+
+/*
+ * Convert the (property, value) pairs referenced by @properties into the
+ * array of set-property extensions at @extn and chain the entries together.
+ *
+ * The next_extension field of the last entry is not written. At least one
+ * property is required.
+ */
+static void xe_oa_prop_to_ext(struct drm_xe_oa_open_prop *properties,
+			      struct drm_xe_ext_set_property *extn)
+{
+	__u64 *pairs = (__u64 *)properties->properties_ptr;
+	int i = 0, j;
+
+	while (i < properties->num_properties) {
+		extn[i].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
+		extn[i].property = pairs[2 * i];
+		extn[i].value = pairs[2 * i + 1];
+		i++;
+	}
+
+	igt_assert_lte(1, i);
+
+	/* Point every entry but the last at its successor. */
+	for (j = 1; j < i; j++)
+		extn[j - 1].base.next_extension = (__u64)&extn[j];
+}
+
+/*
+ * Issue DRM_IOCTL_XE_PERF for the OA perf type with operation @op.
+ *
+ * For DRM_XE_PERF_OP_STREAM_OPEN, @arg is a struct drm_xe_oa_open_prop whose
+ * properties (at most XE_OA_MAX_SET_PROPERTIES) are converted into a chain
+ * of set-property extensions; for any other operation @arg is passed
+ * through unchanged.
+ *
+ * Returns the result of igt_ioctl().
+ */
+int xe_perf_ioctl(int fd, enum drm_xe_perf_op op, void *arg)
+{
+#define XE_OA_MAX_SET_PROPERTIES 16
+
+	struct drm_xe_ext_set_property ext[XE_OA_MAX_SET_PROPERTIES] = {};
+
+	/* Chain the PERF layer struct */
+	struct drm_xe_perf_param p = {
+		.extensions = 0,
+		.perf_type = DRM_XE_PERF_TYPE_OA,
+		.perf_op = op,
+	};
+
+	if (op == DRM_XE_PERF_OP_STREAM_OPEN) {
+		struct drm_xe_oa_open_prop *oprop = (struct drm_xe_oa_open_prop *)arg;
+
+		igt_assert_lte(oprop->num_properties, XE_OA_MAX_SET_PROPERTIES);
+		xe_oa_prop_to_ext(oprop, ext);
+		p.param = (__u64)ext;
+	} else {
+		p.param = (__u64)arg;
+	}
+
+	return igt_ioctl(fd, DRM_IOCTL_XE_PERF, &p);
+}
+
+/*
+ * Assert that xe_perf_ioctl(@fd, @op, @arg) fails with -1 and leaves errno
+ * equal to @err, then clear errno.
+ */
+void xe_perf_ioctl_err(int fd, enum drm_xe_perf_op op, void *arg, int err)
+{
+	igt_assert_eq(xe_perf_ioctl(fd, op, arg), -1);
+	igt_assert_eq(errno, err);
+
+	errno = 0;
+}
diff --git a/lib/xe/xe_oa.h b/lib/xe/xe_oa.h
new file mode 100644
index 000000000000..f3a9d1f6c7b4
--- /dev/null
+++ b/lib/xe/xe_oa.h
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef PERF_METRICS_H
+#define PERF_METRICS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "igt_list.h"
+#include <xe_drm.h>
+
+#define _DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
+
+#define INTEL_DEVICE_MAX_SLICES (8)
+#define INTEL_DEVICE_MAX_SUBSLICES (32)
+#define INTEL_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
+
+struct intel_perf_devinfo {
+ char devname[20];
+
+ /* The following fields are prepared for equations from the XML files.
+ * Their values are built up from the topology fields.
+ */
+ uint32_t devid;
+ uint32_t graphics_ver;
+ uint32_t revision;
+ /**
+ * Bit shifting required to put OA report timestamps into
+ * timestamp_frequency (some HW generations can shift
+ * timestamp values to the right by a number of bits).
+ */
+ int32_t oa_timestamp_shift;
+ /**
+ * On some platforms only part of the timestamp bits are valid
+ * (on previous platforms we would get full 32bits, newer
+ * platforms can have fewer). It's important to know when
+ * correlating the full 36bits timestamps to the OA report
+ * timestamps.
+ */
+ uint64_t oa_timestamp_mask;
+ /* Frequency of the timestamps in Hz */
+ uint64_t timestamp_frequency;
+ uint64_t gt_min_freq;
+ uint64_t gt_max_freq;
+
+ /* Total number of EUs */
+ uint64_t n_eus;
+ /* Number of available slices (bits set in slice_mask) */
+ uint64_t n_eu_slices;
+ /* Total number of subslices/dualsubslices */
+ uint64_t n_eu_sub_slices;
+ /* Number of subslices/dualsubslices in the first half of the
+ * slices.
+ */
+ uint64_t n_eu_sub_slices_half_slices;
+ /* Mask of available subslices/dualsubslices */
+ uint64_t subslice_mask;
+ /* Mask of available slices */
+ uint64_t slice_mask;
+ /* Number of threads in one EU */
+ uint64_t eu_threads_count;
+
+ /**
+ * Maximum number of slices present on this device (can be more than
+ * num_slices if some slices are fused).
+ */
+ uint16_t max_slices;
+
+ /**
+ * Maximum number of subslices per slice present on this device (can be more
+ * than the maximum value in the num_subslices[] array if some subslices are
+ * fused).
+ */
+ uint16_t max_subslices_per_slice;
+
+ /**
+ * Stride to access subslice_masks[].
+ */
+ uint16_t subslice_slice_stride;
+
+ /**
+ * Maximum number of EUs per subslice (can be more than
+ * num_eu_per_subslice if some EUs are fused off).
+ */
+ uint16_t max_eu_per_subslice;
+
+ /**
+ * Strides to access eu_masks[].
+ */
+ uint16_t eu_slice_stride;
+ uint16_t eu_subslice_stride;
+
+ /**
+ * A bit mask of the slices available.
+ */
+ uint8_t slice_masks[_DIV_ROUND_UP(INTEL_DEVICE_MAX_SLICES, 8)];
+
+ /**
+ * An array of bit masks of the subslices available, use subslice_slice_stride
+ * to access this array.
+ */
+ uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES *
+ _DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)];
+
+ /**
+ * An array of bit masks of EUs available, use eu_slice_stride &
+ * eu_subslice_stride to access this array.
+ */
+ uint8_t eu_masks[INTEL_DEVICE_MAX_SLICES *
+ INTEL_DEVICE_MAX_SUBSLICES *
+ _DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)];
+};
+
+typedef enum {
+ INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT64,
+ INTEL_PERF_LOGICAL_COUNTER_STORAGE_UINT32,
+ INTEL_PERF_LOGICAL_COUNTER_STORAGE_DOUBLE,
+ INTEL_PERF_LOGICAL_COUNTER_STORAGE_FLOAT,
+ INTEL_PERF_LOGICAL_COUNTER_STORAGE_BOOL32,
+} intel_perf_logical_counter_storage_t;
+
+typedef enum {
+ INTEL_PERF_LOGICAL_COUNTER_TYPE_RAW,
+ INTEL_PERF_LOGICAL_COUNTER_TYPE_DURATION_RAW,
+ INTEL_PERF_LOGICAL_COUNTER_TYPE_DURATION_NORM,
+ INTEL_PERF_LOGICAL_COUNTER_TYPE_EVENT,
+ INTEL_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT,
+ INTEL_PERF_LOGICAL_COUNTER_TYPE_TIMESTAMP,
+} intel_perf_logical_counter_type_t;
+
+typedef enum {
+ /* size */
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_BYTES,
+
+ /* frequency */
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_HZ,
+
+ /* time */
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_NS,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_US,
+
+ /**/
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_PIXELS,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_TEXELS,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_THREADS,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_PERCENT,
+
+ /* events */
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_MESSAGES,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_NUMBER,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_CYCLES,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_EVENTS,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_UTILIZATION,
+
+ /**/
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_EU_SENDS_TO_L3_CACHE_LINES,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_EU_REQUESTS_TO_L3_CACHE_LINES,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_EU_BYTES_PER_L3_CACHE_LINE,
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_GBPS,
+
+ INTEL_PERF_LOGICAL_COUNTER_UNIT_MAX
+} intel_perf_logical_counter_unit_t;
+
+/* Hold deltas of raw performance counters. */
+struct intel_perf_accumulator {
+#define INTEL_PERF_MAX_RAW_OA_COUNTERS 64
+ uint64_t deltas[INTEL_PERF_MAX_RAW_OA_COUNTERS];
+};
+
+struct intel_perf;
+struct intel_perf_metric_set;
+struct intel_perf_logical_counter {
+ const struct intel_perf_metric_set *metric_set;
+ const char *name;
+ const char *symbol_name;
+ const char *desc;
+ const char *group;
+ bool (*availability)(const struct intel_perf *perf);
+ intel_perf_logical_counter_storage_t storage;
+ intel_perf_logical_counter_type_t type;
+ intel_perf_logical_counter_unit_t unit;
+ union {
+ uint64_t (*max_uint64)(const struct intel_perf *perf,
+ const struct intel_perf_metric_set *metric_set,
+ uint64_t *deltas);
+ double (*max_float)(const struct intel_perf *perf,
+ const struct intel_perf_metric_set *metric_set,
+ uint64_t *deltas);
+ };
+
+ union {
+ uint64_t (*read_uint64)(const struct intel_perf *perf,
+ const struct intel_perf_metric_set *metric_set,
+ uint64_t *deltas);
+ double (*read_float)(const struct intel_perf *perf,
+ const struct intel_perf_metric_set *metric_set,
+ uint64_t *deltas);
+ };
+
+ struct igt_list_head link; /* list from intel_perf_logical_counter_group.counters */
+};
+
+struct intel_perf_register_prog {
+ uint32_t reg;
+ uint32_t val;
+};
+
+struct intel_perf_metric_set {
+ const char *name;
+ const char *symbol_name;
+ const char *hw_config_guid;
+
+ struct intel_perf_logical_counter *counters;
+ int n_counters;
+
+ uint64_t perf_oa_metrics_set;
+ int perf_oa_format;
+ int perf_raw_size;
+
+ /* For indexing into accumulator->deltas[] ... */
+ int gpu_time_offset;
+ int gpu_clock_offset;
+ int a_offset;
+ int b_offset;
+ int c_offset;
+ int perfcnt_offset;
+
+ const struct intel_perf_register_prog *b_counter_regs;
+ uint32_t n_b_counter_regs;
+
+ const struct intel_perf_register_prog *mux_regs;
+ uint32_t n_mux_regs;
+
+ const struct intel_perf_register_prog *flex_regs;
+ uint32_t n_flex_regs;
+
+ struct igt_list_head link;
+};
+
+/* A tree structure with group having subgroups and counters. */
+struct intel_perf_logical_counter_group {
+ char *name;
+
+ struct igt_list_head counters;
+ struct igt_list_head groups;
+
+ struct igt_list_head link; /* link for intel_perf_logical_counter_group.groups */
+};
+
+struct intel_perf {
+ const char *name;
+
+ struct intel_perf_logical_counter_group *root_group;
+
+ struct igt_list_head metric_sets;
+
+ struct intel_perf_devinfo devinfo;
+};
+
+struct drm_i915_query_topology_info;
+
+/* Return true if bit @slice is set in devinfo->slice_masks[]. */
+static inline bool
+intel_perf_devinfo_slice_available(const struct intel_perf_devinfo *devinfo,
+				   int slice)
+{
+	const uint8_t mask_byte = devinfo->slice_masks[slice / 8];
+
+	return (mask_byte & (1U << (slice % 8))) != 0;
+}
+
+/*
+ * Return true if bit @subslice is set in the part of
+ * devinfo->subslice_masks[] belonging to @slice.
+ */
+static inline bool
+intel_perf_devinfo_subslice_available(const struct intel_perf_devinfo *devinfo,
+				      int slice, int subslice)
+{
+	const uint8_t mask_byte =
+		devinfo->subslice_masks[slice * devinfo->subslice_slice_stride +
+					subslice / 8];
+
+	return (mask_byte & (1U << (subslice % 8))) != 0;
+}
+
+/*
+ * Return true if bit @eu is set in the part of devinfo->eu_masks[]
+ * belonging to @subslice of @slice.
+ */
+static inline bool
+intel_perf_devinfo_eu_available(const struct intel_perf_devinfo *devinfo,
+				int slice, int subslice, int eu)
+{
+	unsigned subslice_offset = slice * devinfo->eu_slice_stride +
+		subslice * devinfo->eu_subslice_stride;
+	const uint8_t mask_byte = devinfo->eu_masks[subslice_offset + eu / 8];
+
+	return (mask_byte & (1U << (eu % 8))) != 0;
+}
+
+struct drm_i915_query_topology_info *xe_fill_i915_topology_info(int drm_fd);
+struct intel_perf *intel_perf_for_fd(int drm_fd, int gt);
+struct intel_perf *intel_perf_for_devinfo(uint32_t device_id,
+ uint32_t revision,
+ uint64_t timestamp_frequency,
+ uint64_t gt_min_freq,
+ uint64_t gt_max_freq,
+ const struct drm_i915_query_topology_info *topology);
+void intel_perf_free(struct intel_perf *perf);
+
+void intel_perf_add_logical_counter(struct intel_perf *perf,
+ struct intel_perf_logical_counter *counter,
+ const char *group);
+
+void intel_perf_add_metric_set(struct intel_perf *perf,
+ struct intel_perf_metric_set *metric_set);
+
+void intel_perf_load_perf_configs(struct intel_perf *perf, int drm_fd);
+
+
+struct drm_xe_oa_open_prop {
+ uint32_t num_properties;
+ uint32_t reserved;
+ uint64_t properties_ptr;
+};
+
+int xe_perf_ioctl(int fd, enum drm_xe_perf_op op, void *arg);
+void xe_perf_ioctl_err(int fd, enum drm_xe_perf_op op, void *arg, int err);
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif /* PERF_METRICS_H */
diff --git a/lib/xe/xe_query.c b/lib/xe/xe_query.c
index 729fba6b1a43..9e08caa74b1b 100644
--- a/lib/xe/xe_query.c
+++ b/lib/xe/xe_query.c
@@ -114,6 +114,27 @@ static struct drm_xe_query_mem_regions *xe_query_mem_regions_new(int fd)
return mem_regions;
}
+/*
+ * Run DRM_XE_DEVICE_QUERY_OA_UNITS on @fd and return the result in a newly
+ * allocated buffer. Query and allocation failures assert.
+ */
+static struct drm_xe_query_oa_units *xe_query_oa_units_new(int fd)
+{
+	struct drm_xe_device_query query = {
+		.query = DRM_XE_DEVICE_QUERY_OA_UNITS,
+	};
+	struct drm_xe_query_oa_units *oa_units;
+
+	/* First call, with no buffer, only reports the required size. */
+	igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
+
+	oa_units = malloc(query.size);
+	igt_assert(oa_units);
+
+	/* Second call fills the buffer. */
+	query.data = to_user_pointer(oa_units);
+	igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
+
+	return oa_units;
+}
+
+
static uint64_t native_region_for_gt(const struct drm_xe_query_gt_list *gt_list, int gt)
{
uint64_t region;
@@ -251,6 +272,7 @@ struct xe_device *xe_device_get(int fd)
xe_dev->memory_regions = __memory_regions(xe_dev->gt_list);
xe_dev->engines = xe_query_engines(fd);
xe_dev->mem_regions = xe_query_mem_regions_new(fd);
+ xe_dev->oa_units = xe_query_oa_units_new(fd);
xe_dev->vram_size = calloc(xe_dev->gt_list->num_gt, sizeof(*xe_dev->vram_size));
xe_dev->visible_vram_size = calloc(xe_dev->gt_list->num_gt, sizeof(*xe_dev->visible_vram_size));
for (int gt = 0; gt < xe_dev->gt_list->num_gt; gt++) {
@@ -524,6 +546,22 @@ uint32_t xe_min_page_size(int fd, uint64_t region)
*/
xe_dev_FN(xe_config, config, struct drm_xe_query_config *);
+/**
+ * xe_gt_list:
+ * @fd: xe device fd
+ *
+ * Returns query gts of xe device @fd.
+ */
+xe_dev_FN(xe_gt_list, gt_list, struct drm_xe_query_gt_list *);
+
+/**
+ * xe_oa_units:
+ * @fd: xe device fd
+ *
+ * Returns the OA units information of xe device @fd.
+ */
+xe_dev_FN(xe_oa_units, oa_units, struct drm_xe_query_oa_units *);
+
/**
* xe_number_engine:
* @fd: xe device fd
diff --git a/lib/xe/xe_query.h b/lib/xe/xe_query.h
index 2460384c99af..5e2b7d223a65 100644
--- a/lib/xe/xe_query.h
+++ b/lib/xe/xe_query.h
@@ -38,6 +38,9 @@ struct xe_device {
/** @mem_regions: regions memory information and usage */
struct drm_xe_query_mem_regions *mem_regions;
+ /** @oa_units: information about OA units */
+ struct drm_xe_query_oa_units *oa_units;
+
/** @vram_size: array of vram sizes for all gt_list */
uint64_t *vram_size;
@@ -85,6 +88,8 @@ const char *xe_region_name(uint64_t region);
uint16_t xe_region_class(int fd, uint64_t region);
uint32_t xe_min_page_size(int fd, uint64_t region);
struct drm_xe_query_config *xe_config(int fd);
+struct drm_xe_query_gt_list *xe_gt_list(int fd);
+struct drm_xe_query_oa_units *xe_oa_units(int fd);
unsigned int xe_number_engines(int fd);
bool xe_has_vram(int fd);
uint64_t xe_vram_size(int fd, int gt);
--
2.41.0
More information about the igt-dev
mailing list