[PATCH i-g-t 03/15] tests/intel/xe_oa: Add first tests

Ashutosh Dixit ashutosh.dixit at intel.com
Fri Jul 5 18:26:24 UTC 2024


Add "xe-ref-count" and "sysctl-defaults" subtests.

v2: Set INTEL_XE_DEVICE_MAX_SUBSLICES to 64 (value on PVC)
v3: Rename xe perf layer as xe observation layer
v4: Run on Xe2+ only

Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
---
 lib/xe/xe_oa.h      |   2 +-
 tests/intel/xe_oa.c | 808 ++++++++++++++++++++++++++++++++++++++++++++
 tests/meson.build   |   2 +
 3 files changed, 811 insertions(+), 1 deletion(-)
 create mode 100644 tests/intel/xe_oa.c

diff --git a/lib/xe/xe_oa.h b/lib/xe/xe_oa.h
index d5f59a3815..962f9dddcc 100644
--- a/lib/xe/xe_oa.h
+++ b/lib/xe/xe_oa.h
@@ -19,7 +19,7 @@ extern "C" {
 #define _DIV_ROUND_UP(a, b)  (((a) + (b) - 1) / (b))
 
 #define INTEL_XE_DEVICE_MAX_SLICES           (8)
-#define INTEL_XE_DEVICE_MAX_SUBSLICES        (32)
+#define INTEL_XE_DEVICE_MAX_SUBSLICES        (64) /* Maximum on XE_PVC */
 #define INTEL_XE_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
 
 enum intel_xe_oa_format_name {
diff --git a/tests/intel/xe_oa.c b/tests/intel/xe_oa.c
new file mode 100644
index 0000000000..5d6546528d
--- /dev/null
+++ b/tests/intel/xe_oa.c
@@ -0,0 +1,808 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <time.h>
+#include <poll.h>
+#include <math.h>
+
+#include "drm.h"
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_sysfs.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+#include "xe/xe_oa.h"
+
+/**
+ * TEST: perf
+ * Description: Test the Xe OA metrics streaming interface
+ * Category: Core
+ * Mega feature: Performance interface
+ * Sub-category: Performance tests
+ * Functionality: oa
+ * Feature: xe streaming interface, oa
+ * Test category: Perf
+ */
+
+#define OA_MI_REPORT_PERF_COUNT		((0x28 << 23) | (4 - 2))
+
+#define OAREPORT_REASON_MASK           0x3f
+#define OAREPORT_REASON_SHIFT          19
+#define OAREPORT_REASON_TIMER          (1<<0)
+#define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_CTX_SWITCH     (1<<3)
+#define OAREPORT_REASON_GO             (1<<4)
+#define OAREPORT_REASON_CLK_RATIO      (1<<5)
+
+#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET	(1 << 19)
+#define PIPE_CONTROL_SYNC_GFDT	  (1 << 17)
+#define PIPE_CONTROL_NO_WRITE	   (0 << 14)
+#define PIPE_CONTROL_WRITE_IMMEDIATE    (1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH_COUNT  (2 << 14)
+#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
+#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
+#define PIPE_CONTROL_ISP_DIS	    (1 << 9)
+#define PIPE_CONTROL_INTERRUPT_ENABLE   (1 << 8)
+/* GT */
+#define PIPE_CONTROL_DATA_CACHE_INVALIDATE      (1 << 5)
+#define PIPE_CONTROL_PPGTT_WRITE	(0 << 2)
+#define PIPE_CONTROL_GLOBAL_GTT_WRITE   (1 << 2)
+
+#define MAX_OA_BUF_SIZE (16 * 1024 * 1024)
+#define OA_BUFFER_SIZE MAX_OA_BUF_SIZE
+
+#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK 0x03fffffc
+/*
+ * Engine specific registers defined as offsets from engine->mmio_base. For
+ * these registers, OR bit[0] with 1 so we can add the mmio_base when running
+ * engine specific test.
+ */
+#define MMIO_BASE_OFFSET 0x1
+
+#define OAG_OASTATUS (0xdafc)
+#define OAG_PERF_COUNTER_B(idx) (0xDA94 + 4 * (idx))
+#define OAG_OATAILPTR (0xdb04)
+#define OAG_OATAILPTR_MASK 0xffffffc0
+#define OAG_OABUFFER (0xdb08)
+
+#define XE_OA_MAX_SET_PROPERTIES 16
+
+#define ADD_PROPS(_head, _tail, _key, _value)	\
+	do { \
+		igt_assert((_tail - _head) < (XE_OA_MAX_SET_PROPERTIES * 2)); \
+		*_tail++ = DRM_XE_OA_PROPERTY_##_key; \
+		*_tail++ = _value; \
+	} while (0)
+
+struct accumulator {
+#define MAX_RAW_OA_COUNTERS 62
+	enum intel_xe_oa_format_name format;
+
+	uint64_t deltas[MAX_RAW_OA_COUNTERS];
+};
+
+/* OA unit types */
+enum {
+	OAG,
+	OAR,
+	OAM,
+
+	MAX_OA_TYPE,
+};
+
+struct oa_format {
+	const char *name;
+	size_t size;
+	int a40_high_off; /* bytes */
+	int a40_low_off;
+	int n_a40;
+	int a64_off;
+	int n_a64;
+	int a_off;
+	int n_a;
+	int first_a;
+	int first_a40;
+	int b_off;
+	int n_b;
+	int c_off;
+	int n_c;
+	int oa_type; /* of enum intel_xe_oa_format_name */
+	bool report_hdr_64bit;
+	int counter_select;
+	int counter_size;
+	int bc_report;
+};
+
+static struct oa_format gen12_oa_formats[XE_OA_FORMAT_MAX] = {
+	[XE_OA_FORMAT_A32u40_A4u32_B8_C8] = {
+		"A32u40_A4u32_B8_C8", .size = 256,
+		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32,
+		.a_off = 144, .n_a = 4, .first_a = 32,
+		.b_off = 192, .n_b = 8,
+		.c_off = 224, .n_c = 8, .oa_type = DRM_XE_OA_FMT_TYPE_OAG,
+		.counter_select = 5,
+	},
+};
+
+static struct oa_format dg2_oa_formats[XE_OA_FORMAT_MAX] = {
+	[XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = {
+		"A32u40_A4u32_B8_C8", .size = 256,
+		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32,
+		.a_off = 144, .n_a = 4, .first_a = 32,
+		.b_off = 192, .n_b = 8,
+		.c_off = 224, .n_c = 8, .oa_type = DRM_XE_OA_FMT_TYPE_OAR,
+		.counter_select = 5,
+	},
+	/* This format has A36 and A37 interleaved with high bytes of some A
+	 * counters, so we will accumulate only subset of counters.
+	 */
+	[XE_OA_FORMAT_A24u40_A14u32_B8_C8] = {
+		"A24u40_A14u32_B8_C8", .size = 256,
+		/* u40: A4 - A23 */
+		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 20, .first_a40 = 4,
+		/* u32: A0 - A3 */
+		.a_off = 16, .n_a = 4,
+		.b_off = 192, .n_b = 8,
+		.c_off = 224, .n_c = 8, .oa_type = DRM_XE_OA_FMT_TYPE_OAG,
+		.counter_select = 5,
+	},
+	/* This format has 24 u64 counters ranging from A0 - A35. Until we come
+	 * up with a better mechanism to define missing counters, we will use a
+	 * subset of counters that are indexed by one-increments - A28 - A35.
+	 */
+	[XE_OAC_FORMAT_A24u64_B8_C8] = {
+		"OAC_A24u64_B8_C8", .size = 320,
+		.a64_off = 160, .n_a64 = 8,
+		.b_off = 224, .n_b = 8,
+		.c_off = 256, .n_c = 8, .oa_type = DRM_XE_OA_FMT_TYPE_OAC,
+		.report_hdr_64bit = true,
+		.counter_select = 1, },
+};
+
+static struct oa_format mtl_oa_formats[XE_OA_FORMAT_MAX] = {
+	[XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = {
+		"A32u40_A4u32_B8_C8", .size = 256,
+		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32,
+		.a_off = 144, .n_a = 4, .first_a = 32,
+		.b_off = 192, .n_b = 8,
+		.c_off = 224, .n_c = 8, .oa_type = DRM_XE_OA_FMT_TYPE_OAR,
+		.counter_select = 5,
+	},
+	/* This format has A36 and A37 interleaved with high bytes of some A
+	 * counters, so we will accumulate only subset of counters.
+	 */
+	[XE_OA_FORMAT_A24u40_A14u32_B8_C8] = {
+		"A24u40_A14u32_B8_C8", .size = 256,
+		/* u40: A4 - A23 */
+		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 20, .first_a40 = 4,
+		/* u32: A0 - A3 */
+		.a_off = 16, .n_a = 4,
+		.b_off = 192, .n_b = 8,
+		.c_off = 224, .n_c = 8, .oa_type = DRM_XE_OA_FMT_TYPE_OAG,
+		.counter_select = 5,
+	},
+
+	/* Treat MPEC countes as A counters for now */
+	[XE_OAM_FORMAT_MPEC8u64_B8_C8] = {
+		"MPEC8u64_B8_C8", .size = 192,
+		.a64_off = 32, .n_a64 = 8,
+		.b_off = 96, .n_b = 8,
+		.c_off = 128, .n_c = 8, .oa_type = DRM_XE_OA_FMT_TYPE_OAM_MPEC,
+		.report_hdr_64bit = true,
+		.counter_select = 1,
+	},
+	[XE_OAM_FORMAT_MPEC8u32_B8_C8] = {
+		"MPEC8u32_B8_C8", .size = 128,
+		.a_off = 32, .n_a = 8,
+		.b_off = 64, .n_b = 8,
+		.c_off = 96, .n_c = 8, .oa_type = DRM_XE_OA_FMT_TYPE_OAM_MPEC,
+		.report_hdr_64bit = true,
+		.counter_select = 2,
+	},
+	/* This format has 24 u64 counters ranging from A0 - A35. Until we come
+	 * up with a better mechanism to define missing counters, we will use a
+	 * subset of counters that are indexed by one-increments - A28 - A35.
+	 */
+	[XE_OAC_FORMAT_A24u64_B8_C8] = {
+		"OAC_A24u64_B8_C8", .size = 320,
+		.a64_off = 160, .n_a64 = 8,
+		.b_off = 224, .n_b = 8,
+		.c_off = 256, .n_c = 8, .oa_type = DRM_XE_OA_FMT_TYPE_OAC,
+		.report_hdr_64bit = true,
+		.counter_select = 1, },
+};
+
+static struct oa_format lnl_oa_formats[XE_OA_FORMAT_MAX] = {
+	[XE_OA_FORMAT_PEC64u64] = {
+		"PEC64u64", .size = 576,
+		.oa_type = DRM_XE_OA_FMT_TYPE_PEC,
+		.report_hdr_64bit = true,
+		.counter_select = 1,
+		.counter_size = 1,
+		.bc_report = 0 },
+	[XE_OA_FORMAT_PEC64u64_B8_C8] = {
+		"PEC64u64_B8_C8", .size = 640,
+		.oa_type = DRM_XE_OA_FMT_TYPE_PEC,
+		.report_hdr_64bit = true,
+		.counter_select = 1,
+		.counter_size = 1,
+		.bc_report = 1 },
+	[XE_OA_FORMAT_PEC64u32] = {
+		"PEC64u32", .size = 320,
+		.oa_type = DRM_XE_OA_FMT_TYPE_PEC,
+		.report_hdr_64bit = true,
+		.counter_select = 1,
+		.counter_size = 0,
+		.bc_report = 0 },
+	[XE_OA_FORMAT_PEC32u64_G1] = {
+		"PEC32u64_G1", .size = 320,
+		.oa_type = DRM_XE_OA_FMT_TYPE_PEC,
+		.report_hdr_64bit = true,
+		.counter_select = 5,
+		.counter_size = 1,
+		.bc_report = 0 },
+	[XE_OA_FORMAT_PEC32u32_G1] = {
+		"PEC32u32_G1", .size = 192,
+		.oa_type = DRM_XE_OA_FMT_TYPE_PEC,
+		.report_hdr_64bit = true,
+		.counter_select = 5,
+		.counter_size = 0,
+		.bc_report = 0 },
+	[XE_OA_FORMAT_PEC32u64_G2] = {
+		"PEC32u64_G2", .size = 320,
+		.oa_type = DRM_XE_OA_FMT_TYPE_PEC,
+		.report_hdr_64bit = true,
+		.counter_select = 6,
+		.counter_size = 1,
+		.bc_report = 0 },
+	[XE_OA_FORMAT_PEC32u32_G2] = {
+		"PEC32u64_G2", .size = 192,
+		.oa_type = DRM_XE_OA_FMT_TYPE_PEC,
+		.report_hdr_64bit = true,
+		.counter_select = 6,
+		.counter_size = 0,
+		.bc_report = 0 },
+	[XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = {
+		"PEC36u64_G1_32_G2_4", .size = 320,
+		.oa_type = DRM_XE_OA_FMT_TYPE_PEC,
+		.report_hdr_64bit = true,
+		.counter_select = 3,
+		.counter_size = 1,
+		.bc_report = 0 },
+	[XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = {
+		"PEC36u64_G1_4_G2_32_G2", .size = 320,
+		.oa_type = DRM_XE_OA_FMT_TYPE_PEC,
+		.report_hdr_64bit = true,
+		.counter_select = 4,
+		.counter_size = 1,
+		.bc_report = 0 },
+};
+
+static int drm_fd = -1;
+static int sysfs = -1;
+static int pm_fd = -1;
+static int stream_fd = -1;
+static uint32_t devid;
+
+struct drm_xe_engine_class_instance default_hwe;
+
+static struct intel_xe_perf *intel_xe_perf;
+static uint64_t oa_exp_1_millisec;
+struct intel_mmio_data mmio_data;
+
+static struct intel_xe_perf_metric_set *metric_set(const struct drm_xe_engine_class_instance *hwe)
+{
+	const char *test_set_name = NULL;
+	struct intel_xe_perf_metric_set *metric_set_iter;
+	struct intel_xe_perf_metric_set *test_set = NULL;
+
+	if (hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER ||
+	    hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
+		test_set_name = "TestOa";
+	else if ((hwe->engine_class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE ||
+		  hwe->engine_class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE) &&
+		 HAS_OAM(devid))
+		test_set_name = "MediaSet1";
+	else
+		igt_assert(!"reached");
+
+	igt_list_for_each_entry(metric_set_iter, &intel_xe_perf->metric_sets, link) {
+		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
+			test_set = metric_set_iter;
+			break;
+		}
+	}
+
+	igt_assert(test_set);
+
+	/*
+	 * configuration was loaded in init_sys_info() ->
+	 * intel_xe_perf_load_perf_configs(), and test_set->perf_oa_metrics_set
+	 * should point to metric id returned by the config add ioctl. 0 is
+	 * invalid.
+	 */
+	igt_assert_neq_u64(test_set->perf_oa_metrics_set, 0);
+
+	igt_debug("engine %d:%d - %s metric set UUID = %s\n",
+		  hwe->engine_class,
+		  hwe->engine_instance,
+		  test_set->symbol_name,
+		  test_set->hw_config_guid);
+
+	return test_set;
+}
+#define default_test_set metric_set(&default_hwe)
+
+static void set_fd_flags(int fd, int flags)
+{
+	int old = fcntl(fd, F_GETFL, 0);
+
+	igt_assert_lte(0, old);
+	igt_assert_eq(0, fcntl(fd, F_SETFL, old | flags));
+}
+
+static u32 get_stream_status(int fd)
+{
+	struct drm_xe_oa_stream_status status;
+
+	do_ioctl(fd, DRM_XE_OBSERVATION_IOCTL_STATUS, &status);
+
+	return status.oa_status;
+}
+
+static void
+dump_report(const uint32_t *report, uint32_t size, const char *message) {
+	uint32_t i;
+	igt_debug("%s\n", message);
+	for (i = 0; i < size; i += 4) {
+		igt_debug("%08x %08x %08x %08x\n",
+				report[i],
+				report[i + 1],
+				report[i + 2],
+				report[i + 3]);
+	}
+}
+
+static struct oa_format
+get_oa_format(enum intel_xe_oa_format_name format)
+{
+	if (IS_DG2(devid))
+		return dg2_oa_formats[format];
+	else if (IS_METEORLAKE(devid))
+		return mtl_oa_formats[format];
+	else if (intel_graphics_ver(devid) >= IP_VER(20, 0))
+		return lnl_oa_formats[format];
+	else
+		return gen12_oa_formats[format];
+}
+
+static u64 oa_format_fields(u64 name)
+{
+#define FIELD_PREP_ULL(_mask, _val) \
+	(((_val) << (__builtin_ffsll(_mask) - 1)) & (_mask))
+
+	struct oa_format f = get_oa_format(name);
+
+	/* 0 format name is invalid */
+	if (!name)
+		memset(&f, 0xff, sizeof(f));
+
+	return FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, (u64)f.oa_type) |
+		FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, (u64)f.counter_select) |
+		FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, (u64)f.counter_size) |
+		FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_BC_REPORT, (u64)f.bc_report);
+}
+#define __ff oa_format_fields
+
+static void
+__perf_close(int fd)
+{
+	close(fd);
+	stream_fd = -1;
+
+	if (pm_fd >= 0) {
+		close(pm_fd);
+		pm_fd = -1;
+	}
+}
+
+static int
+__perf_open(int fd, struct intel_xe_oa_open_prop *param, bool prevent_pm)
+{
+	int ret;
+	int32_t pm_value = 0;
+
+	if (stream_fd >= 0)
+		__perf_close(stream_fd);
+	if (pm_fd >= 0) {
+		close(pm_fd);
+		pm_fd = -1;
+	}
+
+	ret = intel_xe_perf_ioctl(fd, DRM_XE_OBSERVATION_OP_STREAM_OPEN, param);
+
+	igt_assert(ret >= 0);
+	errno = 0;
+
+	if (prevent_pm) {
+		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
+		igt_assert(pm_fd >= 0);
+
+		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
+	}
+
+	return ret;
+}
+
+static uint64_t
+read_u64_file(const char *path)
+{
+	FILE *f;
+	uint64_t val;
+
+	f = fopen(path, "r");
+	igt_assert(f);
+
+	igt_assert_eq(fscanf(f, "%"PRIu64, &val), 1);
+
+	fclose(f);
+
+	return val;
+}
+
+static void
+write_u64_file(const char *path, uint64_t val)
+{
+	FILE *f;
+
+	f = fopen(path, "w");
+	igt_assert(f);
+
+	igt_assert(fprintf(f, "%"PRIu64, val) > 0);
+
+	fclose(f);
+}
+
+static uint32_t
+report_reason(const uint32_t *report)
+{
+	return ((report[0] >> OAREPORT_REASON_SHIFT) &
+		OAREPORT_REASON_MASK);
+}
+
+static uint64_t
+oa_timestamp(const uint32_t *report, enum intel_xe_oa_format_name format)
+{
+	struct oa_format fmt = get_oa_format(format);
+
+	return fmt.report_hdr_64bit ? *(uint64_t *)&report[2] : report[1];
+}
+
+static uint64_t
+timebase_scale(uint64_t delta)
+{
+	return (delta * NSEC_PER_SEC) / intel_xe_perf->devinfo.timestamp_frequency;
+}
+
+/* Returns: the largest OA exponent that will still result in a sampling period
+ * less than or equal to the given @period.
+ */
+static int
+max_oa_exponent_for_period_lte(uint64_t period)
+{
+	/* NB: timebase_scale() takes a uint64_t and an exponent of 30
+	 * would already represent a period of ~3 minutes so there's
+	 * really no need to consider higher exponents.
+	 */
+	for (int i = 0; i < 30; i++) {
+		uint64_t oa_period = timebase_scale(2 << i);
+
+		if (oa_period > period)
+			return max(0, i - 1);
+	}
+
+	igt_assert(!"reached");
+	return -1;
+}
+
+static bool
+oa_report_is_periodic(uint32_t oa_exponent, const uint32_t *report)
+{
+	if (report_reason(report) & OAREPORT_REASON_TIMER)
+		return true;
+
+	return false;
+}
+
+static bool
+init_sys_info(void)
+{
+	igt_assert_neq(devid, 0);
+
+	intel_xe_perf = intel_xe_perf_for_fd(drm_fd, 0);
+	igt_require(intel_xe_perf);
+
+	igt_debug("n_eu_slices: %"PRIu64"\n", intel_xe_perf->devinfo.n_eu_slices);
+	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_xe_perf->devinfo.n_eu_sub_slices);
+	igt_debug("n_eus: %"PRIu64"\n", intel_xe_perf->devinfo.n_eus);
+	igt_debug("timestamp_frequency = %"PRIu64"\n",
+		  intel_xe_perf->devinfo.timestamp_frequency);
+	igt_assert_neq(intel_xe_perf->devinfo.timestamp_frequency, 0);
+
+	intel_xe_perf_load_perf_configs(intel_xe_perf, drm_fd);
+
+	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
+
+	return true;
+}
+
+static void
+read_2_oa_reports(int format_id,
+		  int exponent,
+		  uint32_t *oa_report0,
+		  uint32_t *oa_report1,
+		  bool timer_only)
+{
+	size_t format_size = get_oa_format(format_id).size;
+	uint32_t exponent_mask = (1 << (exponent + 1)) - 1;
+
+	/* Note: we allocate a large buffer so that each read() iteration
+	 * should scrape *all* pending records.
+	 *
+	 * The largest buffer the OA unit supports is 16MB.
+	 *
+	 * Being sure we are fetching all buffered reports allows us to
+	 * potentially throw away / skip all reports whenever we see
+	 * a _REPORT_LOST notification as a way of being sure are
+	 * measurements aren't skewed by a lost report.
+	 *
+	 * Note: that is is useful for some tests but also not something
+	 * applications would be expected to resort to. Lost reports are
+	 * somewhat unpredictable but typically don't pose a problem - except
+	 * to indicate that the OA unit may be over taxed if lots of reports
+	 * are being lost.
+	 */
+	int max_reports = MAX_OA_BUF_SIZE / format_size;
+	int buf_size = format_size * max_reports * 1.5;
+	uint8_t *buf = malloc(buf_size);
+	int n = 0;
+
+	for (int i = 0; i < 1000; i++) {
+		u32 oa_status = 0;
+		ssize_t len;
+
+		while ((len = read(stream_fd, buf, buf_size)) < 0 && errno == EINTR)
+			;
+		if (errno == EIO) {
+			oa_status = get_stream_status(stream_fd);
+			igt_debug("oa_status %#x\n", oa_status);
+			continue;
+		}
+
+		igt_assert(len > 0);
+		igt_debug("read %d bytes\n", (int)len);
+
+		/* Need at least 2 reports */
+		if (len < 2 * format_size)
+			continue;
+
+		for (size_t offset = 0; offset < len; offset += format_size) {
+			const uint32_t *report = (void *)(buf + offset);
+
+			/* Currently the only test that should ever expect to
+			 * see a _BUFFER_LOST error is the buffer_fill test,
+			 * otherwise something bad has probably happened...
+			 */
+			igt_assert(!(oa_status & DRM_XE_OASTATUS_BUFFER_OVERFLOW));
+
+			/* At high sampling frequencies the OA HW might not be
+			 * able to cope with all write requests and will notify
+			 * us that a report was lost. We restart our read of
+			 * two sequential reports due to the timeline blip this
+			 * implies
+			 */
+			if (oa_status & DRM_XE_OASTATUS_REPORT_LOST) {
+				igt_debug("read restart: OA trigger collision / report lost\n");
+				n = 0;
+
+				/* XXX: break, because we don't know where
+				 * within the series of already read reports
+				 * there could be a blip from the lost report.
+				 */
+				break;
+			}
+
+			dump_report(report, format_size / 4, "oa-formats");
+
+			igt_debug("read report: reason = %x, timestamp = %"PRIx64", exponent mask=%x\n",
+				  report[0], oa_timestamp(report, format_id), exponent_mask);
+
+			/* Don't expect zero for timestamps */
+			igt_assert_neq_u64(oa_timestamp(report, format_id), 0);
+
+			if (timer_only) {
+				if (!oa_report_is_periodic(exponent, report)) {
+					igt_debug("skipping non timer report\n");
+					continue;
+				}
+			}
+
+			if (n++ == 0)
+				memcpy(oa_report0, report, format_size);
+			else {
+				memcpy(oa_report1, report, format_size);
+				free(buf);
+				return;
+			}
+		}
+	}
+
+	free(buf);
+
+	igt_assert(!"reached");
+}
+
+static unsigned read_xe_module_ref(void)
+{
+	FILE *fp = fopen("/proc/modules", "r");
+	char *line = NULL;
+	size_t line_buf_size = 0;
+	int len = 0;
+	unsigned ref_count;
+	char mod[8];
+	int modn = 3;
+
+	igt_assert(fp);
+
+	strcpy(mod, "xe ");
+	while ((len = getline(&line, &line_buf_size, fp)) > 0) {
+		if (strncmp(line, mod, modn) == 0) {
+			unsigned long mem;
+			int ret = sscanf(line + 5, "%lu %u", &mem, &ref_count);
+			igt_assert(ret == 2);
+			goto done;
+		}
+	}
+
+	igt_assert(!"reached");
+
+done:
+	free(line);
+	fclose(fp);
+	return ref_count;
+}
+
+/**
+ * SUBTEST: xe-ref-count
+ * Description: Check that an open oa stream holds a reference on the xe module
+ */
+static void
+test_xe_ref_count(void)
+{
+	uint64_t properties[] = {
+		DRM_XE_OA_PROPERTY_OA_UNIT_ID, 0,
+
+		/* Include OA reports in samples */
+		DRM_XE_OA_PROPERTY_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_XE_OA_PROPERTY_OA_METRIC_SET, 0 /* updated below */,
+		DRM_XE_OA_PROPERTY_OA_FORMAT, __ff(0), /* update below */
+		DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, 0, /* update below */
+	};
+	struct intel_xe_oa_open_prop param = {
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	unsigned baseline, ref_count0, ref_count1;
+	uint32_t oa_report0[64];
+	uint32_t oa_report1[64];
+
+	/* This should be the first test before the first fixture so no drm_fd
+	 * should have been opened so far...
+	 */
+	igt_assert_eq(drm_fd, -1);
+
+	baseline = read_xe_module_ref();
+	igt_debug("baseline ref count (drm fd closed) = %u\n", baseline);
+
+	drm_fd = __drm_open_driver(DRIVER_XE);
+	if (is_xe_device(drm_fd))
+		xe_device_get(drm_fd);
+	devid = intel_get_drm_devid(drm_fd);
+	sysfs = igt_sysfs_open(drm_fd);
+
+	/* Note: these global variables are only initialized after calling
+	 * init_sys_info()...
+	 */
+	igt_require(init_sys_info());
+	properties[5] = default_test_set->perf_oa_metrics_set;
+	properties[7] = __ff(default_test_set->perf_oa_format);
+	properties[9] = oa_exp_1_millisec;
+
+	ref_count0 = read_xe_module_ref();
+	igt_debug("initial ref count with drm_fd open = %u\n", ref_count0);
+
+	stream_fd = __perf_open(drm_fd, &param, false);
+        set_fd_flags(stream_fd, O_CLOEXEC);
+	ref_count1 = read_xe_module_ref();
+	igt_debug("ref count after opening oa stream = %u\n", ref_count1);
+
+	drm_close_driver(drm_fd);
+	close(sysfs);
+	drm_fd = -1;
+	sysfs = -1;
+	ref_count0 = read_xe_module_ref();
+	igt_debug("ref count after closing drm fd = %u\n", ref_count0);
+
+	read_2_oa_reports(default_test_set->perf_oa_format,
+			  oa_exp_1_millisec,
+			  oa_report0,
+			  oa_report1,
+			  false); /* not just timer reports */
+
+	__perf_close(stream_fd);
+	ref_count0 = read_xe_module_ref();
+	igt_debug("ref count after closing oa stream fd = %u\n", ref_count0);
+}
+
+/**
+ * SUBTEST: sysctl-defaults
+ * Description: Test that observation_paranoid sysctl exists
+ */
+static void
+test_sysctl_defaults(void)
+{
+	int paranoid = read_u64_file("/proc/sys/dev/xe/observation_paranoid");
+
+	igt_assert_eq(paranoid, 1);
+}
+
+igt_main
+{
+	igt_fixture {
+		struct stat sb;
+
+		/*
+		 * Prior tests may have unloaded the module or failed while
+		 * loading/unloading the module. Load xe here before we
+		 * stat the files.
+		 */
+		drm_load_module(DRIVER_XE);
+		srandom(time(NULL));
+		igt_require(!stat("/proc/sys/dev/xe/observation_paranoid", &sb));
+
+		/* Currently only run on Xe2+ */
+		igt_require(intel_graphics_ver(devid) >= IP_VER(20, 0));
+	}
+
+	igt_subtest("xe-ref-count")
+		test_xe_ref_count();
+
+	igt_subtest("sysctl-defaults")
+		test_sysctl_defaults();
+
+	igt_fixture {
+		/* leave sysctl options in their default state... */
+		write_u64_file("/proc/sys/dev/xe/observation_paranoid", 1);
+
+		if (intel_xe_perf)
+			intel_xe_perf_free(intel_xe_perf);
+
+		drm_close_driver(drm_fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 5a7db3923e..357db27230 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -301,6 +301,7 @@ intel_xe_progs = [
 	'xe_mmap',
 	'xe_module_load',
 	'xe_noexec_ping_pong',
+	'xe_oa',
 	'xe_pat',
 	'xe_peer2peer',
 	'xe_pm',
@@ -367,6 +368,7 @@ extra_dependencies = {
 	'perf': [ lib_igt_i915_perf ],
 	'perf_pmu':  [ lib_igt_perf ],
 	'sw_sync': [ libatomic ],
+	'xe_oa': [ lib_igt_xe_oa ],
 }
 
 test_executables = []
-- 
2.41.0



More information about the igt-dev mailing list