[PATCH i-g-t 04/14] tests/intel/xe_oa: Add "oa-formats" subtest

Ashutosh Dixit ashutosh.dixit at intel.com
Mon Jul 1 02:52:59 UTC 2024


Add "oa-formats" subtest.

v2: Remove undefined_a_counters (Umesh)

Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
---
 tests/intel/xe_oa.c | 465 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 465 insertions(+)

diff --git a/tests/intel/xe_oa.c b/tests/intel/xe_oa.c
index 9a59350b16..1d8c6eb599 100644
--- a/tests/intel/xe_oa.c
+++ b/tests/intel/xe_oa.c
@@ -478,6 +478,57 @@ write_u64_file(const char *path, uint64_t val)
 	fclose(f);
 }
 
+static uint64_t
+read_report_ticks(const uint32_t *report, enum intel_xe_oa_format_name format)
+{
+
+	struct oa_format fmt = get_oa_format(format);
+
+	return fmt.report_hdr_64bit ? *(uint64_t *)&report[6] : report[3];
+}
+
+/*
+ * t0 is a value sampled before t1. width is number of bits used to represent
+ * t0/t1. Normally t1 is greater than t0. In cases where t1 < t0 use this
+ * helper. Since the size of t1/t0 is already 64 bits, no special handling is
+ * needed for width = 64.
+ */
+static uint64_t
+elapsed_delta(uint64_t t1, uint64_t t0, uint32_t width)
+{
+	uint32_t max_bits = sizeof(t1) * 8;
+
+	igt_assert(width <= max_bits);
+
+	if (t1 < t0 && width != max_bits)
+		return ((1ULL << width) - t0) + t1;
+
+	return t1 - t0;
+}
+
+static uint64_t
+oa_tick_delta(const uint32_t *report1,
+	      const uint32_t *report0,
+	      enum intel_xe_oa_format_name format)
+{
+	return elapsed_delta(read_report_ticks(report1, format),
+			     read_report_ticks(report0, format), 32);
+}
+
+static void
+read_report_clock_ratios(const uint32_t *report,
+			      uint32_t *slice_freq_mhz,
+			      uint32_t *unslice_freq_mhz)
+{
+	uint32_t unslice_freq = report[0] & 0x1ff;
+	uint32_t slice_freq_low = (report[0] >> 25) & 0x7f;
+	uint32_t slice_freq_high = (report[0] >> 9) & 0x3;
+	uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7);
+
+	*slice_freq_mhz = (slice_freq * 16666) / 1000;
+	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
+}
+
 static uint32_t
 report_reason(const uint32_t *report)
 {
@@ -485,6 +536,27 @@ report_reason(const uint32_t *report)
 		OAREPORT_REASON_MASK);
 }
 
+static const char *
+read_report_reason(const uint32_t *report)
+{
+	uint32_t reason = report_reason(report);
+
+	if (reason & (1<<0))
+		return "timer";
+	else if (reason & (1<<1))
+	      return "internal trigger 1";
+	else if (reason & (1<<2))
+	      return "internal trigger 2";
+	else if (reason & (1<<3))
+	      return "context switch";
+	else if (reason & (1<<4))
+	      return "GO 1->0 transition (enter RC6)";
+	else if (reason & (1<<5))
+		return "[un]slice clock ratio change";
+	else
+		return "unknown";
+}
+
 static uint64_t
 oa_timestamp(const uint32_t *report, enum intel_xe_oa_format_name format)
 {
@@ -493,6 +565,17 @@ oa_timestamp(const uint32_t *report, enum intel_xe_oa_format_name format)
 	return fmt.report_hdr_64bit ? *(uint64_t *)&report[2] : report[1];
 }
 
+static uint64_t
+oa_timestamp_delta(const uint32_t *report1,
+		   const uint32_t *report0,
+		   enum intel_xe_oa_format_name format)
+{
+	uint32_t width = intel_graphics_ver(devid) >= IP_VER(12, 55) ? 56 : 32;
+
+	return elapsed_delta(oa_timestamp(report1, format),
+			     oa_timestamp(report0, format), width);
+}
+
 static uint64_t
 timebase_scale(uint64_t delta)
 {
@@ -529,6 +612,182 @@ oa_report_is_periodic(uint32_t oa_exponent, const uint32_t *report)
 	return false;
 }
 
+static uint64_t
+read_40bit_a_counter(const uint32_t *report,
+			  enum intel_xe_oa_format_name fmt, int a_id)
+{
+	struct oa_format format = get_oa_format(fmt);
+	uint8_t *a40_high = (((uint8_t *)report) + format.a40_high_off);
+	uint32_t *a40_low = (uint32_t *)(((uint8_t *)report) +
+					 format.a40_low_off);
+	uint64_t high = (uint64_t)(a40_high[a_id]) << 32;
+
+	return a40_low[a_id] | high;
+}
+
+static uint64_t
+xehpsdv_read_64bit_a_counter(const uint32_t *report, enum intel_xe_oa_format_name fmt, int a_id)
+{
+	struct oa_format format = get_oa_format(fmt);
+	uint64_t *a64 = (uint64_t *)(((uint8_t *)report) + format.a64_off);
+
+	return a64[a_id];
+}
+
+static uint64_t
+get_40bit_a_delta(uint64_t value0, uint64_t value1)
+{
+	if (value0 > value1)
+		return (1ULL << 40) + value1 - value0;
+	else
+		return value1 - value0;
+}
+
+static void
+accumulate_uint64(int a_index,
+		  const uint32_t *report0,
+		  const uint32_t *report1,
+		  enum intel_xe_oa_format_name format,
+		  uint64_t *delta)
+{
+	uint64_t value0 = xehpsdv_read_64bit_a_counter(report0, format, a_index),
+		 value1 = xehpsdv_read_64bit_a_counter(report1, format, a_index);
+
+	*delta += (value1 - value0);
+}
+
+/* The TestOa metric set is designed so */
+static void
+sanity_check_reports(const uint32_t *oa_report0, const uint32_t *oa_report1,
+		     enum intel_xe_oa_format_name fmt)
+{
+	struct oa_format format = get_oa_format(fmt);
+	uint64_t time_delta = timebase_scale(oa_timestamp_delta(oa_report1,
+								oa_report0,
+								fmt));
+	uint64_t clock_delta = oa_tick_delta(oa_report1, oa_report0, fmt);
+	uint64_t max_delta;
+	uint64_t freq;
+	uint32_t *rpt0_b = (uint32_t *)(((uint8_t *)oa_report0) +
+					format.b_off);
+	uint32_t *rpt1_b = (uint32_t *)(((uint8_t *)oa_report1) +
+					format.b_off);
+	uint32_t b;
+	uint32_t ref;
+
+	igt_debug("report type: %s->%s\n",
+		  read_report_reason(oa_report0),
+		  read_report_reason(oa_report1));
+
+	freq = time_delta ? (clock_delta * 1000) / time_delta : 0;
+	igt_debug("freq = %"PRIu64"\n", freq);
+
+	igt_debug("clock delta = %"PRIu64"\n", clock_delta);
+
+	max_delta = clock_delta * intel_xe_perf->devinfo.n_eus;
+
+	/* Gen8+ has some 40bit A counters... */
+	for (int j = format.first_a40; j < format.n_a40 + format.first_a40; j++) {
+		uint64_t value0 = read_40bit_a_counter(oa_report0, fmt, j);
+		uint64_t value1 = read_40bit_a_counter(oa_report1, fmt, j);
+		uint64_t delta = get_40bit_a_delta(value0, value1);
+
+		igt_debug("A40_%d: delta = %"PRIu64"\n", j, delta);
+		igt_assert_f(delta <= max_delta,
+			     "A40_%d: delta = %"PRIu64", max_delta = %"PRIu64"\n",
+			     j, delta, max_delta);
+	}
+
+	for (int j = 0; j < format.n_a64; j++) {
+		uint64_t delta = 0;
+
+		accumulate_uint64(j, oa_report0, oa_report1, fmt, &delta);
+
+		igt_debug("A64_%d: delta = %"PRIu64"\n", format.first_a + j, delta);
+		igt_assert_f(delta <= max_delta,
+			     "A64_%d: delta = %"PRIu64", max_delta = %"PRIu64"\n",
+			     format.first_a + j, delta, max_delta);
+	}
+
+	for (int j = 0; j < format.n_a; j++) {
+		uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) +
+					    format.a_off);
+		uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) +
+					    format.a_off);
+		int a_id = format.first_a + j;
+		uint32_t delta = a1[j] - a0[j];
+
+		igt_debug("A%d: delta = %"PRIu32"\n", a_id, delta);
+		igt_assert_f(delta <= max_delta,
+			     "A%d: delta = %"PRIu32", max_delta = %"PRIu64"\n",
+			     a_id, delta, max_delta);
+	}
+
+	/* The TestOa metric set defines all B counters to be a
+	 * multiple of the gpu clock
+	 */
+	if (format.n_b && (format.oa_type == DRM_XE_OA_FMT_TYPE_OAG || format.oa_type == DRM_XE_OA_FMT_TYPE_OAR)) {
+		if (clock_delta > 0) {
+			b = rpt1_b[0] - rpt0_b[0];
+			igt_debug("B0: delta = %"PRIu32"\n", b);
+			igt_assert_eq(b, 0);
+
+			b = rpt1_b[1] - rpt0_b[1];
+			igt_debug("B1: delta = %"PRIu32"\n", b);
+			igt_assert_eq(b, clock_delta);
+
+			b = rpt1_b[2] - rpt0_b[2];
+			igt_debug("B2: delta = %"PRIu32"\n", b);
+			igt_assert_eq(b, clock_delta);
+
+			b = rpt1_b[3] - rpt0_b[3];
+			ref = clock_delta / 2;
+			igt_debug("B3: delta = %"PRIu32"\n", b);
+			igt_assert(b >= ref - 1 && b <= ref + 1);
+
+			b = rpt1_b[4] - rpt0_b[4];
+			ref = clock_delta / 3;
+			igt_debug("B4: delta = %"PRIu32"\n", b);
+			igt_assert(b >= ref - 1 && b <= ref + 1);
+
+			b = rpt1_b[5] - rpt0_b[5];
+			ref = clock_delta / 3;
+			igt_debug("B5: delta = %"PRIu32"\n", b);
+			igt_assert(b >= ref - 1 && b <= ref + 1);
+
+			b = rpt1_b[6] - rpt0_b[6];
+			ref = clock_delta / 6;
+			igt_debug("B6: delta = %"PRIu32"\n", b);
+			igt_assert(b >= ref - 1 && b <= ref + 1);
+
+			b = rpt1_b[7] - rpt0_b[7];
+			ref = clock_delta * 2 / 3;
+			igt_debug("B7: delta = %"PRIu32"\n", b);
+			igt_assert(b >= ref - 1 && b <= ref + 1);
+		} else {
+			for (int j = 0; j < format.n_b; j++) {
+				b = rpt1_b[j] - rpt0_b[j];
+				igt_debug("B%i: delta = %"PRIu32"\n", j, b);
+				igt_assert_eq(b, 0);
+			}
+		}
+	}
+
+	for (int j = 0; j < format.n_c; j++) {
+		uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) +
+					    format.c_off);
+		uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) +
+					    format.c_off);
+		uint32_t delta = c1[j] - c0[j];
+
+		igt_debug("C%d: delta = %"PRIu32", max_delta=%"PRIu64"\n",
+			  j, delta, max_delta);
+		igt_assert_f(delta <= max_delta,
+			     "C%d: delta = %"PRIu32", max_delta = %"PRIu64"\n",
+			     j, delta, max_delta);
+	}
+}
+
 static bool
 init_sys_info(void)
 {
@@ -817,6 +1076,191 @@ read_2_oa_reports(int format_id,
 	igt_assert(!"reached");
 }
 
+static void
+open_and_read_2_oa_reports(int format_id,
+			   int exponent,
+			   uint32_t *oa_report0,
+			   uint32_t *oa_report1,
+			   bool timer_only,
+			   const struct drm_xe_engine_class_instance *hwe)
+{
+	struct intel_xe_perf_metric_set *test_set = metric_set(hwe);
+	uint64_t properties[] = {
+		DRM_XE_OA_PROPERTY_OA_UNIT_ID, 0,
+
+		/* Include OA reports in samples */
+		DRM_XE_OA_PROPERTY_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_XE_OA_PROPERTY_OA_METRIC_SET, test_set->perf_oa_metrics_set,
+		DRM_XE_OA_PROPERTY_OA_FORMAT, __ff(format_id),
+		DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, exponent,
+		DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, hwe->engine_instance,
+
+	};
+	struct intel_xe_oa_open_prop param = {
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+
+	stream_fd = __perf_open(drm_fd, &param, false);
+	set_fd_flags(stream_fd, O_CLOEXEC);
+
+	read_2_oa_reports(format_id, exponent,
+			  oa_report0, oa_report1, timer_only);
+
+	__perf_close(stream_fd);
+}
+
+static void
+print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
+{
+	struct oa_format format = get_oa_format(fmt);
+	uint64_t ts0 = oa_timestamp(oa_report0, fmt);
+	uint64_t ts1 = oa_timestamp(oa_report1, fmt);
+
+	igt_debug("TIMESTAMP: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n",
+		  ts0, ts1, ts1 - ts0);
+
+	{
+		uint64_t clock0 = read_report_ticks(oa_report0, fmt);
+		uint64_t clock1 = read_report_ticks(oa_report1, fmt);
+
+		igt_debug("CLOCK: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n",
+			  clock0, clock1, clock1 - clock0);
+	}
+
+	{
+		uint32_t slice_freq0, slice_freq1, unslice_freq0, unslice_freq1;
+		const char *reason0 = read_report_reason(oa_report0);
+		const char *reason1 = read_report_reason(oa_report1);
+
+		igt_debug("CTX ID: 1st = %"PRIu32", 2nd = %"PRIu32"\n",
+			  oa_report0[2], oa_report1[2]);
+
+		read_report_clock_ratios(oa_report0,
+					 &slice_freq0, &unslice_freq0);
+		read_report_clock_ratios(oa_report1,
+					 &slice_freq1, &unslice_freq1);
+
+		igt_debug("SLICE CLK: 1st = %umhz, 2nd = %umhz, delta = %d\n",
+			  slice_freq0, slice_freq1,
+			  ((int)slice_freq1 - (int)slice_freq0));
+		igt_debug("UNSLICE CLK: 1st = %umhz, 2nd = %umhz, delta = %d\n",
+			  unslice_freq0, unslice_freq1,
+			  ((int)unslice_freq1 - (int)unslice_freq0));
+
+		igt_debug("REASONS: 1st = \"%s\", 2nd = \"%s\"\n", reason0, reason1);
+	}
+
+	/* Gen8+ has some 40bit A counters... */
+	for (int j = 0; j < format.n_a40; j++) {
+		uint64_t value0 = read_40bit_a_counter(oa_report0, fmt, j);
+		uint64_t value1 = read_40bit_a_counter(oa_report1, fmt, j);
+		uint64_t delta = get_40bit_a_delta(value0, value1);
+
+		igt_debug("A%d: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n",
+			  j, value0, value1, delta);
+	}
+
+	for (int j = 0; j < format.n_a64; j++) {
+		uint64_t value0 = xehpsdv_read_64bit_a_counter(oa_report0, fmt, j);
+		uint64_t value1 = xehpsdv_read_64bit_a_counter(oa_report1, fmt, j);
+		uint64_t delta = value1 - value0;
+
+		igt_debug("A_64%d: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n",
+			  format.first_a + j, value0, value1, delta);
+	}
+
+	for (int j = 0; j < format.n_a; j++) {
+		uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) +
+					    format.a_off);
+		uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) +
+					    format.a_off);
+		int a_id = format.first_a + j;
+		uint32_t delta = a1[j] - a0[j];
+
+		igt_debug("A%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
+			  a_id, a0[j], a1[j], delta);
+	}
+
+	for (int j = 0; j < format.n_b; j++) {
+		uint32_t *b0 = (uint32_t *)(((uint8_t *)oa_report0) +
+					    format.b_off);
+		uint32_t *b1 = (uint32_t *)(((uint8_t *)oa_report1) +
+					    format.b_off);
+		uint32_t delta = b1[j] - b0[j];
+
+		igt_debug("B%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
+			  j, b0[j], b1[j], delta);
+	}
+
+	for (int j = 0; j < format.n_c; j++) {
+		uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) +
+					    format.c_off);
+		uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) +
+					    format.c_off);
+		uint32_t delta = c1[j] - c0[j];
+
+		igt_debug("C%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
+			  j, c0[j], c1[j], delta);
+	}
+}
+
+static bool
+hwe_supports_oa_type(int oa_type, const struct drm_xe_engine_class_instance *hwe)
+{
+	switch (oa_type) {
+	case DRM_XE_OA_FMT_TYPE_OAM:
+	case DRM_XE_OA_FMT_TYPE_OAM_MPEC:
+		return hwe->engine_class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE ||
+		       hwe->engine_class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
+	case DRM_XE_OA_FMT_TYPE_OAG:
+	case DRM_XE_OA_FMT_TYPE_OAR:
+		return hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER;
+	case DRM_XE_OA_FMT_TYPE_OAC:
+		return hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE;
+	case DRM_XE_OA_FMT_TYPE_PEC:
+		return hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER ||
+		       hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE;
+	default:
+		return false;
+	}
+
+}
+
+/**
+ * SUBTEST: oa-formats
+ * Description: Test that supported OA formats work as expected
+ */
+static void test_oa_formats(const struct drm_xe_engine_class_instance *hwe)
+{
+	for (int i = 0; i < XE_OA_FORMAT_MAX; i++) {
+		struct oa_format format = get_oa_format(i);
+		uint32_t oa_report0[format.size / 4];
+		uint32_t oa_report1[format.size / 4];
+
+		if (!format.name) /* sparse, indexed by ID */
+			continue;
+
+		if (!hwe_supports_oa_type(format.oa_type, hwe))
+			continue;
+
+		igt_debug("Checking OA format %s\n", format.name);
+
+		open_and_read_2_oa_reports(i,
+					   oa_exp_1_millisec,
+					   oa_report0,
+					   oa_report1,
+					   false, /* timer reports only */
+					   hwe);
+
+		print_reports(oa_report0, oa_report1, i);
+		sanity_check_reports(oa_report0, oa_report1, i);
+	}
+}
+
+
 static unsigned read_xe_module_ref(void)
 {
 	FILE *fp = fopen("/proc/modules", "r");
@@ -933,8 +1377,25 @@ test_sysctl_defaults(void)
 	igt_assert_eq(paranoid, 1);
 }
 
+#define __for_one_render_engine_0(hwe) \
+	xe_for_each_engine(drm_fd, hwe) \
+		if (hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER) \
+			break; \
+	for_each_if(hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER) \
+		igt_dynamic_f("rcs-%d", hwe->engine_instance)
+
+#define __for_one_render_engine(hwe)	      \
+	for (int m = 0, done = 0; !done; m++) \
+		for_each_if(m < xe_number_engines(drm_fd) && \
+			    (hwe = &xe_engine(drm_fd, m)->instance) && \
+			    hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER && \
+			    (done = 1)) \
+			igt_dynamic_f("rcs-%d", hwe->engine_instance)
+
 igt_main
 {
+	struct drm_xe_engine_class_instance *hwe = NULL;
+
 	igt_fixture {
 		struct stat sb;
 
@@ -985,6 +1446,10 @@ igt_main
 	igt_subtest("missing-sample-flags")
 		test_missing_sample_flags();
 
+	igt_subtest_with_dynamic("oa-formats")
+		__for_one_render_engine(hwe)
+			test_oa_formats(hwe);
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/xe/perf_stream_paranoid", 1);
-- 
2.41.0



More information about the igt-dev mailing list