[Intel-gfx] [PATCH i-g-t 12/29] igt/perf: avoid assumptions about oa exponent <-> freq mappings

Lionel Landwerlin lionel.g.landwerlin at intel.com
Tue Apr 25 22:32:44 UTC 2017


From: Robert Bragg <robert at sixbynine.org>

Signed-off-by: Robert Bragg <robert at sixbynine.org>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 tests/perf.c | 135 +++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 84 insertions(+), 51 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index 15f41246..d47e45c8 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -249,6 +249,7 @@ static uint64_t gt_max_freq_mhz = 0;
 static uint64_t timestamp_frequency = 12500000;
 static enum drm_i915_oa_format test_oa_format;
 static bool *undefined_a_counters;
+static uint64_t oa_exp_1_millisec;
 
 static igt_render_copyfunc_t render_copy = NULL;
 static uint32_t (*read_report_ticks)(uint32_t *report,
@@ -424,11 +425,11 @@ timebase_scale(uint32_t u32_delta)
 	return ((uint64_t)u32_delta * NSEC_PER_SEC) / timestamp_frequency;
 }
 
-/* Return the largest OA exponent that will still result in a sampling
- * frequency higher than the given frequency.
+/* Returns: the largest OA exponent that will still result in a sampling period
+ * less than or equal to the given @period.
  */
 static int
-max_oa_exponent_for_higher_freq(uint64_t freq)
+max_oa_exponent_for_period_lte(uint64_t period)
 {
 	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
 	 * would already represent a period of ~3 minutes so there's
@@ -436,9 +437,8 @@ max_oa_exponent_for_higher_freq(uint64_t freq)
 	 */
 	for (int i = 0; i < 30; i++) {
 		uint64_t oa_period = timebase_scale(2 << i);
-		uint32_t oa_freq = NSEC_PER_SEC / oa_period;
 
-		if (oa_freq <= freq)
+		if (oa_period > period)
 			return max(0, i - 1);
 	}
 
@@ -446,6 +446,25 @@ max_oa_exponent_for_higher_freq(uint64_t freq)
 	return -1;
 }
 
+/* Returns: the largest OA exponent with a sampling frequency greater than
+ * @frequency (non-zero, Hz), i.e. a period <= NSEC_PER_SEC / @frequency - 1.
+ */
+static int
+max_oa_exponent_for_freq_gt(uint64_t frequency)
+{
+	uint64_t period = NSEC_PER_SEC / frequency;
+
+	igt_assert_neq(period, 0);
+
+	return max_oa_exponent_for_period_lte(period - 1);
+}
+
+static uint64_t
+oa_exponent_to_ns(int exponent) /* (2 << exponent) timestamp ticks, in ns */
+{
+       return 1000000000ULL * (2ULL << exponent) / timestamp_frequency;
+}
+
 static uint64_t
 gen8_read_40bit_a_counter(uint32_t *report, enum drm_i915_oa_format fmt, int a_id)
 {
@@ -524,6 +543,8 @@ init_sys_info(void)
 		  test_set_name,
 		  test_set_uuid);
 
+	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
+
 	snprintf(buf, sizeof(buf),
 		 "/sys/class/drm/card%d/metrics/%s/id",
 		 card,
@@ -593,7 +614,7 @@ test_system_wide_paranoid(void)
 			/* OA unit configuration */
 			DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
 			DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
-			DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
+			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
 		};
 		struct drm_i915_perf_open_param param = {
 			.flags = I915_PERF_FLAG_FD_CLOEXEC |
@@ -619,7 +640,7 @@ test_system_wide_paranoid(void)
 			/* OA unit configuration */
 			DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
 			DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
-			DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
+			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
 		};
 		struct drm_i915_perf_open_param param = {
 			.flags = I915_PERF_FLAG_FD_CLOEXEC |
@@ -653,7 +674,7 @@ test_invalid_open_flags(void)
 		/* OA unit configuration */
 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
-		DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
 	};
 	struct drm_i915_perf_open_param param = {
 		.flags = ~0, /* Undefined flag bits set! */
@@ -673,7 +694,7 @@ test_invalid_oa_metric_set_id(void)
 
 		/* OA unit configuration */
 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
-		DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
 		DRM_I915_PERF_PROP_OA_METRICS_SET, UINT64_MAX,
 	};
 	struct drm_i915_perf_open_param param = {
@@ -708,7 +729,7 @@ test_invalid_oa_format_id(void)
 
 		/* OA unit configuration */
 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
-		DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
 		DRM_I915_PERF_PROP_OA_FORMAT, UINT64_MAX,
 	};
 	struct drm_i915_perf_open_param param = {
@@ -742,7 +763,7 @@ test_missing_sample_flags(void)
 
 		/* OA unit configuration */
 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
-		DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
 	};
 	struct drm_i915_perf_open_param param = {
@@ -982,8 +1003,6 @@ print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
 static void
 test_oa_formats(void)
 {
-	int oa_exponent = 13;
-
 	for (int i = 0; i < ARRAY_SIZE(oa_formats); i++) {
 		uint32_t oa_report0[64];
 		uint32_t oa_report1[64];
@@ -1013,7 +1032,7 @@ test_oa_formats(void)
 		igt_debug("Checking OA format %s\n", oa_formats[i].name);
 
 		open_and_read_2_oa_reports(i,
-					   oa_exponent,
+					   oa_exp_1_millisec,
 					   oa_report0,
 					   oa_report1,
 					   false); /* timer reports only */
@@ -1252,7 +1271,7 @@ static void
 test_low_oa_exponent_permissions(void)
 {
 	int max_freq = read_u64_file("/proc/sys/dev/i915/oa_max_sample_rate");
-	int bad_exponent = max_oa_exponent_for_higher_freq(max_freq);
+	int bad_exponent = max_oa_exponent_for_freq_gt(max_freq);
 	int ok_exponent = bad_exponent + 1;
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
@@ -1326,7 +1345,7 @@ test_per_context_mode_unprivileged(void)
 		/* OA unit configuration */
 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
-		DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
 	};
 	struct drm_i915_perf_open_param param = {
 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
@@ -1399,13 +1418,14 @@ get_time(void)
 static void
 test_blocking(void)
 {
-	/* 40 milliseconds
+	/* ~40 milliseconds
 	 *
 	 * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop
 	 * scheduling (liable to kick in when we make blocking poll()s/reads)
 	 * from interfering with the test.
 	 */
-	int oa_exponent = 18;
+	int oa_exponent = max_oa_exponent_for_period_lte(40000000);
+	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -1428,8 +1448,7 @@ test_blocking(void)
 	int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK);
 	int64_t test_duration_ns = tick_ns * 1000;
 
-	/* Based on the 40ms OA sampling period set above: max OA samples: */
-	int max_iterations = (test_duration_ns / 40000000ull) + 1;
+	int max_iterations = (test_duration_ns / oa_period) + 1;
 
 	/* It's a bit tricky to put a lower limit here, but we expect a
 	 * relatively low latency for seeing reports, while we don't currently
@@ -1440,7 +1459,7 @@ test_blocking(void)
 	 * the knowledge that that the driver uses a 200Hz hrtimer (5ms period)
 	 * to check for data and giving some time to read().
 	 */
-	int min_iterations = (test_duration_ns / 46000000ull);
+	int min_iterations = (test_duration_ns / (oa_period + 6000000ull));
 
 	int64_t start;
 	int n = 0;
@@ -1489,7 +1508,7 @@ test_blocking(void)
 	user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns;
 	kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns;
 
-	igt_debug("%d blocking reads during test with 25Hz OA sampling\n", n);
+	igt_debug("%d blocking reads during test with ~25Hz OA sampling\n", n);
 	igt_debug("time in userspace = %"PRIu64"ns (+-%dns) (start utime = %d, end = %d)\n",
 		  user_ns, (int)tick_ns,
 		  (int)start_times.tms_utime, (int)end_times.tms_utime);
@@ -1515,13 +1534,14 @@ test_blocking(void)
 static void
 test_polling(void)
 {
-	/* 40 milliseconds
+	/* ~40 milliseconds
 	 *
 	 * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop
 	 * scheduling (liable to kick in when we make blocking poll()s/reads)
 	 * from interfering with the test.
 	 */
-	int oa_exponent = 18;
+	int oa_exponent = max_oa_exponent_for_period_lte(40000000);
+	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -1545,8 +1565,7 @@ test_polling(void)
 	int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK);
 	int64_t test_duration_ns = tick_ns * 1000;
 
-	/* Based on the 40ms OA sampling period set above: max OA samples: */
-	int max_iterations = (test_duration_ns / 40000000ull) + 1;
+	int max_iterations = (test_duration_ns / oa_period) + 1;
 
 	/* It's a bit tricky to put a lower limit here, but we expect a
 	 * relatively low latency for seeing reports, while we don't currently
@@ -1557,7 +1576,7 @@ test_polling(void)
 	 * the knowledge that that the driver uses a 200Hz hrtimer (5ms period)
 	 * to check for data and giving some time to read().
 	 */
-	int min_iterations = (test_duration_ns / 46000000ull);
+	int min_iterations = (test_duration_ns / (oa_period + 6000000ull));
 	int64_t start;
 	int n = 0;
 
@@ -1636,7 +1655,7 @@ test_polling(void)
 	user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns;
 	kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns;
 
-	igt_debug("%d blocking poll()s during test with 25Hz OA sampling\n", n);
+	igt_debug("%d blocking poll()s during test with ~25Hz OA sampling\n", n);
 	igt_debug("time in userspace = %"PRIu64"ns (+-%dns) (start utime = %d, end = %d)\n",
 		  user_ns, (int)tick_ns,
 		  (int)start_times.tms_utime, (int)end_times.tms_utime);
@@ -1662,7 +1681,9 @@ test_polling(void)
 static void
 test_buffer_fill(void)
 {
-	int oa_exponent = 5; /* 5 micro seconds */
+	/* ~5 microsecond period */
+	int oa_exponent = max_oa_exponent_for_period_lte(5000);
+	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -1680,7 +1701,12 @@ test_buffer_fill(void)
 	int stream_fd = __perf_open(drm_fd, &param);
 	int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
 	uint8_t *buf = malloc(buf_size);
+	size_t oa_buf_size = 16 * 1024 * 1024;
+	size_t report_size = oa_formats[test_oa_format].size;
+	int n_full_oa_reports = oa_buf_size / report_size;
+	uint64_t fill_duration = n_full_oa_reports * oa_period;
 
+	igt_assert(fill_duration < 1000000000);
 
 	for (int i = 0; i < 5; i++) {
 		struct drm_i915_perf_record_header *header;
@@ -1688,9 +1714,9 @@ test_buffer_fill(void)
 		int offset = 0;
 		int len;
 
-		/* It should take ~330 milliseconds to fill a 16MB OA buffer with a
-		 * 5 microsecond sampling period and 256 byte reports. */
-		nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
+		nanosleep(&(struct timespec){ .tv_sec = 0,
+					      .tv_nsec = fill_duration * 1.25 },
+			  NULL);
 
 		while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
 			;
@@ -1707,15 +1733,17 @@ test_buffer_fill(void)
 
 		igt_assert_eq(overflow_seen, true);
 
-		nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 1000000 }, NULL);
+		nanosleep(&(struct timespec){ .tv_sec = 0,
+					      .tv_nsec = fill_duration / 2 },
+			  NULL);
 
 		while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
 			;
 
 		igt_assert_neq(len, -1);
 
-		/* expect ~ 200 records in 1 millisecond */
-		igt_assert(len > 256 * 150);
+		igt_assert(len > report_size * n_full_oa_reports * 0.45);
+		igt_assert(len < report_size * n_full_oa_reports * 0.55);
 
 		overflow_seen = false;
 		for (offset = 0; offset < len; offset += header->size) {
@@ -1736,7 +1764,9 @@ test_buffer_fill(void)
 static void
 test_enable_disable(void)
 {
-	int oa_exponent = 5; /* 5 micro seconds */
+	/* ~5 microsecond period */
+	int oa_exponent = max_oa_exponent_for_period_lte(5000);
+	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -1755,20 +1785,22 @@ test_enable_disable(void)
 	int stream_fd = __perf_open(drm_fd, &param);
 	int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
 	uint8_t *buf = malloc(buf_size);
+	size_t oa_buf_size = 16 * 1024 * 1024;
+	size_t report_size = oa_formats[test_oa_format].size;
+	int n_full_oa_reports = oa_buf_size / report_size;
+	uint64_t fill_duration = n_full_oa_reports * oa_period;
 
 
 	for (int i = 0; i < 5; i++) {
 		int len;
 
-		/* If the stream were enabled then it would take ~330
-		 * milliseconds to fill a 16MB OA buffer with a 5 microsecond
-		 * sampling period and 256 byte reports.
-		 *
-		 * Giving enough time for an overflow might help catch whether
+		/* Giving enough time for an overflow might help catch whether
 		 * the OA unit has been enabled even if the driver might at
 		 * least avoid copying reports while disabled.
 		 */
-		nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
+		nanosleep(&(struct timespec){ .tv_sec = 0,
+					      .tv_nsec = fill_duration * 1.25 },
+			  NULL);
 
 		while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
 			;
@@ -1778,15 +1810,17 @@ test_enable_disable(void)
 
 		do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
 
-		nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 1000000 }, NULL);
+		nanosleep(&(struct timespec){ .tv_sec = 0,
+					      .tv_nsec = fill_duration / 2 },
+			  NULL);
 
 		while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
 			;
 
 		igt_assert_neq(len, -1);
 
-		/* expect ~ 200 records in 1 millisecond */
-		igt_assert(len > 256 * 150 && len < 256 * 2000);
+		igt_assert(len > report_size * n_full_oa_reports * 0.45);
+		igt_assert(len < report_size * n_full_oa_reports * 0.55);
 
 		do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
 
@@ -1807,7 +1841,7 @@ test_enable_disable(void)
 static void
 test_short_reads(void)
 {
-	int oa_exponent = 5; /* 5 micro seconds */
+	int oa_exponent = max_oa_exponent_for_period_lte(5000);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -2350,7 +2384,6 @@ test_per_ctx_mi_rpc(void)
 static void
 test_rc6_disable(void)
 {
-	int oa_exponent = 13; /* 1 millisecond */
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -2358,7 +2391,7 @@ test_rc6_disable(void)
 		/* OA unit configuration */
 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
-		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
 	};
 	struct drm_i915_perf_open_param param = {
 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
@@ -2424,7 +2457,6 @@ done:
 static void
 test_i915_ref_count(void)
 {
-	int oa_exponent = 13; /* 1 millisecond */
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -2432,7 +2464,7 @@ test_i915_ref_count(void)
 		/* OA unit configuration */
 		DRM_I915_PERF_PROP_OA_METRICS_SET, 0 /* updated below */,
 		DRM_I915_PERF_PROP_OA_FORMAT, 0, /* update below */
-		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+		DRM_I915_PERF_PROP_OA_EXPONENT, 0, /* update below */
 	};
 	struct drm_i915_perf_open_param param = {
 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
@@ -2462,6 +2494,7 @@ test_i915_ref_count(void)
 	igt_require(init_sys_info());
 	properties[3] = test_metric_set_id;
 	properties[5] = test_oa_format;
+	properties[7] = oa_exp_1_millisec;
 
 	ref_count0 = read_i915_module_ref();
 	igt_debug("initial ref count with drm_fd open = %u\n", ref_count0);
@@ -2481,7 +2514,7 @@ test_i915_ref_count(void)
 
 	read_2_oa_reports(stream_fd,
 			  test_oa_format,
-			  oa_exponent,
+			  oa_exp_1_millisec,
 			  oa_report0,
 			  oa_report1,
 			  false); /* not just timer reports */
-- 
2.11.0



More information about the Intel-gfx mailing list