[igt-dev] [PATCH i-g-t] tests/perf: make oa-exponents subtest more reliable
Matthew Auld
matthew.william.auld at gmail.com
Tue Feb 20 18:29:38 UTC 2018
On 15 February 2018 at 15:06, Lionel Landwerlin
<lionel.g.landwerlin at intel.com> wrote:
> We know the OA unit might skip some reports from time to time (reasons
> include pressure on memory controller, power management, ...). So
> rather than checking that the time between periodic reports is about
> the period we asked for, let's verify that the time is always longer
> or equal to the period we've asked for.
>
> We still have to leave some room for errors. Here is dump of an error
> in this updated test :
>
> (perf:405) DEBUG: report0019 ts=e217de20 hw_id=0x00000014 delta=64
> (perf:405) DEBUG: report0020 ts=e217de60 hw_id=0x00000014 delta=64
> (perf:405) DEBUG: report0021 ts=e217dea0 hw_id=0x00000014 delta=64
> (perf:405) DEBUG: report0022 ts=e217df66 hw_id=0x00000014 delta=198 ******
> (perf:405) DEBUG: report0023 ts=e217dfa0 hw_id=0x00000014 delta=58 ******
> (perf:405) DEBUG: report0024 ts=e217dfe0 hw_id=0x00000014 delta=64
> (perf:405) DEBUG: report0025 ts=e217e020 hw_id=0x00000014 delta=64
> (perf:405) DEBUG: report0026 ts=e217e060 hw_id=0x00000014 delta=64
>
> As you can see there is a discrepency in the periodic reports. I have
> no explanation for it. This isn't a programming error since the same
> context has correct periods before and after, so it must be some kind
> of hardware glitch/corner-case that hasn't be been documented.
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> tests/perf.c | 355 ++++++++++++++---------------------------------------------
> 1 file changed, 80 insertions(+), 275 deletions(-)
>
> diff --git a/tests/perf.c b/tests/perf.c
> index 15070827..fb858121 100644
> --- a/tests/perf.c
> +++ b/tests/perf.c
> @@ -1757,302 +1757,107 @@ test_oa_exponents(void)
> * test can fail due to buffer overflows if it wasn't possible to
> * keep up, so we don't start from an exponent of zero...
> */
> - for (int exponent = 5; exponent < 18; exponent++) {
> - uint64_t expected_timestamp_delta;
> - uint32_t time_delta;
> - int n_tested = 0;
> - int n_time_delta_matches = 0;
> -
> - /* The exponent is effectively selecting a bit in the timestamp
> - * to trigger reports on and so in practice we expect the raw
> - * timestamp deltas for periodic reports to exactly match the
> - * value of next bit.
> - */
> - expected_timestamp_delta = 2UL << exponent;
> -
> - for (int j = 0; n_tested < 10 && j < 100; j++) {
> - uint64_t properties[] = {
> - /* Include OA reports in samples */
> - DRM_I915_PERF_PROP_SAMPLE_OA, true,
> + for (int exponent = 5; exponent < 20; exponent++) {
> + uint64_t properties[] = {
> + /* Include OA reports in samples */
> + DRM_I915_PERF_PROP_SAMPLE_OA, true,
>
> - /* OA unit configuration */
> - DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
> - DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
> - DRM_I915_PERF_PROP_OA_EXPONENT, exponent,
> - };
> - struct drm_i915_perf_open_param param = {
> - .flags = I915_PERF_FLAG_FD_CLOEXEC,
> - .num_properties = ARRAY_SIZE(properties) / 2,
> - .properties_ptr = to_user_pointer(properties),
> - };
> - int ret;
> - uint64_t average_timestamp_delta;
> - uint32_t n_reports = 0;
> - uint32_t n_idle_reports = 0;
> - uint32_t n_reads = 0;
> - uint64_t first_timestamp = 0;
> - bool check_first_timestamp = true;
> + /* OA unit configuration */
> + DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
> + DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
> + DRM_I915_PERF_PROP_OA_EXPONENT, exponent,
> + };
> + struct drm_i915_perf_open_param param = {
> + .flags = I915_PERF_FLAG_FD_CLOEXEC,
> + .num_properties = ARRAY_SIZE(properties) / 2,
> + .properties_ptr = to_user_pointer(properties),
> + };
> + uint64_t expected_timestamp_delta = 2ULL << exponent;
> + size_t format_size = get_oa_format(test_oa_format).size;
> + size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
> + format_size);
> + int max_reports = MAX_OA_BUF_SIZE / format_size;
> + int buf_size = sample_size * max_reports * 1.5;
> + uint8_t *buf = calloc(1, buf_size);
> + int ret, n_timer_reports = 0;
> + struct {
> + uint32_t report[64];
> + } timer_reports[30];
> +
> + igt_debug("testing OA exponent %d,"
> + " expected ts delta = %"PRIu64" (%"PRIu64"ns/%.2fus/%.2fms)\n",
> + exponent, expected_timestamp_delta,
> + oa_exponent_to_ns(exponent),
> + oa_exponent_to_ns(exponent) / 1000.0,
> + oa_exponent_to_ns(exponent) / (1000.0 * 1000.0));
> +
> + stream_fd = __perf_open(drm_fd, ¶m, true /* prevent_pm */);
> +
> + while (n_timer_reports < ARRAY_SIZE(timer_reports)) {
> struct drm_i915_perf_record_header *header;
> - uint64_t delta_delta;
> - struct {
> - uint32_t report[64];
> - } reports[30];
> - struct {
> - uint8_t *buf;
> - size_t len;
> - } reads[1000];
> - double error;
> - double tick_per_period;
> -
> - igt_debug("ITER %d: testing OA exponent %d,"
> - " expected ts delta = %"PRIu64" (%"PRIu64"ns/%.2fus/%.2fms)\n",
> - j, exponent,
> - expected_timestamp_delta,
> - oa_exponent_to_ns(exponent),
> - oa_exponent_to_ns(exponent) / 1000.0,
> - oa_exponent_to_ns(exponent) / (1000.0 * 1000.0));
> -
> - stream_fd = __perf_open(drm_fd, ¶m, true /* prevent_pm */);
> -
> - /* Right after opening the OA stream, read a
> - * first timestamp as way to filter previously
> - * scheduled work that would have configured
> - * the OA unit at a different period. */
> - first_timestamp = i915_get_one_gpu_timestamp();
> -
> - while (n_reads < ARRAY_SIZE(reads) &&
> - n_reports < ARRAY_SIZE(reports)) {
> - const size_t buf_size = 1024 * 1024;
> - uint8_t *buf = reads[n_reads++].buf = calloc(1, buf_size);
> -
> - while ((ret = read(stream_fd, buf, buf_size)) < 0 &&
> - errno == EINTR)
> - ;
> -
> - /* We should never have no data. */
> - igt_assert(ret > 0);
> - reads[n_reads - 1].len = ret;
> -
> - igt_debug(" > read %i bytes\n", ret);
> -
> - for (int offset = 0;
> - offset < ret && n_reports < ARRAY_SIZE(reports);
> - offset += header->size) {
> - uint32_t *report;
> - double previous_tick_per_period;
> -
> - header = (void *)(buf + offset);
> -
> - if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST) {
> - igt_assert(!"reached");
> - break;
> - }
> -
> - if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST) {
> - n_reports = 0;
> - n_idle_reports = 0;
> - for (int r = 0; r < n_reads; r++)
> - free(reads[r].buf);
> - n_reads = 0;
> - break;
> - }
> -
> - if (header->type != DRM_I915_PERF_RECORD_SAMPLE)
> - continue;
>
> - report = (void *)(header + 1);
> -
> - /* Skip anything before the first
> - * timestamp, it might not be at the
> - * right periodic exponent. */
> - if (check_first_timestamp &&
> - report[1] < first_timestamp) {
> - igt_debug(" > Dropping ts=%u (prior %"PRIu64")\n",
> - report[1], first_timestamp);
> - continue;
> - }
> -
> - /* Once we've passed the first
> - * timestamp, no need to check. */
> - check_first_timestamp = false;
> -
> - if (!oa_report_ctx_is_valid(report))
> - n_idle_reports++;
> + while ((ret = read(stream_fd, buf, buf_size)) < 0 &&
> + errno == EINTR)
> + ;
>
> - /* We only measure timestamps between
> - * periodic reports. */
> - if (!oa_report_is_periodic(exponent, report))
> - continue;
> + /* igt_debug(" > read %i bytes\n", ret); */
>
> - igt_debug(" > write %i timestamp=%u\n", n_reports, report[1]);
> - memcpy(reports[n_reports].report, report,
> - sizeof(reports[n_reports].report));
> - n_reports++;
> + /* We should never have no data. */
> + igt_assert(ret > 0);
>
> - previous_tick_per_period = tick_per_period;
> + for (int offset = 0;
> + offset < ret && n_timer_reports < ARRAY_SIZE(timer_reports);
> + offset += header->size) {
> + uint32_t *report;
>
> - if (n_reports > 1) {
> - tick_per_period =
> - oa_reports_tick_per_period(reports[n_reports - 2].report,
> - reports[n_reports - 1].report);
> + header = (void *)(buf + offset);
>
> - /* Dismiss the series of report
> - * if we notice clock frequency
> - * changes. */
> - if (!double_value_within(tick_per_period,
> - previous_tick_per_period, 5)) {
> - igt_debug("Noticed clock frequency change at ts=%u (%f / %f), "
> - "dropping reports and trying again\n",
> - report[1], previous_tick_per_period, tick_per_period);
> - n_reports = 0;
> - n_idle_reports = 0;
> - for (int r = 0; r < n_reads; r++)
> - free(reads[r].buf);
> - n_reads = 0;
> - break;
> - }
> - }
> + if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST) {
> + igt_assert(!"reached");
> + break;
> }
> - }
> -
> - __perf_close(stream_fd);
> - igt_debug("closed stream\n");
>
> - igt_assert_eq(n_reports, ARRAY_SIZE(reports));
> + if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
> + igt_debug("report loss\n");
>
> - average_timestamp_delta = 0;
> - for (int i = 0; i < (n_reports - 1); i++) {
> - /* XXX: calculating with u32 arithmetic to account for overflow */
> - uint32_t u32_delta = reports[i + 1].report[1] - reports[i].report[1];
> -
> - average_timestamp_delta += u32_delta;
> - }
> - average_timestamp_delta /= (n_reports - 1);
> -
> - if (average_timestamp_delta > expected_timestamp_delta)
> - delta_delta = average_timestamp_delta - expected_timestamp_delta;
> - else
> - delta_delta = expected_timestamp_delta - average_timestamp_delta;
> - error = (delta_delta / (double)expected_timestamp_delta) * 100.0;
> -
> - time_delta = timebase_scale(average_timestamp_delta);
> -
> - igt_debug(" > Avg. time delta = %"PRIu32"(ns),"
> - " n idle reports = %u, n reads = %u, error=%f\n",
> - time_delta, n_idle_reports, n_reads, error);
> - if (error > 5) {
> - uint32_t *rpt = NULL, *last = NULL, *last_periodic = NULL;
> -
> - igt_debug(" > More than 5%% error: avg_ts_delta = %"PRIu64", delta_delta = %"PRIu64", "
> - "expected_delta = %"PRIu64", first_timestamp = %"PRIu64"\n",
> - average_timestamp_delta, delta_delta, expected_timestamp_delta, first_timestamp);
> - for (int i = 0; i < (n_reports - 1); i++) {
> - /* XXX: calculating with u32 arithmetic to account for overflow */
> - uint32_t u32_delta =
> - reports[i + 1].report[1] - reports[i].report[1];
> -
> - if (u32_delta > expected_timestamp_delta)
> - delta_delta = u32_delta - expected_timestamp_delta;
> - else
> - delta_delta = expected_timestamp_delta - u32_delta;
> - error = (delta_delta / (double)expected_timestamp_delta) * 100.0;
> + if (header->type != DRM_I915_PERF_RECORD_SAMPLE)
> + continue;
>
> - igt_debug(" > ts=%u-%u timestamp delta from %2d to %2d = %-8u (error = %u%%, ctx_id = %x)\n",
> - reports[i + 1].report[1], reports[i].report[1],
> - i, i + 1, u32_delta, (unsigned)error,
> - oa_report_get_ctx_id(reports[i + 1].report));
> - }
> - for (int r = 0; r < n_reads; r++) {
> - igt_debug(" > read\n");
> - for (int offset = 0;
> - offset < reads[r].len;
> - offset += header->size) {
> - /* Just a random counter,
> - * helpful to notice
> - * inconsistency in debug.
> - */
> - int counter_print = 13;
> - uint64_t a0 = 0, aN = 0;
> - double local_period = 0;
> -
> - header = (void *) &reads[r].buf[offset];
> -
> - if (header->type != DRM_I915_PERF_RECORD_SAMPLE) {
> - igt_debug(" > loss\n");
> - continue;
> - }
> -
> - rpt = (void *)(header + 1);
> -
> - if (last) {
> - a0 = gen8_read_40bit_a_counter(rpt, test_oa_format, 0) -
> - gen8_read_40bit_a_counter(last, test_oa_format, 0);
> - aN = gen8_read_40bit_a_counter(rpt, test_oa_format, counter_print) -
> - gen8_read_40bit_a_counter(last, test_oa_format, counter_print);
> - }
> -
> - if (last_periodic &&
> - intel_gen(devid) >= 8 &&
> - oa_report_is_periodic(exponent, rpt)) {
> - local_period =
> - ((uint64_t) rpt[3] - last_periodic[3]) /
> - ((uint64_t) rpt[1] - last_periodic[1]);
> - }
> + report = (void *)(header + 1);
>
> - igt_debug(" > report ts=%u"
> - " ts_delta_last=%8u ts_delta_last_periodic=%8u is_timer=%i ctx_id=%8x gpu_ticks=%u period=%.2f A0=%"PRIu64" A%i=%"PRIu64"\n",
> - rpt[1],
> - (last != NULL) ? (rpt[1] - last[1]) : 0,
> - (last_periodic != NULL) ? (rpt[1] - last_periodic[1]) : 0,
> - oa_report_is_periodic(exponent, rpt),
> - oa_report_get_ctx_id(rpt),
> - (last != NULL) ? (rpt[3] - last[3]) : 0,
> - local_period,
> - a0, counter_print, aN);
> -
> - last = rpt;
> - if (oa_report_is_periodic(exponent, rpt))
> - last_periodic = rpt;
> - }
> - }
> + if (!oa_report_is_periodic(exponent, report))
> + continue;
>
> - igt_assert(!"reached");
> + memcpy(timer_reports[n_timer_reports].report, report,
> + sizeof(timer_reports[n_timer_reports].report));
> + n_timer_reports++;
> }
> + }
>
> - if (timestamp_delta_within(average_timestamp_delta,
> - expected_timestamp_delta,
> - expected_timestamp_delta * 0.05)) {
> - igt_debug(" > timestamp delta matching %"PRIu64"ns ~= expected %"PRIu64"! :)\n",
> - timebase_scale(average_timestamp_delta),
> - timebase_scale(expected_timestamp_delta));
> - n_time_delta_matches++;
> - } else {
> - igt_debug(" > timestamp delta mismatch: %"PRIu64"ns != expected %"PRIu64"ns\n",
> - timebase_scale(average_timestamp_delta),
> - timebase_scale(expected_timestamp_delta));
> - igt_assert(average_timestamp_delta <
> - (expected_timestamp_delta * 2));
> - }
> + close(stream_fd);
> + stream_fd = -1;
__perf_close(stream_fd)?
>
> - n_tested++;
> + igt_debug("report%04i ts=%08x hw_id=0x%08x\n", 0,
> + timer_reports[0].report[1],
> + oa_report_get_ctx_id(timer_reports[0].report));
> + uint32_t matches = 0;
> + for (int i = 1; i < n_timer_reports; i++) {
> + uint32_t delta =
> + timer_reports[i].report[1] - timer_reports[i - 1].report[1];
>
> - for (int r = 0; r < n_reads; r++)
> - free(reads[r].buf);
> - }
> + igt_debug("report%04i ts=%08x hw_id=0x%08x delta=%u %s\n", i,
> + timer_reports[i].report[1],
> + oa_report_get_ctx_id(timer_reports[i].report),
> + delta, delta == expected_timestamp_delta ? "" : "******");
>
> - if (n_tested < 10)
> - igt_debug("Too many test iterations had to be skipped\n");
> - igt_assert_eq(n_tested, 10);
> + matches += expected_timestamp_delta >=
> + (timer_reports[i].report[1] - timer_reports[i - 1].report[1]);
expected_timestamp_delta >= delta;
I'm getting unused warnings for i915_get_one_gpu_timestamp() and
timestamp_delta_within()...
Anyway, if it improves the situation:
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
More information about the igt-dev
mailing list