[Intel-gfx] [PATCH i-g-t 5/7] tests/perf_pmu: Tests for i915 PMU API
Tvrtko Ursulin
tursulin at ursulin.net
Fri Sep 29 12:39:37 UTC 2017
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
A bunch of tests for the new i915 PMU feature.
Parts of the code were initially sketched by Dmitry Rogozhkin.
v2: (Most suggestions by Chris Wilson)
* Add new class/instance based engine list.
* Add gem_has_engine/gem_require_engine to work with class/instance.
* Use the above two throughout the test.
* Shorten tests to 100ms busy batches, seems enough.
* Add queued counter sanity checks.
* Use igt_nsec_elapsed.
* Skip on perf -ENODEV in some tests instead of embedding knowledge locally.
* Fix multi ordering for busy accounting.
* Use new guaranteed_usleep when sleep time is asserted on.
* Check for no queued when idle/busy.
* Add queued counter init test.
* Add queued tests.
* Consolidate and increase multiple busy engines tests to most-busy and
all-busy tests.
* Guarantee interrupts by using fences.
* Test RC6 via forcewake.
v3:
* Tweak assert in interrupts subtest.
* Sprinkle of comments.
* Fix multi-client test which got broken in v2.
v4:
* Measured instead of guaranteed sleep.
* Missing sync in no_sema.
* Log busyness before asserts for debug.
* access(2) instead of open(2) to determine if cpu0 is hotpluggable.
* Test frequency reporting via min/max setting instead of assuming.
^^ All above suggested by Chris Wilson. ^^
* Drop queued subtests to match i915.
* Use long batches with fences to ensure interrupts.
* Test render node as well.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
---
lib/igt_gt.c | 50 +++
lib/igt_gt.h | 38 ++
lib/igt_perf.h | 9 +-
tests/Makefile.am | 1 +
tests/Makefile.sources | 1 +
tests/perf_pmu.c | 957 +++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 1048 insertions(+), 8 deletions(-)
create mode 100644 tests/perf_pmu.c
diff --git a/lib/igt_gt.c b/lib/igt_gt.c
index b3f3b3809eee..4c75811fb1b3 100644
--- a/lib/igt_gt.c
+++ b/lib/igt_gt.c
@@ -568,3 +568,53 @@ bool gem_can_store_dword(int fd, unsigned int engine)
return true;
}
+
+/*
+ * Class/instance based engine list walked by
+ * for_each_engine_class_instance(). That iterator stops at the first entry
+ * whose name is NULL, so the table must end with a zeroed sentinel entry.
+ */
+const struct intel_execution_engine2 intel_execution_engines2[] = {
+	{ "rcs0", I915_ENGINE_CLASS_RENDER, 0 },
+	{ "bcs0", I915_ENGINE_CLASS_COPY, 0 },
+	{ "vcs0", I915_ENGINE_CLASS_VIDEO, 0 },
+	{ "vcs1", I915_ENGINE_CLASS_VIDEO, 1 },
+	{ "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0 },
+	{ }	/* sentinel: NULL name terminates iteration */
+};
+
+/*
+ * Translate an engine class/instance pair into the execbuf flags which
+ * select that engine.
+ *
+ * Only the video class may use instance 1; every other class must use
+ * instance 0. Unknown classes and I915_ENGINE_CLASS_OTHER trip an assert.
+ *
+ * @gem_fd: DRM fd, used to query whether a second BSD ring exists
+ * @class: engine class
+ * @instance: engine instance within the class
+ *
+ * Returns the I915_EXEC_* ring selection flags.
+ */
+unsigned int
+gem_class_instance_to_eb_flags(int gem_fd,
+			       enum drm_i915_gem_engine_class class,
+			       unsigned int instance)
+{
+	/* instance is unsigned, so only the upper bound needs checking. */
+	if (class != I915_ENGINE_CLASS_VIDEO)
+		igt_assert(instance == 0);
+	else
+		igt_assert(instance <= 1);
+
+	switch (class) {
+	case I915_ENGINE_CLASS_RENDER:
+		return I915_EXEC_RENDER;
+	case I915_ENGINE_CLASS_COPY:
+		return I915_EXEC_BLT;
+	case I915_ENGINE_CLASS_VIDEO:
+		if (instance != 0)
+			return I915_EXEC_BSD | I915_EXEC_BSD_RING2;
+
+		/* Select ring 1 explicitly only when a second ring exists. */
+		if (gem_has_bsd2(gem_fd))
+			return I915_EXEC_BSD | I915_EXEC_BSD_RING1;
+
+		return I915_EXEC_BSD;
+	case I915_ENGINE_CLASS_VIDEO_ENHANCE:
+		return I915_EXEC_VEBOX;
+	case I915_ENGINE_CLASS_OTHER:
+	default:
+		igt_assert(0);
+	}
+
+	/*
+	 * Only reached if igt_assert() returns; keeps every path of this
+	 * non-void function returning a value.
+	 */
+	return 0;
+}
+
+/*
+ * Convert class/instance to execbuf ring flags and return what
+ * gem_has_ring() reports for them on gem_fd.
+ */
+bool gem_has_engine(int gem_fd,
+		    enum drm_i915_gem_engine_class class,
+		    unsigned int instance)
+{
+	const unsigned int ring =
+		gem_class_instance_to_eb_flags(gem_fd, class, instance);
+
+	return gem_has_ring(gem_fd, ring);
+}
diff --git a/lib/igt_gt.h b/lib/igt_gt.h
index 2579cbd37be7..fb67ae1a7d1f 100644
--- a/lib/igt_gt.h
+++ b/lib/igt_gt.h
@@ -25,6 +25,7 @@
#define IGT_GT_H
#include "igt_debugfs.h"
+#include "igt_core.h"
void igt_require_hang_ring(int fd, int ring);
@@ -80,4 +81,41 @@ extern const struct intel_execution_engine {
bool gem_can_store_dword(int fd, unsigned int engine);
+/*
+ * Engine description keyed by class and instance. The table is walked with
+ * for_each_engine_class_instance(), which stops at the first entry whose
+ * name is NULL.
+ */
+extern const struct intel_execution_engine2 {
+ const char *name; /* short engine name, e.g. "rcs0" */
+ int class; /* one of the I915_ENGINE_CLASS_* values */
+ int instance; /* instance number within the class */
+} intel_execution_engines2[];
+
+/*
+ * Iterate e__ over intel_execution_engines2 until an entry with a NULL name
+ * is reached. fd__ is accepted but not used by the expansion, so engines
+ * which are absent on the device are not filtered out here.
+ */
+#define for_each_engine_class_instance(fd__, e__) \
+ for ((e__) = intel_execution_engines2;\
+ (e__)->name; \
+ (e__)++)
+
+/* Engine classes used to identify an engine together with its instance. */
+enum drm_i915_gem_engine_class {
+ I915_ENGINE_CLASS_OTHER = 0,
+ I915_ENGINE_CLASS_RENDER = 1,
+ I915_ENGINE_CLASS_COPY = 2,
+ I915_ENGINE_CLASS_VIDEO = 3,
+ I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+ I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+/* Map an engine class/instance pair to execbuf ring selection flags. */
+unsigned int
+gem_class_instance_to_eb_flags(int gem_fd,
+ enum drm_i915_gem_engine_class class,
+ unsigned int instance);
+
+/* Query, via the execbuf flags mapping, whether gem_fd has the engine. */
+bool gem_has_engine(int gem_fd,
+ enum drm_i915_gem_engine_class class,
+ unsigned int instance);
+
+/* Wrap gem_has_engine() in igt_require() for the given class/instance. */
+static inline
+void gem_require_engine(int gem_fd,
+ enum drm_i915_gem_engine_class class,
+ unsigned int instance)
+{
+ igt_require(gem_has_engine(gem_fd, class, instance));
+}
+
#endif /* IGT_GT_H */
diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index e38171da5261..dc4df760f531 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -29,14 +29,7 @@
#include <linux/perf_event.h>
-enum drm_i915_gem_engine_class {
- I915_ENGINE_CLASS_OTHER = 0,
- I915_ENGINE_CLASS_RENDER = 1,
- I915_ENGINE_CLASS_COPY = 2,
- I915_ENGINE_CLASS_VIDEO = 3,
- I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
- I915_ENGINE_CLASS_MAX /* non-ABI */
-};
+#include "igt_gt.h"
enum drm_i915_pmu_engine_sample {
I915_SAMPLE_BUSY = 0,
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 1cea4036d00f..2819df381df7 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -126,6 +126,7 @@ gen7_forcewake_mt_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
gen7_forcewake_mt_LDADD = $(LDADD) -lpthread
gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
gem_userptr_blits_LDADD = $(LDADD) -lpthread
+perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
gem_wait_LDADD = $(LDADD) -lrt
kms_flip_LDADD = $(LDADD) -lrt -lpthread
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 0adc28a014d2..7d1fdf16892d 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -217,6 +217,7 @@ TESTS_progs = \
kms_vblank \
meta_test \
perf \
+ perf_pmu \
pm_backlight \
pm_lpsp \
pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 000000000000..70d435e953ea
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,957 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <time.h>
+#include <poll.h>
+
+#include "igt.h"
+#include "igt_core.h"
+#include "igt_perf.h"
+#include "igt_sysfs.h"
+
+IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
+
+/* Relative tolerance (2%) the tests pass to assert_within_epsilon(). */
+const double tolerance = 0.02f;
+/* Length of the busy batches and idle sleeps used by the tests: 100ms. */
+const unsigned long batch_duration_ns = 100 * 1000 * 1000;
+
+/*
+ * Open a single i915 PMU event for config via perf_i915_open().
+ *
+ * The igt_require() condition is only false when the open failed with
+ * ENODEV; any other failure trips the igt_assert(). Returns the event fd.
+ */
+static int open_pmu(uint64_t config)
+{
+ int fd;
+
+ fd = perf_i915_open(config);
+ igt_require(fd >= 0 || (fd < 0 && errno != ENODEV));
+ igt_assert(fd >= 0);
+
+ return fd;
+}
+
+/*
+ * Open an i915 PMU event for config via perf_i915_open_group(), passing
+ * group through. Callers in this file pass -1 for the first event and the
+ * first event's fd for the following ones.
+ *
+ * Same ENODEV handling as open_pmu(). Returns the event fd.
+ */
+static int open_group(uint64_t config, int group)
+{
+ int fd;
+
+ fd = perf_i915_open_group(config, group);
+ igt_require(fd >= 0 || (fd < 0 && errno != ENODEV));
+ igt_assert(fd >= 0);
+
+ return fd;
+}
+
+/*
+ * Check that the engine counter selected by sample can be opened for
+ * engine e, then close it again. gem_fd is not used.
+ */
+static void
+init(int gem_fd, const struct intel_execution_engine2 *e, uint8_t sample)
+{
+ int fd;
+
+ fd = open_pmu(__I915_PMU_ENGINE(e->class, e->instance, sample));
+
+ close(fd);
+}
+
+/*
+ * Read one counter from a PMU fd. The read must return exactly two u64
+ * words; the first one is returned as the counter value.
+ * NOTE(review): the second word is presumably a time value selected by the
+ * read_format used in perf_i915_open() - confirm there.
+ */
+static uint64_t pmu_read_single(int fd)
+{
+ uint64_t data[2];
+
+ igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
+
+ return data[0];
+}
+
+/*
+ * Read num counters from the event group led by fd into val[]. The read
+ * must return exactly 2 + num u64 words; the first two are skipped.
+ * NOTE(review): the two leading words are presumably the perf group read
+ * header - confirm against perf_i915_open_group().
+ */
+static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+ uint64_t buf[2 + num];
+ unsigned int i;
+
+ igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf));
+
+ /* Counter values start after the two header words. */
+ for (i = 0; i < num; i++)
+ val[i] = buf[2 + i];
+}
+
+/*
+ * Assert that x lies within ref * (1 +/- tolerance). With a ref of zero this
+ * only passes when x is exactly zero. Arguments are parenthesized so that
+ * expressions such as "a - b" are converted as a whole; note that x, ref and
+ * tolerance are each evaluated more than once.
+ */
+#define assert_within_epsilon(x, ref, tolerance) \
+	igt_assert_f((double)(x) <= (1.0 + (tolerance)) * (double)(ref) && \
+		     (double)(x) >= (1.0 - (tolerance)) * (double)(ref), \
+		     "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
+		     #x, #ref, (double)(x), (tolerance) * 100.0, (double)(ref))
+
+/*
+ * Helper for cases where we assert on time spent sleeping (directly or
+ * indirectly), so make it more robust by measuring how long the system
+ * actually slept instead of trusting the requested duration.
+ *
+ * Sleeps until at least usec microseconds have elapsed and returns the
+ * measured time in nanoseconds. The 64-bit total is narrowed to unsigned int
+ * on return, so callers must stay below roughly 4.29 seconds.
+ */
+static unsigned int measured_usleep(unsigned int usec)
+{
+	uint64_t slept = 0;
+
+	while (usec > 0) {
+		struct timespec start = { };
+		uint64_t this_sleep;
+
+		/* The first call on the zeroed timespec starts the clock. */
+		igt_nsec_elapsed(&start);
+		usleep(usec);
+		this_sleep = igt_nsec_elapsed(&start);
+		slept += this_sleep;
+
+		/* this_sleep is in ns, usec in us: compare in 64-bit ns. */
+		if (this_sleep >= (uint64_t)usec * 1000)
+			break;
+
+		/*
+		 * Woke up early: go around again for the remainder. Convert
+		 * to microseconds first so the subtraction cannot underflow.
+		 */
+		usec -= this_sleep / 1000;
+	}
+
+	return slept;
+}
+
+/* Map an engine table entry to the execbuf flags selecting that engine. */
+static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	const unsigned int eb_flags =
+		gem_class_instance_to_eb_flags(gem_fd, e->class, e->instance);
+
+	return eb_flags;
+}
+
+/*
+ * Check the busy counter of a single engine.
+ *
+ * With busy set, a spin batch with a batch_duration_ns timeout is run on
+ * the engine and the counter must be within tolerance of that duration.
+ * Otherwise the test just sleeps for the same time and the counter must
+ * read zero.
+ */
+static void
+single(int gem_fd, const struct intel_execution_engine2 *e, bool busy)
+{
+ double ref = busy ? batch_duration_ns : 0.0f;
+ igt_spin_t *spin;
+ uint64_t val;
+ int fd;
+
+ fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
+
+ if (busy) {
+ spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ igt_spin_batch_set_timeout(spin, batch_duration_ns);
+ } else {
+ usleep(batch_duration_ns / 1000);
+ }
+
+ /* Wait for the spinner before sampling the counter. */
+ if (busy)
+ gem_sync(gem_fd, spin->handle);
+
+ val = pmu_read_single(fd);
+
+ assert_within_epsilon(val, ref, tolerance);
+
+ /* spin is only set, and only freed, on the busy path. */
+ if (busy)
+ igt_spin_batch_free(gem_fd, spin);
+ close(fd);
+}
+
+/*
+ * Log the per-engine counter values as "index=value" lines with a single
+ * igt_info() call. fd is not used.
+ *
+ * Formatting stops once the local buffer is full instead of advancing the
+ * write pointer past its end.
+ */
+static void log_busy(int fd, unsigned int num_engines, uint64_t *val)
+{
+	char buf[1024];
+	int rem = sizeof(buf);
+	unsigned int i;
+	char *p = buf;
+
+	/* Keep buf a valid string even when there is nothing to format. */
+	buf[0] = '\0';
+
+	for (i = 0; i < num_engines; i++) {
+		int len;
+
+		len = snprintf(p, rem, "%u=%" PRIu64 "\n", i, val[i]);
+		igt_assert(len > 0);
+
+		/* Output was truncated; what is in buf is still terminated. */
+		if (len >= rem)
+			break;
+
+		rem -= len;
+		p += len;
+	}
+
+	igt_info("%s", buf);
+}
+
+/*
+ * Keep engine e busy with a spin batch for batch_duration_ns and check that
+ * its busy counter is within tolerance of that duration while the busy
+ * counters of all other present engines read zero.
+ *
+ * num_engines must be the number of engines present on gem_fd; one busy
+ * counter per present engine is opened in a single event group.
+ */
+static void
+busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
+	       const unsigned int num_engines)
+{
+	const struct intel_execution_engine2 *e_;
+	uint64_t val[num_engines];
+	int fd[num_engines];
+	igt_spin_t *spin;
+	unsigned int busy_idx = 0, opened, i;
+
+	/*
+	 * Without the engine under test busy_idx would never be assigned;
+	 * require it up front like most_busy_check_all() does.
+	 */
+	gem_require_engine(gem_fd, e->class, e->instance);
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e_) {
+		if (!gem_has_engine(gem_fd, e_->class, e_->instance))
+			continue;
+		else if (e == e_)
+			busy_idx = i;
+
+		/* fd[0] is still -1 for the first event, making it leader. */
+		fd[i++] = open_group(I915_PMU_ENGINE_BUSY(e_->class,
+							  e_->instance),
+				     fd[0]);
+	}
+	opened = i;
+
+	spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd[0], num_engines, val);
+	log_busy(fd[0], num_engines, val);
+
+	assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance);
+	for (i = 0; i < num_engines; i++) {
+		if (i == busy_idx)
+			continue;
+		assert_within_epsilon(val[i], 0.0f, tolerance);
+	}
+
+	igt_spin_batch_free(gem_fd, spin);
+
+	/* Every group member has its own fd; close them all. */
+	for (i = 0; i < opened; i++)
+		close(fd[i]);
+}
+
+/*
+ * Keep every present engine except e busy with spin batches for
+ * batch_duration_ns, then check that e reads zero busyness while all other
+ * engines are within tolerance of the batch duration.
+ *
+ * num_engines must be the number of engines present on gem_fd; one busy
+ * counter per present engine is opened in a single event group.
+ */
+static void
+most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
+		    const unsigned int num_engines)
+{
+	const struct intel_execution_engine2 *e_;
+	uint64_t val[num_engines];
+	int fd[num_engines];
+	igt_spin_t *spin[num_engines];
+	unsigned int idle_idx = 0, opened, i;
+
+	gem_require_engine(gem_fd, e->class, e->instance);
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e_) {
+		if (!gem_has_engine(gem_fd, e_->class, e_->instance))
+			continue;
+
+		/* fd[0] is still -1 for the first event, making it leader. */
+		fd[i] = open_group(I915_PMU_ENGINE_BUSY(e_->class,
+							e_->instance),
+				   fd[0]);
+
+		if (e == e_) {
+			/* The engine under test stays idle: no spinner. */
+			idle_idx = i;
+		} else {
+			spin[i] = igt_spin_batch_new(gem_fd, 0,
+						     e2ring(gem_fd, e_), 0);
+			igt_spin_batch_set_timeout(spin[i], batch_duration_ns);
+		}
+
+		i++;
+	}
+	opened = i;
+
+	for (i = 0; i < num_engines; i++) {
+		if (i != idle_idx)
+			gem_sync(gem_fd, spin[i]->handle);
+	}
+
+	pmu_read_multi(fd[0], num_engines, val);
+	log_busy(fd[0], num_engines, val);
+
+	for (i = 0; i < num_engines; i++) {
+		if (i == idle_idx)
+			assert_within_epsilon(val[i], 0.0f, tolerance);
+		else
+			assert_within_epsilon(val[i], batch_duration_ns,
+					      tolerance);
+	}
+
+	for (i = 0; i < num_engines; i++) {
+		if (i != idle_idx)
+			igt_spin_batch_free(gem_fd, spin[i]);
+	}
+
+	/* Every group member has its own fd; close them all. */
+	for (i = 0; i < opened; i++)
+		close(fd[i]);
+}
+
+/*
+ * Keep every present engine busy with a spin batch for batch_duration_ns
+ * and check that each busy counter is within tolerance of that duration.
+ *
+ * num_engines must be the number of engines present on gem_fd; one busy
+ * counter per present engine is opened in a single event group.
+ */
+static void
+all_busy_check_all(int gem_fd, const unsigned int num_engines)
+{
+	const struct intel_execution_engine2 *e;
+	uint64_t val[num_engines];
+	int fd[num_engines];
+	igt_spin_t *spin[num_engines];
+	unsigned int opened, i;
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e) {
+		if (!gem_has_engine(gem_fd, e->class, e->instance))
+			continue;
+
+		/* fd[0] is still -1 for the first event, making it leader. */
+		fd[i] = open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance),
+				   fd[0]);
+
+		spin[i] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+		igt_spin_batch_set_timeout(spin[i], batch_duration_ns);
+
+		i++;
+	}
+	opened = i;
+
+	for (i = 0; i < num_engines; i++)
+		gem_sync(gem_fd, spin[i]->handle);
+
+	pmu_read_multi(fd[0], num_engines, val);
+	log_busy(fd[0], num_engines, val);
+
+	for (i = 0; i < num_engines; i++)
+		assert_within_epsilon(val[i], batch_duration_ns, tolerance);
+
+	for (i = 0; i < num_engines; i++)
+		igt_spin_batch_free(gem_fd, spin[i]);
+
+	/* Every group member has its own fd; close them all. */
+	for (i = 0; i < opened; i++)
+		close(fd[i]);
+}
+
+/*
+ * Check that the semaphore and wait counters of engine e stay at zero,
+ * either while the engine runs a spin batch for batch_duration_ns (busy)
+ * or while the test merely sleeps for the same time.
+ */
+static void
+no_sema(int gem_fd, const struct intel_execution_engine2 *e, bool busy)
+{
+	igt_spin_t *spin;
+	uint64_t val[2];
+	int fd, fd_wait;
+
+	/* Two events in one group: val[0] is SEMA, val[1] is WAIT. */
+	fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
+	fd_wait = open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance), fd);
+
+	if (busy) {
+		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+		igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	} else {
+		usleep(batch_duration_ns / 1000);
+	}
+
+	if (busy)
+		gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, 2, val);
+
+	assert_within_epsilon(val[0], 0.0f, tolerance);
+	assert_within_epsilon(val[1], 0.0f, tolerance);
+
+	if (busy)
+		igt_spin_batch_free(gem_fd, spin);
+
+	/* The second group member has its own fd which must be closed too. */
+	close(fd_wait);
+	close(fd);
+}
+
+/*
+ * Check that two PMU clients watching the same engine busy counter do not
+ * disturb each other: the first client must account the whole batch, the
+ * second only the time it was open for.
+ */
+static void
+multi_client(int gem_fd, const struct intel_execution_engine2 *e)
+{
+ uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
+ unsigned int slept;
+ igt_spin_t *spin;
+ uint64_t val[2];
+ int fd[2];
+
+ fd[0] = open_pmu(config);
+
+ spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ igt_spin_batch_set_timeout(spin, batch_duration_ns);
+
+ /* Let a third of the batch run before the second client appears. */
+ usleep(batch_duration_ns / 3000);
+
+ /*
+ * Second PMU client which is initialized after the first one,
+ * and is closed before it, should not affect accounting as reported
+ * in the first client.
+ */
+ fd[1] = open_pmu(config);
+ slept = measured_usleep(batch_duration_ns / 3000);
+ val[1] = pmu_read_single(fd[1]);
+ close(fd[1]);
+
+ gem_sync(gem_fd, spin->handle);
+
+ val[0] = pmu_read_single(fd[0]);
+
+ /* First client saw the full batch, second only its measured sleep. */
+ assert_within_epsilon(val[0], batch_duration_ns, tolerance);
+ assert_within_epsilon(val[1], slept, tolerance);
+
+ igt_spin_batch_free(gem_fd, spin);
+ close(fd[0]);
+}
+
+/**
+ * Tests that i915 PMU correctly errors out in invalid initialization.
+ * i915 PMU is uncore PMU, thus:
+ * - sampling period is not supported
+ * - pid > 0 is not supported since we can't count per-process (we count
+ * per whole system)
+ * - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ */
+static void invalid_init(void)
+{
+ struct perf_event_attr attr;
+ int pid, cpu;
+
+/* Reset attr to a render engine busy event; left defined after the function. */
+#define ATTR_INIT() \
+do { \
+ memset(&attr, 0, sizeof (attr)); \
+ attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
+ attr.type = i915_type_id(); \
+ igt_assert(attr.type != 0); \
+} while(0)
+
+ /* A sampling period must be rejected with EINVAL. */
+ ATTR_INIT();
+ attr.sample_period = 100;
+ pid = -1;
+ cpu = 0;
+ igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+ igt_assert_eq(errno, EINVAL);
+
+ /* Opening with pid 0 instead of -1 must be rejected with EINVAL. */
+ ATTR_INIT();
+ pid = 0;
+ cpu = 0;
+ igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+ igt_assert_eq(errno, EINVAL);
+
+ /* Opening on cpu 1 must be rejected with ENODEV. */
+ ATTR_INIT();
+ pid = -1;
+ cpu = 1;
+ igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+ igt_assert_eq(errno, ENODEV);
+}
+
+/*
+ * Open non-engine counter number i. When valid is set the open must
+ * succeed, otherwise it must fail. The igt_require() condition is only
+ * false when the open failed with ENODEV.
+ */
+static void init_other(unsigned int i, bool valid)
+{
+ int fd;
+
+ fd = perf_i915_open(__I915_PMU_OTHER(i));
+ igt_require(!(fd < 0 && errno == ENODEV));
+ if (valid) {
+ igt_assert(fd >= 0);
+ } else {
+ igt_assert(fd < 0);
+ /* Nothing was opened, so there is nothing to close. */
+ return;
+ }
+
+ close(fd);
+}
+
+/*
+ * Like init_other(), but for a valid counter also perform one read. The
+ * value is discarded; only the read succeeding is checked, by the assert
+ * inside pmu_read_single().
+ */
+static void read_other(unsigned int i, bool valid)
+{
+ int fd;
+
+ fd = perf_i915_open(__I915_PMU_OTHER(i));
+ igt_require(!(fd < 0 && errno == ENODEV));
+ if (valid) {
+ igt_assert(fd >= 0);
+ } else {
+ igt_assert(fd < 0);
+ return;
+ }
+
+ (void)pmu_read_single(fd);
+
+ close(fd);
+}
+
+/* True when the sysfs "online" control file of cpu0 is writable. */
+static bool cpu0_hotplug_support(void)
+{
+	static const char cpu0_online[] = "/sys/devices/system/cpu/cpu0/online";
+
+	if (access(cpu0_online, W_OK) != 0)
+		return false;
+
+	return true;
+}
+
+/*
+ * Check that the render engine busy counter keeps counting while CPUs are
+ * taken offline and brought back online one after another. The render
+ * engine spins for the whole test, so the counter must match the elapsed
+ * wall time within tolerance.
+ */
+static void cpu_hotplug(int gem_fd)
+{
+ struct timespec start = { };
+ igt_spin_t *spin;
+ uint64_t val, ref;
+ int fd;
+
+ igt_require(cpu0_hotplug_support());
+
+ /* No timeout is set; the spinner runs until ended below. */
+ spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+ fd = perf_i915_open(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
+ igt_assert(fd >= 0);
+
+ igt_nsec_elapsed(&start);
+
+ /*
+ * Toggle online status of all the CPUs in a child process and ensure
+ * this has not affected busyness stats in the parent.
+ */
+ igt_fork(child, 1) {
+ int cpu = 0;
+
+ for (;;) {
+ char name[128];
+ int cpufd;
+
+ sprintf(name, "/sys/devices/system/cpu/cpu%d/online",
+ cpu);
+ cpufd = open(name, O_WRONLY);
+ if (cpufd == -1) {
+ /* Failing on cpu0 is an error, later it ends the walk. */
+ igt_assert(cpu > 0);
+ break;
+ }
+ /* A length of 2 writes the digit plus the terminating NUL. */
+ igt_assert_eq(write(cpufd, "0", 2), 2);
+
+ usleep(1000 * 1000);
+
+ igt_assert_eq(write(cpufd, "1", 2), 2);
+
+ close(cpufd);
+ cpu++;
+ }
+ }
+
+ igt_waitchildren();
+
+ igt_spin_batch_end(spin);
+ gem_sync(gem_fd, spin->handle);
+
+ ref = igt_nsec_elapsed(&start);
+ val = pmu_read_single(fd);
+
+ assert_within_epsilon(val, ref, tolerance);
+
+ igt_spin_batch_free(gem_fd, spin);
+ close(fd);
+}
+
+/*
+ * Find the batch size which takes calibration_us to execute.
+ *
+ * Starting from 256KiB, a batch consisting only of a batch-buffer-end
+ * command in its last dword is executed "loops" times, timed, and the size
+ * is rescaled towards the target. This repeats until at least three times
+ * calibration_us has been spent and two consecutive estimates differ by no
+ * more than 10%.
+ *
+ * Returns the calibrated size in dwords.
+ */
+static unsigned long calibrate_nop(int fd, const unsigned int calibration_us)
+{
+	const unsigned int cal_min_us = calibration_us * 3;
+	const unsigned int tolerance_pct = 10;
+	const uint32_t bbe = 0xa << 23;
+	const unsigned int loops = 17;
+	struct drm_i915_gem_exec_object2 obj = {};
+	struct drm_i915_gem_execbuffer2 eb =
+		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
+	struct timespec t_begin = { };
+	long size, last_size;
+	unsigned long ns;
+
+	igt_nsec_elapsed(&t_begin);
+
+	size = 256 * 1024;
+	do {
+		struct timespec t_start = { };
+
+		obj.handle = gem_create(fd, size);
+		gem_write(fd, obj.handle, size - sizeof(bbe), &bbe,
+			  sizeof(bbe));
+		/* One untimed execution before measuring. */
+		gem_execbuf(fd, &eb);
+		gem_sync(fd, obj.handle);
+
+		igt_nsec_elapsed(&t_start);
+
+		for (int loop = 0; loop < loops; loop++)
+			gem_execbuf(fd, &eb);
+		gem_sync(fd, obj.handle);
+
+		ns = igt_nsec_elapsed(&t_start);
+
+		gem_close(fd, obj.handle);
+
+		/* Scale the size by target time over measured time. */
+		last_size = size;
+		size = calibration_us * 1000 * size * loops / ns;
+		size = ALIGN(size, sizeof(uint32_t));
+		/* The difference is a long, so labs() rather than abs(). */
+	} while (igt_nsec_elapsed(&t_begin) / 1000 < cal_min_us ||
+		 labs(size - last_size) > (size * tolerance_pct / 100));
+
+	return size / sizeof(uint32_t);
+}
+
+/*
+ * Submit one batch of sz bytes (rounded up to a dword multiple) to the
+ * render engine, requesting an output fence. The batch only holds a
+ * batch-buffer-end command in its last dword.
+ *
+ * If in_fence is >= 0 it is passed as the input fence of the batch and
+ * closed afterwards. With sync set the function waits for the batch.
+ *
+ * Returns the upper 32 bits of eb.rsvd2 after the execbuf, presumably the
+ * output fence fd; the caller in this file closes the last one returned.
+ */
+static int chain_nop(int gem_fd, unsigned long sz, int in_fence, bool sync)
+{
+ struct drm_i915_gem_exec_object2 obj = {};
+ struct drm_i915_gem_execbuffer2 eb =
+ { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
+ const uint32_t bbe = 0xa << 23;
+
+ sz = ALIGN(sz, sizeof(uint32_t));
+
+ obj.handle = gem_create(gem_fd, sz);
+ gem_write(gem_fd, obj.handle, sz - sizeof(bbe), &bbe, sizeof(bbe));
+
+ eb.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT;
+
+ if (in_fence >= 0) {
+ eb.flags |= I915_EXEC_FENCE_IN;
+ eb.rsvd2 = in_fence;
+ }
+
+ gem_execbuf_wr(gem_fd, &eb);
+
+ if (sync)
+ gem_sync(gem_fd, obj.handle);
+
+ gem_close(gem_fd, obj.handle);
+ if (in_fence >= 0)
+ close(in_fence);
+
+ return eb.rsvd2 >> 32;
+}
+
+/*
+ * Check the interrupt counter: once it has stopped changing on an idle
+ * GPU, submit batch_count fence-chained batches of roughly batch_len_us
+ * each and require the counter to read at least batch_count afterwards.
+ */
+static void
+test_interrupts(int gem_fd)
+{
+ const unsigned int calibration_us = 250000;
+ const unsigned int batch_len_us = 100000;
+ const unsigned int batch_count = 3000000 / batch_len_us;
+ uint64_t idle, busy, prev;
+ unsigned long cal, sz;
+ int fd, fence = -1;
+ unsigned int i;
+
+ /* Scale the calibrated batch size down to the wanted batch length. */
+ cal = calibrate_nop(gem_fd, calibration_us);
+ sz = batch_len_us * cal / calibration_us;
+
+ fd = open_pmu(I915_PMU_INTERRUPTS);
+
+ gem_quiescent_gpu(gem_fd);
+
+ /* Wait for idle state. */
+ prev = pmu_read_single(fd);
+ idle = prev + 1;
+ while (idle != prev) {
+ usleep(100000);
+ prev = idle;
+ idle = pmu_read_single(fd);
+ }
+
+ igt_assert_eq(idle - prev, 0);
+
+ /* Send some no-op batches with chained fences to ensure interrupts. */
+ /* Only the last batch of the chain is waited upon. */
+ for (i = 1; i <= batch_count; i++)
+ fence = chain_nop(gem_fd, sz, fence,
+ i < batch_count ? false : true);
+
+ close(fence);
+
+ /* Check at least as many interrupts has been generated. */
+ busy = pmu_read_single(fd);
+ igt_assert(busy >= batch_count);
+
+ close(fd);
+}
+
+/*
+ * Check the requested and actual frequency counters by running the render
+ * engine for duration_ns twice: once with the maximum and boost frequencies
+ * limited to the sysfs RPn value, and once with the minimum raised to the
+ * RP0 value. Both counters must advance less in the first run than in the
+ * second.
+ *
+ * Requires the sysfs frequency controls to be present and writable.
+ */
+static void
+test_frequency(int gem_fd)
+{
+	const uint64_t duration_ns = 2000000000;
+	uint32_t min_freq, max_freq, boost_freq;
+	uint64_t min[2], max[2], start[2];
+	igt_spin_t *spin;
+	int fd, fd_act, sysfs;
+
+	sysfs = igt_sysfs_open(gem_fd, NULL);
+	igt_require(sysfs >= 0);
+
+	min_freq = igt_sysfs_get_u32(sysfs, "gt_RPn_freq_mhz");
+	max_freq = igt_sysfs_get_u32(sysfs, "gt_RP0_freq_mhz");
+	boost_freq = igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz");
+	igt_require(min_freq > 0 && max_freq > 0 && boost_freq > 0);
+	igt_require(max_freq > min_freq);
+	igt_require(boost_freq > min_freq);
+
+	/* One group: index 0 is requested, index 1 is actual frequency. */
+	fd = open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
+	fd_act = open_group(I915_PMU_ACTUAL_FREQUENCY, fd);
+
+	/*
+	 * Set GPU to min frequency and read PMU counters.
+	 */
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", min_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == min_freq);
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", min_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == min_freq);
+
+	pmu_read_multi(fd, 2, start);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, duration_ns);
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, 2, min);
+	min[0] -= start[0];
+	min[1] -= start[1];
+
+	igt_spin_batch_free(gem_fd, spin);
+
+	usleep(1000000);
+
+	/*
+	 * Set GPU to max frequency and read PMU counters.
+	 */
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", max_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == max_freq);
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", boost_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == boost_freq);
+
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", max_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == max_freq);
+
+	pmu_read_multi(fd, 2, start);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, duration_ns);
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, 2, max);
+	max[0] -= start[0];
+	max[1] -= start[1];
+
+	igt_spin_batch_free(gem_fd, spin);
+
+	/*
+	 * Restore min/max.
+	 */
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", min_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == min_freq);
+
+	/* Release both group members and the sysfs directory handle. */
+	close(fd_act);
+	close(fd);
+	close(sysfs);
+
+	igt_assert(min[0] < max[0]);
+	igt_assert(min[1] < max[1]);
+}
+
+/*
+ * Check the RC6 residency counter: while idle it must advance by the
+ * measured sleep time within tolerance, and while a forcewake handle is
+ * held it must not advance at all.
+ */
+static void
+test_rc6(int gem_fd)
+{
+ int64_t duration_ns = 2 * 1000 * 1000 * 1000;
+ uint64_t idle, busy, prev;
+ unsigned int slept;
+ int fd, fw;
+
+ fd = open_pmu(I915_PMU_RC6_RESIDENCY);
+
+ gem_quiescent_gpu(gem_fd);
+ usleep(1000000);
+
+ /* Go idle and check full RC6. */
+ prev = pmu_read_single(fd);
+ slept = measured_usleep(duration_ns / 1000);
+ idle = pmu_read_single(fd);
+
+ assert_within_epsilon(idle - prev, slept, tolerance);
+
+ /* Wake up device and check no RC6. */
+ fw = igt_open_forcewake_handle(gem_fd);
+ igt_assert(fw >= 0);
+
+ prev = pmu_read_single(fd);
+ usleep(duration_ns / 1000);
+ busy = pmu_read_single(fd);
+
+ /* A reference of zero means the counter must not have moved. */
+ assert_within_epsilon(busy - prev, 0.0, tolerance);
+
+ close(fw);
+ close(fd);
+}
+
+/*
+ * Same check as test_rc6() but for the RC6, RC6p and RC6pp residency
+ * counters read as one group. The igt_require() below needs all three
+ * counters to be available.
+ */
+static void
+test_rc6p(int gem_fd)
+{
+	int64_t duration_ns = 2 * 1000 * 1000 * 1000;
+	unsigned int num_pmu = 1;
+	uint64_t idle[3], busy[3], prev[3];
+	unsigned int slept, i;
+	int fd, fd_rc6p, fd_rc6pp = -1, fw;
+
+	fd = open_group(I915_PMU_RC6_RESIDENCY, -1);
+
+	/* Any non-negative return is a valid fd, including zero. */
+	fd_rc6p = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
+	if (fd_rc6p >= 0) {
+		num_pmu++;
+		fd_rc6pp = perf_i915_open_group(I915_PMU_RC6pp_RESIDENCY, fd);
+		if (fd_rc6pp >= 0)
+			num_pmu++;
+	}
+
+	/* Do not leave events open behind when the requirement is not met. */
+	if (num_pmu != 3) {
+		if (fd_rc6p >= 0)
+			close(fd_rc6p);
+		close(fd);
+	}
+
+	igt_require(num_pmu == 3);
+
+	gem_quiescent_gpu(gem_fd);
+	usleep(1000000);
+
+	/* Go idle and check full RC6. */
+	pmu_read_multi(fd, num_pmu, prev);
+	slept = measured_usleep(duration_ns / 1000);
+	pmu_read_multi(fd, num_pmu, idle);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(idle[i] - prev[i], slept, tolerance);
+
+	/* Wake up device and check no RC6. */
+	fw = igt_open_forcewake_handle(gem_fd);
+	igt_assert(fw >= 0);
+
+	pmu_read_multi(fd, num_pmu, prev);
+	usleep(duration_ns / 1000);
+	pmu_read_multi(fd, num_pmu, busy);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(busy[i] - prev[i], 0.0, tolerance);
+
+	close(fw);
+
+	/* Every group member has its own fd; close them all. */
+	close(fd_rc6pp);
+	close(fd_rc6p);
+	close(fd);
+}
+
+/*
+ * Test entry point: opens the i915 device as master, counts the engines
+ * present and enumerates all subtests. The per-engine subtests are
+ * generated for every entry of the engine table.
+ */
+igt_main
+{
+	const unsigned int num_other_metrics =
+				I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
+	unsigned int num_engines = 0;
+	int fd = -1;
+	const struct intel_execution_engine2 *e;
+	unsigned int i;
+
+	igt_fixture {
+		fd = drm_open_driver_master(DRIVER_INTEL);
+
+		igt_require_gem(fd);
+		igt_require(i915_type_id() > 0);
+
+		/* Count the engines actually present on this device. */
+		for_each_engine_class_instance(fd, e) {
+			if (gem_has_engine(fd, e->class, e->instance))
+				num_engines++;
+		}
+	}
+
+	/**
+	 * Test invalid access via perf API is rejected.
+	 */
+	igt_subtest("invalid-init")
+		invalid_init();
+
+	for_each_engine_class_instance(fd, e) {
+		/**
+		 * Test that a single engine metric can be initialized.
+		 */
+		igt_subtest_f("init-busy-%s", e->name)
+			init(fd, e, I915_SAMPLE_BUSY);
+
+		igt_subtest_f("init-wait-%s", e->name)
+			init(fd, e, I915_SAMPLE_WAIT);
+
+		igt_subtest_f("init-sema-%s", e->name)
+			init(fd, e, I915_SAMPLE_SEMA);
+
+		/**
+		 * Test that engines show no load when idle.
+		 */
+		igt_subtest_f("idle-%s", e->name)
+			single(fd, e, false);
+
+		/**
+		 * Test that a single engine reports load correctly.
+		 */
+		igt_subtest_f("busy-%s", e->name)
+			single(fd, e, true);
+
+		/**
+		 * Test that when one engine is loaded other report no load.
+		 */
+		igt_subtest_f("busy-check-all-%s", e->name)
+			busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that when all except one engine are loaded all loads
+		 * are correctly reported.
+		 */
+		igt_subtest_f("most-busy-check-all-%s", e->name)
+			most_busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that semaphore counters report no activity on idle
+		 * or busy engines.
+		 */
+		igt_subtest_f("idle-no-semaphores-%s", e->name)
+			no_sema(fd, e, false);
+
+		igt_subtest_f("busy-no-semaphores-%s", e->name)
+			no_sema(fd, e, true);
+
+		/**
+		 * Check that two perf clients do not influence each others
+		 * observations.
+		 */
+		igt_subtest_f("multi-client-%s", e->name)
+			multi_client(fd, e);
+	}
+
+	/**
+	 * Test that when all engines are loaded all loads are
+	 * correctly reported.
+	 */
+	igt_subtest("all-busy-check-all")
+		all_busy_check_all(fd, num_engines);
+
+	/**
+	 * Test that non-engine counters can be initialized and read. Apart
+	 * from the invalid metric which should fail.
+	 */
+	/* The extra iteration past the last metric is the invalid one. */
+	for (i = 0; i < num_other_metrics + 1; i++) {
+		igt_subtest_f("other-init-%u", i)
+			init_other(i, i < num_other_metrics);
+
+		igt_subtest_f("other-read-%u", i)
+			read_other(i, i < num_other_metrics);
+	}
+
+	/**
+	 * Test counters are not affected by CPU offline/online events.
+	 */
+	igt_subtest("cpu-hotplug")
+		cpu_hotplug(fd);
+
+	/**
+	 * Test GPU frequency.
+	 */
+	igt_subtest("frequency")
+		test_frequency(fd);
+
+	/**
+	 * Test interrupt count reporting.
+	 */
+	igt_subtest("interrupts")
+		test_interrupts(fd);
+
+	/**
+	 * Test RC6 residency reporting.
+	 */
+	igt_subtest("rc6")
+		test_rc6(fd);
+
+	/**
+	 * Test RC6p residency reporting.
+	 */
+	igt_subtest("rc6p")
+		test_rc6p(fd);
+
+	/**
+	 * Check render nodes are counted.
+	 */
+	igt_subtest_group {
+		int render_fd = -1;
+
+		igt_fixture {
+			render_fd = drm_open_driver_render(DRIVER_INTEL);
+			igt_require_gem(render_fd);
+
+			gem_quiescent_gpu(fd);
+		}
+
+		/*
+		 * Submit through the render node fd so that work queued via
+		 * the render node is what the busy counter has to account.
+		 */
+		for_each_engine_class_instance(fd, e) {
+			igt_subtest_f("render-node-busy-%s", e->name)
+				single(render_fd, e, true);
+		}
+
+		igt_fixture {
+			close(render_fd);
+		}
+	}
+}
--
2.9.5
More information about the Intel-gfx
mailing list