[Intel-gfx] [RFC i-g-t v2] tests/perf_pmu: test i915 RFC PMU
Dmitry Rogozhkin
dmitry.v.rogozhkin at intel.com
Fri Sep 1 15:57:40 UTC 2017
i915 RFC PMU:
* https://patchwork.freedesktop.org/series/27488/
* https://patchwork.freedesktop.org/series/28842/
Tests:
* init: try to initialize all possible metrics exposed by the i915 PMU
(limited to instance 0 of each engine class)
* invalid_init: verify that the i915 PMU correctly errors out on invalid
initialization
* enable: verify that events from parallel consumers can be disabled
without affecting the other consumer
* frequency: verify that the actual-frequency metric works as expected
* busy: verify that BUSY metrics work for each engine
* busy_parallel: verify that parallel requests for BUSY metrics do
not conflict
* cpu_online: verify PMU context migration on CPUs going online/offline
v1: add cpu_online test
v2: add enable and frequency tests
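For reference, a minimal sketch of the consumer side these tests exercise,
assuming the i915 event source registered by the RFC series above and the
config encoding copied into the test below; read_i915_type_id() is a
hypothetical helper standing in for reading
/sys/bus/event_source/devices/i915/type (see i915_type_id() in the test),
and error handling is omitted:

  struct perf_event_attr attr = { };
  attr.type = read_i915_type_id(); /* hypothetical helper, see i915_type_id() */
  attr.size = sizeof(attr);
  attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);
  int fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, -1, 0);
  uint64_t busy;
  read(fd, &busy, sizeof(busy)); /* accumulated rcs0 busy counter */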
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
tests/Makefile.sources | 1 +
tests/perf_pmu.c | 824 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 825 insertions(+)
create mode 100644 tests/perf_pmu.c
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index bb013c7..51b684b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -215,6 +215,7 @@ TESTS_progs = \
kms_vblank \
meta_test \
perf \
+ perf_pmu \
pm_backlight \
pm_lpsp \
pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 0000000..f7b0904
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,824 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/time.h>
+#include <time.h>
+#include "drm.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define USAGE_TOLERANCE 0.02
+
+////////////////////////////////////////////////////////////////////////
+// This is a copy of perf.h from intel-gpu-tools/overlay
+// because I am too lazy to move it to some common library.
+////////////////////////////////////////////////////////////////////////
+
+#include <linux/perf_event.h>
+
+enum drm_i915_gem_engine_class {
+ I915_ENGINE_CLASS_OTHER = 0,
+ I915_ENGINE_CLASS_RENDER = 1,
+ I915_ENGINE_CLASS_COPY = 2,
+ I915_ENGINE_CLASS_VIDEO = 3,
+ I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+ I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+enum drm_i915_pmu_engine_sample {
+ I915_SAMPLE_QUEUED = 0,
+ I915_SAMPLE_BUSY = 1,
+ I915_SAMPLE_WAIT = 2,
+ I915_SAMPLE_SEMA = 3
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+ (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+ ((class) << I915_PMU_CLASS_SHIFT | \
+ (instance) << I915_PMU_SAMPLE_BITS | \
+ (sample))
+
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
+#define I915_PMU_ENERGY __I915_PMU_OTHER(2)
+#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(3)
+
+#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(4)
+#define I915_PMU_RC6p_RESIDENCY __I915_PMU_OTHER(5)
+#define I915_PMU_RC6pp_RESIDENCY __I915_PMU_OTHER(6)
+
+static inline int
+perf_event_open(struct perf_event_attr *attr,
+ pid_t pid,
+ int cpu,
+ int group_fd,
+ unsigned long flags)
+{
+#ifndef __NR_perf_event_open
+#if defined(__i386__)
+#define __NR_perf_event_open 336
+#elif defined(__x86_64__)
+#define __NR_perf_event_open 298
+#else
+#define __NR_perf_event_open 0
+#endif
+#endif
+ attr->size = sizeof(*attr);
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static uint64_t i915_type_id(void)
+{
+ char buf[1024];
+ int fd, n;
+
+ fd = open("/sys/bus/event_source/devices/i915/type", 0);
+ if (fd < 0) {
+ n = -1;
+ } else {
+ n = read(fd, buf, sizeof(buf)-1);
+ close(fd);
+ }
+ if (n < 0)
+ return 0;
+
+ buf[n] = '\0';
+ return strtoull(buf, 0, 0);
+}
+
+////////////////////////////////////////////////////////////////////////
+
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+ return ((end->tv_sec - start->tv_sec) +
+ (end->tv_nsec - start->tv_nsec)*1e-9);
+}
+
+static uint64_t elapsed_ns(const struct timespec *start, const struct timespec *end)
+{
+ return ((end->tv_sec - start->tv_sec)*1e9 +
+ (end->tv_nsec - start->tv_nsec));
+}
+
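+/*
+ * Keep the given ring busy for |timeout| seconds by resubmitting a no-op
+ * batch in blocks of 1024 executions, then wait for the last one to
+ * complete.
+ */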
+static void nop_on_ring(int fd, uint32_t handle, unsigned ring_id, int timeout)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct timespec start, now;
+
+ gem_require_ring(fd, ring_id);
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = handle;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = ring_id;
+ execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ if (__gem_execbuf(fd, &execbuf)) {
+ execbuf.flags = ring_id;
+ gem_execbuf(fd, &execbuf);
+ }
+
+ do {
+ for (int loop = 0; loop < 1024; loop++) {
+ gem_execbuf(fd, &execbuf);
+ }
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while (elapsed(&start, &now) < timeout);
+ gem_sync(fd, handle);
+}
+
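+/*
+ * Open a single i915 PMU event. When group is a valid perf event fd the
+ * new event joins that group (and PERF_FORMAT_GROUP is dropped from its
+ * read_format, only the leader carries it); otherwise the event becomes
+ * a group leader itself.
+ */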
+static int perf_i915_open(int config, int group, int read_format)
+{
+ struct perf_event_attr attr;
+
+ memset(&attr, 0, sizeof (attr));
+
+ attr.type = i915_type_id();
+ if (attr.type == 0)
+ return -ENOENT;
+ attr.config = config;
+
+ attr.read_format = read_format;
+ if (group != -1)
+ attr.read_format &= ~PERF_FORMAT_GROUP;
+
+ return perf_event_open(&attr, -1, 0, group, 0);
+}
+
+struct metric {
+ int config;
+ struct {
+ uint64_t value;
+ uint64_t time;
+ } start;
+ struct {
+ uint64_t value;
+ uint64_t time;
+ } end;
+};
+
+struct pmu_metrics {
+ int fd;
+ int read_format;
+ int num_metrics;
+ struct metric* metrics;
+};
+
+static int perf_init(struct pmu_metrics *pm, int num_configs, int* configs)
+{
+ int i, res;
+
+ memset(pm, 0, sizeof(struct pmu_metrics));
+ pm->fd = -1;
+ pm->read_format =
+ PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_GROUP;
+ pm->metrics = (struct metric*)calloc(num_configs, sizeof(struct metric));
+ if (!pm->metrics)
+ return -1;
+
+ for (i = 0; i < num_configs; ++i) {
+ if (pm->fd < 0)
+ res = pm->fd = perf_i915_open(configs[i], -1, pm->read_format);
+ else
+ res = perf_i915_open(configs[i], pm->fd, pm->read_format);
+ if (res >= 0) {
+ pm->metrics[pm->num_metrics++].config = configs[i];
+ }
+ }
+
+ igt_info("perf_init: enabled %d metrics from %d requested\n",
+ pm->num_metrics, num_configs);
+
+ return 0;
+}
+
+static void perf_close(struct pmu_metrics *pm)
+{
+ if (pm->fd != -1) { close(pm->fd); pm->fd = -1; }
+ if (pm->metrics) { free(pm->metrics); pm->metrics = NULL; }
+}
+
+/* see 'man 2 perf_event_open' */
+struct perf_read_format {
+ uint64_t nr_values; /* The number of events */
+ uint64_t timestamp; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
+ struct {
+ uint64_t value; /* The value of the event */
+ } values[1024];
+};
+
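+/*
+ * Read the whole event group with a single read() on the group leader and
+ * roll the previously read values into .start, so perf_elapsed() and
+ * perf_avg_freq() report deltas between the last two reads.
+ */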
+static int perf_read(struct pmu_metrics *pm)
+{
+ int read_format =
+ PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_GROUP;
+ struct perf_read_format data;
+ ssize_t len;
+ int i;
+
+ if (pm->fd < 0)
+ return -1;
+
+ if (pm->read_format != read_format)
+ return -1;
+
+ len = read(pm->fd, &data, sizeof(data));
+ if (len < 0) {
+ return -1;
+ }
+
+ if (pm->num_metrics != data.nr_values)
+ return -1;
+
+ for (i = 0; i < data.nr_values; ++i) {
+ pm->metrics[i].start.value = pm->metrics[i].end.value;
+ pm->metrics[i].end.value = data.values[i].value;
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ pm->metrics[i].start.time = pm->metrics[i].end.time;
+ pm->metrics[i].end.time = data.timestamp;
+ }
+ }
+
+ return 0;
+}
+
+static const char* perf_get_metric_name(int config)
+{
+ switch (config) {
+ case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0):
+ return "i915/rcs0-busy/";
+ case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0):
+ return "i915/vcs0-busy/";
+ case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1):
+ return "i915/vcs1-busy/";
+ case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0):
+ return "i915/bcs0-busy/";
+ case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0):
+ return "i915/vecs0-busy/";
+ case I915_PMU_ACTUAL_FREQUENCY:
+ return "i915/actual-frequency/";
+ default:
+ return "i915/unknown/";
+ }
+}
+
+static uint64_t perf_elapsed(struct metric* m)
+{
+ return m->end.value - m->start.value;
+}
+
+static uint64_t perf_avg_freq(struct metric* m)
+{
+ return (m->end.value - m->start.value)/(m->end.time - m->start.time);
+}
+
+static int sysfs_read_i32(const char *name)
+{
+ char buf[4096];
+ int sysfd;
+ int len;
+
+ sprintf(buf, "/sys/class/drm/card%d/%s",
+ drm_get_card(), name);
+ sysfd = open(buf, O_RDONLY);
+ if (sysfd < 0)
+ return -1;
+
+ len = read(sysfd, buf, sizeof(buf)-1);
+ close(sysfd);
+ if (len < 0)
+ return -1;
+
+ buf[len] = '\0';
+ return atoi(buf);
+}
+
+static uint64_t debugfs_read_u64_16(const char *name)
+{
+ char buf[4096];
+ int sysfd;
+ int len;
+
+ sprintf(buf, "/sys/kernel/debug/dri/%d/%s",
+ drm_get_card(), name);
+ sysfd = open(buf, O_RDONLY);
+ if (sysfd < 0)
+ return 0;
+
+ len = read(sysfd, buf, sizeof(buf)-1);
+ close(sysfd);
+ if (len < 0)
+ return 0;
+
+ buf[len] = '\0';
+
+ return strtoull(buf, NULL, 16);
+}
+
+static bool is_engine_config(uint64_t config)
+{
+ return config < __I915_PMU_OTHER(0);
+}
+
+#define ENGINE_SAMPLE_BITS (16)
+#define BIT_ULL(nr) (1ULL << (nr))
+
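+/*
+ * Bit this event is expected to contribute to the mask exposed in the
+ * i915_pmu_enable_info debugfs file: engine events map to their sample
+ * type bit, global events to bits above ENGINE_SAMPLE_BITS.
+ */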
+static uint64_t event_enabled_mask(uint64_t config)
+{
+ if (is_engine_config(config))
+ return BIT_ULL(config & I915_PMU_SAMPLE_MASK);
+ else
+ return BIT_ULL(config - __I915_PMU_OTHER(0)) <<
+ ENGINE_SAMPLE_BITS;
+}
+
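+/*
+ * Build the full list of configs this test knows about: the four engine
+ * sample events for instance 0 of each engine class plus the global
+ * events. Returns the count and stores the malloc'ed array in *configs.
+ */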
+static int configure_all(int** configs)
+{
+ unsigned int class[] =
+ {
+ I915_ENGINE_CLASS_RENDER,
+ I915_ENGINE_CLASS_VIDEO,
+ I915_ENGINE_CLASS_VIDEO,
+ I915_ENGINE_CLASS_COPY,
+ I915_ENGINE_CLASS_VIDEO_ENHANCE,
+ };
+ int* c = malloc(1024 * sizeof(int));
+ int n = 0;
+
+ igt_assert(c != NULL);
+
+ for (int i=0; i < sizeof(class)/sizeof(class[0]); ++i) {
+ /* TODO Adding metrics for 0-instances only. It would be nice
+ * to get everything, but for that we either need to add checks
+ * for different platforms here or use the upcoming engine
+ * discovery API.
+ */
+ c[n++] = I915_PMU_ENGINE_BUSY(class[i], 0);
+ c[n++] = I915_PMU_ENGINE_QUEUED(class[i], 0);
+ c[n++] = I915_PMU_ENGINE_WAIT(class[i], 0);
+ c[n++] = I915_PMU_ENGINE_SEMA(class[i], 0);
+ }
+ c[n++] = I915_PMU_ACTUAL_FREQUENCY;
+ c[n++] = I915_PMU_REQUESTED_FREQUENCY;
+ c[n++] = I915_PMU_ENERGY;
+ c[n++] = I915_PMU_RC6_RESIDENCY;
+ c[n++] = I915_PMU_RC6p_RESIDENCY;
+ c[n++] = I915_PMU_RC6pp_RESIDENCY;
+
+ *configs = c;
+ return n;
+}
+
+static uint64_t get_enabled_mask(int num_configs, int* configs)
+{
+ uint64_t mask = 0;
+ for (int i = 0; i < num_configs; ++i) {
+ mask |= event_enabled_mask(configs[i]);
+ }
+ return mask;
+}
+
+static void test_init(void)
+{
+ struct pmu_metrics pm;
+ int* configs;
+ int num_configs = configure_all(&configs);
+ uint64_t enabled = get_enabled_mask(num_configs, configs);
+
+ igt_info("expected pmu enable mask: 0x%lx\n", enabled);
+
+ igt_assert_eq(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+ igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+ igt_assert_eq(pm.num_metrics, num_configs);
+
+ igt_assert_eq(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+ igt_assert_eq(perf_read(&pm), 0);
+
+ ioctl(pm.fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
+ igt_assert_eq(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+ perf_close(&pm);
+ free(configs);
+}
+
+/* Tests that the i915 PMU correctly errors out on invalid initialization.
+ * i915 PMU is an uncore PMU, thus:
+ * - sampling period is not supported
+ * - pid > 0 is not supported since we can't count per-process (we count
+ *   for the whole system)
+ * - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ */
+static void test_invalid_init(void)
+{
+ struct perf_event_attr attr;
+ int pid, cpu;
+
+#define ATTR_INIT() \
+ do { \
+ memset(&attr, 0, sizeof (attr)); \
+ attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
+ attr.type = i915_type_id(); \
+ igt_assert(attr.type != 0); \
+ } while(0)
+
+ ATTR_INIT();
+ attr.sample_period = 100;
+ pid = -1;
+ cpu = 0;
+ igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+ igt_assert_eq(errno, EINVAL);
+
+ ATTR_INIT();
+ pid = 0;
+ cpu = 0;
+ igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+ igt_assert_eq(errno, EINVAL);
+
+ ATTR_INIT();
+ pid = -1;
+ cpu = 1;
+ igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+ igt_assert_eq(errno, ENODEV);
+}
+
+static void test_enable(void)
+{
+ struct pmu_metrics pm1, pm2;
+ int* configs;
+ int num_configs = configure_all(&configs);
+ uint64_t enabled = get_enabled_mask(num_configs, configs);
+
+ igt_info("expected pmu enable mask: 0x%lx\n", enabled);
+
+ igt_assert_eq(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+ igt_assert_eq(perf_init(&pm1, num_configs, configs), 0);
+ igt_assert_eq(pm1.num_metrics, num_configs);
+
+ igt_assert_eq(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+ igt_assert_eq(perf_init(&pm2, num_configs, configs), 0);
+ igt_assert_eq(pm2.num_metrics, num_configs);
+ igt_assert_eq(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+ ioctl(pm2.fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
+ igt_assert_eq(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+ ioctl(pm1.fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
+ igt_assert_eq(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+ perf_close(&pm2);
+ perf_close(&pm1);
+ free(configs);
+}
+
+static int test_single(int fd, uint32_t handle)
+{
+ struct {
+ const char* engine_name;
+ unsigned int class;
+ unsigned int instance;
+ unsigned int ring_id;
+ } engines[] = {
+ { "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER },
+ { "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD | I915_EXEC_BSD_RING1 },
+ { "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD | I915_EXEC_BSD_RING2 },
+ { "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT },
+ { "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX },
+ };
+ struct pmu_metrics pm;
+ int configs[] = {
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+ };
+ int num_configs = sizeof(configs)/sizeof(configs[0]);
+ struct timespec start, now;
+
+ igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+ igt_assert_eq(pm.num_metrics, num_configs);
+
+ for (int i = 0; i < sizeof(engines)/sizeof(engines[0]); ++i) {
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ igt_assert_eq(perf_read(&pm), 0);
+
+ /* Create almost 100% load on the examined engine for the specified time. */
+ nop_on_ring(fd, handle, engines[i].ring_id, 20);
+
+ igt_assert_eq(perf_read(&pm), 0);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ igt_info("Executed on %s for %ldus\n", engines[i].engine_name, elapsed_ns(&start, &now));
+ for (int j = 0; j < num_configs; ++j) {
+ igt_info(" %s: %ldus\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+
+ igt_assert(perf_elapsed(&pm.metrics[j]) < elapsed_ns(&start, &now));
+
+ if (configs[j] == I915_PMU_ENGINE_BUSY(engines[i].class, engines[i].instance)) {
+ /* Check that the loaded engine had almost 100% load. */
+ igt_assert(perf_elapsed(&pm.metrics[j]) > (1 - USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+ } else if (configs[j] == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0)) {
+ /* Check that the BCS engine had only a tiny load.
+ * NOTE Some load on BCS is unavoidable if you run under any graphical server,
+ * so we can't check for zero.
+ */
+ igt_assert(perf_elapsed(&pm.metrics[j]) < USAGE_TOLERANCE * elapsed_ns(&start, &now));
+ } else {
+ /* Check that other engines did not have any load.
+ * NOTE This may fail if you have any other workload running in parallel to this test.
+ */
+ igt_assert_eq(perf_elapsed(&pm.metrics[j]), 0);
+ }
+ }
+ }
+ perf_close(&pm);
+
+ /* Return how many engines we have tried. */
+ return sizeof(engines)/sizeof(engines[0]);
+}
+
+static void test_parallel(int fd, uint32_t handle)
+{
+ struct pmu_metrics pm;
+ int configs[] = {
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+ };
+ int num_configs = sizeof(configs)/sizeof(configs[0]);
+ int num_engines;
+ struct timespec start, now;
+
+ igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+ igt_assert_eq(pm.num_metrics, num_configs);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ igt_assert_eq(perf_read(&pm), 0);
+
+ /* Create almost 100% load on the engines one by one; we get back
+ * how many engines were tried.
+ */
+ num_engines = test_single(fd, handle);
+
+ igt_assert_eq(perf_read(&pm), 0);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ igt_info("Executed on %d engines for %ldus\n", num_engines, elapsed_ns(&start, &now));
+ for (int j = 0; j < num_configs; ++j) {
+ igt_info(" %s: %ldus\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+
+ /* Since the engines were loaded in turn, each for roughly the same time,
+ * each should have produced roughly the same load, proportional to the
+ * number of engines.
+ */
+ igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+ igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines < (1+USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+ }
+ perf_close(&pm);
+}
+
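+/* Whether CPU0 can be hot(un)plugged on this system, i.e. whether its
+ * "online" sysfs node exists and is writable.
+ */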
+static bool is_hotplug_cpu0(void)
+{
+ int fd = open("/sys/devices/system/cpu/cpu0/online", O_WRONLY);
+ if (fd == -1)
+ return false;
+ close(fd);
+ return true;
+}
+
+static void test_cpu_online(int fd, uint32_t handle)
+{
+ struct pmu_metrics pm;
+ int config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);
+ struct timespec start, now;
+ int timeout = 32; // seconds
+
+ igt_require(is_hotplug_cpu0());
+
+ igt_assert_eq(perf_init(&pm, 1, &config), 0);
+ igt_assert_eq(pm.num_metrics, 1);
+
+ igt_fork(child, 1) {
+ int cpu_fd;
+ char cpu_name[64];
+ char online[] = "1";
+ char offline[] = "0";
+ useconds_t offline_time = 1000*1000; // 1 second
+
+ igt_info("attempting to put each CPU offline for 1 second:\n");
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ do {
+ int cpu = 0;
+ do {
+ sprintf(cpu_name, "/sys/devices/system/cpu/cpu%d/online", cpu);
+ cpu_fd = open(cpu_name, O_WRONLY);
+ if (cpu_fd == -1) {
+ igt_info(" no more CPUs, starting over...\n");
+ break;
+ }
+
+ igt_info(" CPU#%d: ", cpu);
+ if (-1 == write(cpu_fd, &offline, sizeof(offline))) {
+ igt_info("failed to put offline: ");
+ }
+ usleep(offline_time);
+ if (-1 == write(cpu_fd, &online, sizeof(online))) {
+ igt_info("failed to put it back online: ");
+ }
+ close(cpu_fd);
+ igt_info("done\n");
+ ++cpu;
+ } while(1);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ } while(elapsed(&start, &now) < timeout);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ igt_assert_eq(perf_read(&pm), 0);
+
+ /* Create almost 100% load on the examined engine for the specified time. */
+ nop_on_ring(fd, handle, I915_EXEC_RENDER, timeout);
+
+ igt_assert_eq(perf_read(&pm), 0);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ igt_waitchildren();
+
+ igt_info("Executed on rcs0 for %ldus\n",elapsed_ns(&start, &now));
+ igt_info(" %s: %ldus\n", perf_get_metric_name(pm.metrics[0].config), perf_elapsed(&pm.metrics[0]));
+
+ /* Check that the loaded engine had almost 100% load. */
+ igt_assert(perf_elapsed(&pm.metrics[0]) < elapsed_ns(&start, &now));
+ igt_assert(perf_elapsed(&pm.metrics[0]) > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+
+ perf_close(&pm);
+}
+
+static void test_frequency(int fd, uint32_t handle)
+{
+ struct pmu_metrics pm;
+ int configs[] = {
+ I915_PMU_ACTUAL_FREQUENCY
+ };
+ int num_configs = sizeof(configs)/sizeof(configs[0]);
+ struct timespec start, now;
+
+ igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+ igt_assert_eq(pm.num_metrics, num_configs);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ igt_assert_eq(perf_read(&pm), 0);
+
+ /* Create almost 100% load on rcs0 for the specified time. */
+ nop_on_ring(fd, handle, I915_EXEC_RENDER, 20);
+
+ igt_assert_eq(perf_read(&pm), 0);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+
+ igt_info("Executed on rcs0 for %ldus\n", elapsed_ns(&start, &now));
+ for (int j = 0; j < num_configs; ++j) {
+ if (pm.metrics[j].config == I915_PMU_ACTUAL_FREQUENCY) {
+ int min = sysfs_read_i32("gt_min_freq_mhz");
+ int max = sysfs_read_i32("gt_max_freq_mhz");
+
+ igt_info(" %s: %ld MHz\n", perf_get_metric_name(pm.metrics[j].config), perf_avg_freq(&pm.metrics[j]));
+
+ igt_assert(perf_avg_freq(&pm.metrics[j]) >= min);
+ igt_assert(perf_avg_freq(&pm.metrics[j]) <= max);
+ } else if (pm.metrics[j].config == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0)) {
+ igt_info(" %s: %ldus\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+
+ /* rcs0 was kept almost 100% busy for the measured interval, so its
+ * busy time should match the wall time within the tolerance.
+ */
+ igt_assert(perf_elapsed(&pm.metrics[j]) > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+ igt_assert(perf_elapsed(&pm.metrics[j]) < (1+USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+ } else {
+ igt_assert(!"buggy test");
+ }
+ }
+ perf_close(&pm);
+}
+
+igt_main
+{
+ uint32_t handle = 0;
+ int device = -1;
+
+ igt_fixture {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+
+ handle = gem_create(device, 4096);
+ gem_write(device, handle, 0, &bbe, sizeof(bbe));
+
+ igt_fork_hang_detector(device);
+ }
+
+ /* Test that we can initialize all the metrics. */
+ igt_subtest_f("init")
+ test_init();
+
+ /* Test that invalid initialization attempts are correctly rejected. */
+ igt_subtest_f("invalid_init")
+ test_invalid_init();
+
+ /* Test concurrent events enable/disable. */
+ igt_subtest_f("enable")
+ test_enable();
+
+ /* Test frequency metrics consumer. */
+ igt_subtest_f("frequency")
+ test_frequency(device, handle);
+
+ /* Test single engines busy metrics consumer. */
+ igt_subtest_f("busy")
+ test_single(device, handle);
+
+ /* Test parallel engines busy metrics consumers. */
+ igt_subtest_f("busy_parallel")
+ test_parallel(device, handle);
+
+ /* Test PMU context migration to another CPU as CPUs go offline/online. */
+ igt_subtest_f("cpu_online")
+ test_cpu_online(device, handle);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ gem_close(device, handle);
+ close(device);
+ }
+}
--
1.8.3.1