[Intel-gfx] [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API

Chris Wilson chris at chris-wilson.co.uk
Mon Sep 18 13:17:22 UTC 2017


Quoting Tvrtko Ursulin (2017-09-18 12:38:40)
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> A bunch of tests for the new i915 PMU feature.
> 
> Parts of the code were initially sketched by Dmitry Rogozhkin.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
> ---
>  lib/igt_gt.c           |  23 +-
>  lib/igt_gt.h           |   8 +
>  tests/Makefile.sources |   1 +
>  tests/perf_pmu.c       | 713 +++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 738 insertions(+), 7 deletions(-)
>  create mode 100644 tests/perf_pmu.c
> 
> diff --git a/lib/igt_gt.c b/lib/igt_gt.c
> index b3f3b3809eee..102cc2841feb 100644
> --- a/lib/igt_gt.c
> +++ b/lib/igt_gt.c
> @@ -537,14 +537,23 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd)
>         return missed;
>  }
>  
> +enum drm_i915_gem_engine_class {
> +       I915_ENGINE_CLASS_OTHER = 0,
> +       I915_ENGINE_CLASS_RENDER = 1,
> +       I915_ENGINE_CLASS_COPY = 2,
> +       I915_ENGINE_CLASS_VIDEO = 3,
> +       I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
> +       I915_ENGINE_CLASS_MAX /* non-ABI */
> +};
> +
>  const struct intel_execution_engine intel_execution_engines[] = {
> -       { "default", NULL, 0, 0 },
> -       { "render", "rcs0", I915_EXEC_RENDER, 0 },
> -       { "bsd", "vcs0", I915_EXEC_BSD, 0 },
> -       { "bsd1", "vcs0", I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
> -       { "bsd2", "vcs1", I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
> -       { "blt", "bcs0", I915_EXEC_BLT, 0 },
> -       { "vebox", "vecs0", I915_EXEC_VEBOX, 0 },
> +       { "default", NULL, -1, -1, 0, 0 },
> +       { "render", "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER, 0 },
> +       { "bsd", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 0 },
> +       { "bsd1", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
> +       { "bsd2", "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
> +       { "blt", "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT, 0 },
> +       { "vebox", "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX, 0 },
>         { NULL, 0, 0 }

I was anticipating a new struct for the explicit interface so that we
can easily phase out the old one with its aliasing.

>  };
>  
> diff --git a/lib/igt_gt.h b/lib/igt_gt.h
> index 2579cbd37be7..436041ce9cc0 100644
> --- a/lib/igt_gt.h
> +++ b/lib/igt_gt.h
> @@ -66,6 +66,8 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd);
>  extern const struct intel_execution_engine {
>         const char *name;
>         const char *full_name;
> +       int class;
> +       int instance;
>         unsigned exec_id;
>         unsigned flags;
>  } intel_execution_engines[];
> @@ -78,6 +80,12 @@ extern const struct intel_execution_engine {
>              e__++) \
>                 for_if (gem_has_ring(fd__, flags__ = e__->exec_id | e__->flags))
>  
> +#define for_each_engine_class_instance(fd__, e__) \
> +       for ((e__) = intel_execution_engines;\
> +            (e__)->name; \
> +            (e__)++) \
> +               for_if ((e__)->class > 0)
> +
>  bool gem_can_store_dword(int fd, unsigned int engine);
>  
>  #endif /* IGT_GT_H */
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index cf542df181a8..4bab6247151c 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -217,6 +217,7 @@ TESTS_progs = \
>         kms_vblank \
>         meta_test \
>         perf \
> +       perf_pmu \
>         pm_backlight \
>         pm_lpsp \
>         pm_rc6_residency \
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> new file mode 100644
> index 000000000000..2dbee586dacc
> --- /dev/null
> +++ b/tests/perf_pmu.c
> @@ -0,0 +1,713 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <sys/times.h>
> +#include <sys/types.h>
> +#include <dirent.h>
> +#include <time.h>
> +#include <poll.h>
> +
> +#include "igt.h"
> +#include "igt_perf.h"
> +
> +IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
> +
> +const double tolerance = 0.02f;
> +const unsigned long batch_duration_ns = 1000 * 1000 * 1000 / 2;
> +
> +static void
> +init(int gem_fd, const struct intel_execution_engine *e, uint8_t sample)
> +{
> +       uint64_t config = __I915_PMU_ENGINE(e->class, e->instance, sample);
> +       int fd;
> +
> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));

gem_require_ring()

> +
> +       fd = perf_i915_open(config);

Although the kernel interface is the authority.

So this should be igt_require, and igt_assert(has_ring);

> +       igt_assert(fd >= 0);
> +
> +       close(fd);
> +}
> +
> +static uint64_t pmu_read_single(int fd)
> +{
> +       uint64_t data[2];
> +       ssize_t len;
> +
> +       len = read(fd, data, sizeof(data));

Perf is a datagram api, right? A short read gives what you asked for and
discards the rest of the packet, iirc.

> +       igt_assert_eq(len, sizeof(data));
> +
> +       return data[0];
> +}
> +
> +static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
> +{
> +       uint64_t buf[2 + num];
> +       unsigned int i;
> +       ssize_t len;
> +
> +       len = read(fd, buf, sizeof(buf));
> +       igt_assert_eq(len, sizeof(buf));
> +       for (i = 0; i < num; i++)
> +               val[i] = buf[2 + i];
> +}
> +
> +#define assert_within_epsilon(x, ref, tolerance) \
> +       igt_assert_f((double)(x) <= (1.0 + tolerance) * (double)ref && \
> +                    (double)(x) >= (1.0 - tolerance) * (double)ref, \
> +                    "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
> +                    #x, #ref, (double)x, tolerance * 100.0, (double)ref)
> +
> +static void
> +single(int gem_fd, const struct intel_execution_engine *e, bool busy)
> +{
> +       uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
> +       double ref = busy ? batch_duration_ns : 0.0f;
> +       igt_spin_t *spin;
> +       uint64_t val;
> +       int fd;
> +
> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +       if (busy) {
> +               spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +               igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +       } else {
> +               usleep(batch_duration_ns / 1000);
> +       }
> +
> +       fd = perf_i915_open(config);
> +       igt_assert(fd >= 0);
> +
> +       if (busy)
> +               gem_sync(gem_fd, spin->handle);
> +
> +       val = pmu_read_single(fd);
> +
> +       assert_within_epsilon(val, ref, tolerance);
> +
> +       if (busy)
> +               igt_spin_batch_free(gem_fd, spin);
> +       close(fd);
> +}
> +
> +static void
> +busy_check_all(int gem_fd, const struct intel_execution_engine *e,

busy_check_others

busy_check_all I would expect to be checking that all engines are
correctly recorded as being busy at the same time. And there should also
be permutations of (busy, idle, wait) across the engines.

> +              const unsigned int num_engines)
> +{
> +       const struct intel_execution_engine *e_;
> +       uint64_t val[num_engines];
> +       int fd[2];
> +       igt_spin_t *spin;
> +       unsigned int busy_idx, i;
> +
> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +
> +       i = 0;
> +       fd[0] = -1;
> +       for_each_engine_class_instance(fd, e_) {
> +               if (!gem_has_ring(gem_fd, e_->exec_id | e_->flags))
> +                       continue;
> +
> +               if (e == e_)
> +                       busy_idx = i;
> +
> +               fd[i == 0 ? 0 : 1] =
> +                       perf_i915_open_group(I915_PMU_ENGINE_BUSY(e_->class,
> +                                                                 e_->instance),
> +                                            fd[0]);
> +               igt_assert(fd[0] > 0);
> +               igt_assert(i == 0 || fd[1] > 0);
> +               i++;
> +       }
> +
> +       gem_sync(gem_fd, spin->handle);
> +
> +       pmu_read_multi(fd[0], num_engines, val);
> +
> +       assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance);
> +       for (i = 0; i < num_engines; i++) {
> +               if (i == busy_idx)
> +                       continue;
> +               assert_within_epsilon(val[i], 0.0f, tolerance);
> +       }
> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd[0]);
> +}

> +static void
> +no_sema(int gem_fd, const struct intel_execution_engine *e, bool busy)

This is just the sanity check half of the sema test.

No wait, no queued?

> +static void
> +multi_client(int gem_fd, const struct intel_execution_engine *e)
> +{
> +       uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
> +       igt_spin_t *spin;
> +       uint64_t val[2];
> +       int fd[2];
> +
> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +
> +       fd[0] = perf_i915_open(config);
> +       igt_assert(fd[0] >= 0);
> +
> +       usleep(batch_duration_ns / 4000);
> +
> +       fd[1] = perf_i915_open(config);
> +       igt_assert(fd[1] >= 0);
> +
> +       usleep(batch_duration_ns / 3000);
> +
> +       val[1] = pmu_read_single(fd[1]);
> +       close(fd[1]);
> +
> +       gem_sync(gem_fd, spin->handle);
> +
> +       val[0] = pmu_read_single(fd[0]);
> +
> +       assert_within_epsilon(val[0], batch_duration_ns, tolerance);
> +       assert_within_epsilon(val[1], batch_duration_ns / 3, tolerance);
> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd[0]);
> +}


> +static void cpu_hotplug(int gem_fd)
> +{
> +       struct timespec start, now;
> +       igt_spin_t *spin;
> +       uint64_t val, ref;
> +       int fd;
> +
> +       igt_require(cpu0_hotplug_support());
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +       fd = perf_i915_open(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
> +       igt_assert(fd >= 0);
> +
> +       clock_gettime(CLOCK_MONOTONIC, &start);
> +
> +       igt_fork(child, 1) {
> +               int cpu = 0;
> +
> +               for (;;) {
> +                       char name[128];
> +                       int cpufd;
> +
> +                       sprintf(name, "/sys/devices/system/cpu/cpu%d/online",
> +                               cpu);
> +                       cpufd = open(name, O_WRONLY);
> +                       if (cpufd == -1) {
> +                               igt_assert(cpu > 0);
> +                               break;
> +                       }
> +                       igt_assert_eq(write(cpufd, "0", 2), 2);
> +
> +                       usleep(1000 * 1000);
> +
> +                       igt_assert_eq(write(cpufd, "1", 2), 2);
> +
> +                       close(cpufd);
> +                       cpu++;
> +               }
> +       }
> +
> +       igt_waitchildren();
> +
> +       igt_spin_batch_end(spin);
> +       gem_sync(gem_fd, spin->handle);
> +
> +       clock_gettime(CLOCK_MONOTONIC, &now);

Did we ever export the igt routines for probing supported clocks?
In this case, this fits into igt_nsec_elapsed.

> +       val = pmu_read_single(fd);
> +
> +       ref = elapsed_ns(&start, &now);
> +
> +       assert_within_epsilon(val, ref, tolerance);
> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd);
> +}
> +
> +static void
> +test_interrupts(int gem_fd)
> +{
> +       igt_spin_t *spin;
> +       uint64_t idle, busy, prev;
> +       int fd;
> +
> +       fd = perf_i915_open(I915_PMU_INTERRUPTS);
> +       igt_assert(fd >= 0);
> +
> +       gem_quiescent_gpu(gem_fd);
> +       sleep(2);
> +       prev = pmu_read_single(fd);
> +       usleep(batch_duration_ns / 1000);
> +       idle = pmu_read_single(fd);
> +
> +       igt_assert_eq(idle - prev, 0);
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +       gem_sync(gem_fd, spin->handle);

There's no guaranteed interrupt here.

> +
> +       busy = pmu_read_single(fd);
> +       igt_assert(busy > idle);
> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd);
> +}
> +
> +static void
> +test_frequency(int gem_fd)
> +{
> +       igt_spin_t *spin;
> +       uint64_t idle[2], busy[2];
> +       int fd;
> +
> +       fd = perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
> +       igt_assert(fd >= 0);

Ask the kernel if it is supported.

> +       igt_assert(perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, fd) >= 0);
> +
> +       gem_quiescent_gpu(gem_fd);
> +       usleep(batch_duration_ns / 1000);
> +
> +       pmu_read_multi(fd, 2, idle);
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +       gem_sync(gem_fd, spin->handle);
> +
> +       pmu_read_multi(fd, 2, busy);
> +
> +       igt_assert(busy[0] > idle[0]);
> +       igt_assert(busy[1] > idle[1]);

Nothing guarantees busy[1] changes, it is hw/fw dependent.
busy[0] depends on user config.

> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd);
> +}
> +

> +static void
> +test_rc6p(int gem_fd)
> +{
> +       const unsigned int devid = intel_get_drm_devid(gem_fd);
> +       int64_t duration_ns = 2 * 1000 * 1000 * 1000;
> +       unsigned int num_pmu = 1;
> +       igt_spin_t *spin;
> +       uint64_t idle[3], busy[3], prev[3];
> +       unsigned int i;
> +       int fd, ret;
> +
> +       igt_require(intel_gen(devid) < 8 && !IS_HASWELL(devid));

Ask the kernel. (Applies equally to rc6, rc6p).

No rc6pp testing?

> +
> +       fd = perf_i915_open_group(I915_PMU_RC6_RESIDENCY, -1);
> +       igt_assert(fd >= 0);
> +
> +       ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
> +       if (ret > 0) {
> +               num_pmu++;
> +               ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
> +               if (ret > 0)
> +                       num_pmu++;
> +       }
> +
> +       gem_quiescent_gpu(gem_fd);
> +       sleep(2);
> +
> +       pmu_read_multi(fd, num_pmu, prev);
> +       usleep(duration_ns / 1000);
> +       pmu_read_multi(fd, num_pmu, idle);
> +
> +       for (i = 0; i < num_pmu; i++)
> +               assert_within_epsilon(idle[i] - prev[i], duration_ns,
> +                                     tolerance);
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +       igt_spin_batch_set_timeout(spin, duration_ns);

Are we sure the GPU isn't allowed to sleep? i915_user_forcewake we
expect to keep the GPU out of rc6.

> +igt_main
> +{
> +       const unsigned int num_other_metrics =
> +                               I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
> +       unsigned int num_engines = 0;
> +       int fd = -1;
> +       const struct intel_execution_engine *e;
> +       unsigned int i;
> +
> +       igt_fixture {
> +               fd = drm_open_driver_master(DRIVER_INTEL);
> +
> +               igt_require_gem(fd);
> +               igt_require(i915_type_id() > 0);
> +
> +               for_each_engine_class_instance(fd, e) {
> +                       if (gem_has_ring(fd, e->exec_id | e->flags))
> +                               num_engines++;
> +               }
> +       }
> +
> +       /**
> +        * Test invalid access via perf API is rejected.
> +        */

ARGH. No comments on the intentions of the code?
-Chris


More information about the Intel-gfx mailing list