[Intel-gfx] [PATCH i-g-t 4/7] intel-gpu-overlay: Catch-up to new i915 PMU

Chris Wilson chris at chris-wilson.co.uk
Mon Sep 25 15:31:42 UTC 2017


Quoting Tvrtko Ursulin (2017-09-25 16:14:59)
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>  lib/igt_perf.h           | 93 ++++++++++++++++++++++++++++++++++--------------
>  overlay/gem-interrupts.c |  2 +-
>  overlay/gpu-freq.c       |  4 +--
>  overlay/gpu-top.c        | 68 +++++++++++++++++++----------------
>  overlay/power.c          |  4 +--
>  overlay/rc6.c            |  6 ++--
>  6 files changed, 111 insertions(+), 66 deletions(-)
> 
> diff --git a/lib/igt_perf.h b/lib/igt_perf.h
> index 8e674c3a3755..e29216f0500a 100644
> --- a/lib/igt_perf.h
> +++ b/lib/igt_perf.h
> @@ -1,3 +1,27 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
>  #ifndef I915_PERF_H
>  #define I915_PERF_H
>  
> @@ -5,41 +29,56 @@
>  
>  #include <linux/perf_event.h>
>  
> -#define I915_SAMPLE_BUSY       0
> -#define I915_SAMPLE_WAIT       1
> -#define I915_SAMPLE_SEMA       2
> +enum drm_i915_gem_engine_class {
> +       I915_ENGINE_CLASS_OTHER = 0,
> +       I915_ENGINE_CLASS_RENDER = 1,
> +       I915_ENGINE_CLASS_COPY = 2,
> +       I915_ENGINE_CLASS_VIDEO = 3,
> +       I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
> +       I915_ENGINE_CLASS_MAX /* non-ABI */
> +};
> +
> +enum drm_i915_pmu_engine_sample {
> +       I915_SAMPLE_QUEUED = 0,
> +       I915_SAMPLE_BUSY = 1,
> +       I915_SAMPLE_WAIT = 2,
> +       I915_SAMPLE_SEMA = 3,
> +       I915_ENGINE_SAMPLE_MAX /* non-ABI */
> +};
> +
> +#define I915_PMU_SAMPLE_BITS (4)
> +#define I915_PMU_SAMPLE_MASK (0xf)
> +#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
> +#define I915_PMU_CLASS_SHIFT \
> +       (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
>  
> -#define I915_SAMPLE_RCS                0
> -#define I915_SAMPLE_VCS                1
> -#define I915_SAMPLE_BCS                2
> -#define I915_SAMPLE_VECS       3
> +#define __I915_PMU_ENGINE(class, instance, sample) \
> +       ((class) << I915_PMU_CLASS_SHIFT | \
> +       (instance) << I915_PMU_SAMPLE_BITS | \
> +       (sample))
>  
> -#define __I915_PERF_COUNT(ring, id) ((ring) << 4 | (id))
> +#define I915_PMU_ENGINE_QUEUED(class, instance) \
> +       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
>  
> -#define I915_PERF_COUNT_RCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY)
> -#define I915_PERF_COUNT_RCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT)
> -#define I915_PERF_COUNT_RCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA)
> +#define I915_PMU_ENGINE_BUSY(class, instance) \
> +       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
>  
> -#define I915_PERF_COUNT_VCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY)
> -#define I915_PERF_COUNT_VCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT)
> -#define I915_PERF_COUNT_VCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA)
> +#define I915_PMU_ENGINE_WAIT(class, instance) \
> +       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
>  
> -#define I915_PERF_COUNT_BCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY)
> -#define I915_PERF_COUNT_BCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT)
> -#define I915_PERF_COUNT_BCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_SEMA)
> +#define I915_PMU_ENGINE_SEMA(class, instance) \
> +       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
>  
> -#define I915_PERF_COUNT_VECS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY)
> -#define I915_PERF_COUNT_VECS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT)
> -#define I915_PERF_COUNT_VECS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA)
> +#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
>  
> -#define I915_PERF_ACTUAL_FREQUENCY 32
> -#define I915_PERF_REQUESTED_FREQUENCY 33
> -#define I915_PERF_ENERGY 34
> -#define I915_PERF_INTERRUPTS 35
> +#define I915_PMU_ACTUAL_FREQUENCY      __I915_PMU_OTHER(0)
> +#define I915_PMU_REQUESTED_FREQUENCY   __I915_PMU_OTHER(1)
> +#define I915_PMU_INTERRUPTS            __I915_PMU_OTHER(2)
> +#define I915_PMU_RC6_RESIDENCY         __I915_PMU_OTHER(3)
> +#define I915_PMU_RC6p_RESIDENCY                __I915_PMU_OTHER(4)
> +#define I915_PMU_RC6pp_RESIDENCY       __I915_PMU_OTHER(5)
>  
> -#define I915_PERF_RC6_RESIDENCY                40
> -#define I915_PERF_RC6p_RESIDENCY       41
> -#define I915_PERF_RC6pp_RESIDENCY      42
> +#define I915_PMU_LAST I915_PMU_RC6pp_RESIDENCY
>  
>  static inline int
>  perf_event_open(struct perf_event_attr *attr,
> diff --git a/overlay/gem-interrupts.c b/overlay/gem-interrupts.c
> index 3eda24f4d7eb..add4a9dfd725 100644
> --- a/overlay/gem-interrupts.c
> +++ b/overlay/gem-interrupts.c
> @@ -113,7 +113,7 @@ int gem_interrupts_init(struct gem_interrupts *irqs)
>  {
>         memset(irqs, 0, sizeof(*irqs));
>  
> -       irqs->fd = perf_i915_open(I915_PERF_INTERRUPTS);
> +       irqs->fd = perf_i915_open(I915_PMU_INTERRUPTS);
>         if (irqs->fd < 0 && interrupts_read() < 0)
>                 irqs->error = ENODEV;
>  
> diff --git a/overlay/gpu-freq.c b/overlay/gpu-freq.c
> index 76c5ed9acfd1..c4619b87242a 100644
> --- a/overlay/gpu-freq.c
> +++ b/overlay/gpu-freq.c
> @@ -37,8 +37,8 @@ static int perf_open(void)
>  {
>         int fd;
>  
> -       fd = perf_i915_open_group(I915_PERF_ACTUAL_FREQUENCY, -1);
> -       if (perf_i915_open_group(I915_PERF_REQUESTED_FREQUENCY, fd) < 0) {
> +       fd = perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, -1);
> +       if (perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, fd) < 0) {
>                 close(fd);
>                 fd = -1;
>         }
> diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
> index 812f47d5aced..61b8f62fd78c 100644
> --- a/overlay/gpu-top.c
> +++ b/overlay/gpu-top.c
> @@ -43,49 +43,57 @@
>  #define   RING_WAIT            (1<<11)
>  #define   RING_WAIT_SEMAPHORE  (1<<10)
>  
> -#define __I915_PERF_RING(n) (4*n)
> -#define I915_PERF_RING_BUSY(n) (__I915_PERF_RING(n) + 0)
> -#define I915_PERF_RING_WAIT(n) (__I915_PERF_RING(n) + 1)
> -#define I915_PERF_RING_SEMA(n) (__I915_PERF_RING(n) + 2)
> -
>  static int perf_init(struct gpu_top *gt)
>  {
> -       const char *names[] = {
> -               "RCS",
> -               "BCS",
> -               "VCS0",
> -               "VCS1",
> -               NULL,
> +       struct engine_desc {
> +               unsigned class, inst;
> +               const char *name;
> +       } *d, engines[] = {
> +               { I915_ENGINE_CLASS_RENDER, 0, "rcs0" },
> +               { I915_ENGINE_CLASS_COPY, 0, "bcs0" },
> +               { I915_ENGINE_CLASS_VIDEO, 0, "vcs0" },
> +               { I915_ENGINE_CLASS_VIDEO, 1, "vcs1" },
> +               { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "vecs0" },

Hmm, there is some hidden coupling between this ordering and the colours at
the moment, but other than that the order is flexible, IIRC.

> +               { 0, 0, NULL }
>         };
> -       int n;
>  
> -       gt->fd = perf_i915_open_group(I915_PERF_RING_BUSY(0), -1);
> +       d = &engines[0];
> +
> +       gt->fd = perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class, d->inst),
> +                                     -1);
>         if (gt->fd < 0)
>                 return -1;
>  
> -       if (perf_i915_open_group(I915_PERF_RING_WAIT(0), gt->fd) >= 0)
> +       if (perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class, d->inst),
> +                                gt->fd) >= 0)
>                 gt->have_wait = 1;
>  
> -       if (perf_i915_open_group(I915_PERF_RING_SEMA(0), gt->fd) >= 0)
> +       if (perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class, d->inst),
> +                                gt->fd) >= 0)
>                 gt->have_sema = 1;
>  
> -       gt->ring[0].name = names[0];
> +       gt->ring[0].name = d->name;
>         gt->num_rings = 1;
>  
> -       for (n = 1; names[n]; n++) {
> -               if (perf_i915_open_group(I915_PERF_RING_BUSY(n), gt->fd) >= 0) {
> -                       if (gt->have_wait &&
> -                           perf_i915_open_group(I915_PERF_RING_WAIT(n),
> -                                                gt->fd) < 0)
> -                               return -1;
> -
> -                       if (gt->have_sema &&
> -                           perf_i915_open_group(I915_PERF_RING_SEMA(n),
> -                                                gt->fd) < 0)
> -                               return -1;
> -
> -                       gt->ring[gt->num_rings++].name = names[n];
> -               }
> +       for (d++; d->name; d++) {
> +               if (perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class,
> +                                                             d->inst),
> +                                       gt->fd) < 0)
> +                       continue;
> +
> +               if (gt->have_wait &&
> +                   perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class,
> +                                                             d->inst),
> +                                        gt->fd) < 0)
> +                       return -1;
> +
> +               if (gt->have_sema &&
> +                   perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class,
> +                                                             d->inst),
> +                                  gt->fd) < 0)
> +                       return -1;
> +
> +               gt->ring[gt->num_rings++].name = d->name;
>         }
>  
>         return 0;
> diff --git a/overlay/power.c b/overlay/power.c
> index dd4aec6bffd9..805f4ca7805c 100644
> --- a/overlay/power.c
> +++ b/overlay/power.c
> @@ -45,9 +45,7 @@ int power_init(struct power *power)
>  
>         memset(power, 0, sizeof(*power));
>  
> -       power->fd = perf_i915_open(I915_PERF_ENERGY);
> -       if (power->fd != -1)
> -               return 0;
> +       power->fd = -1;

Hmm, didn't you say that the RAPL values were exposed via perf as well?


More information about the Intel-gfx mailing list