[Intel-gfx] [PATCH 24/24] RFC drm/i915: Expose a PMU interface for perf queries
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jun 15 11:17:09 UTC 2017
On 18/05/2017 10:46, Chris Wilson wrote:
> The first goal is to be able to measure GPU (and individual ring) busyness
> without having to poll registers from userspace. (Which not only incurs
> holding the forcewake lock indefinitely, perturbing the system, but also
> runs the risk of hanging the machine.) As an alternative we can use the
> perf event counter interface to sample the ring registers periodically
> and send those results to userspace.
I finally got round to trying this and it does not look too bad overhead wise.
Maybe it is polish and review time? :)
I am also thinking if we could make it more efficient on gen8+ by
employing the core of the engine busyness RFC I've recently sent. So in
other words avoid one timer and MMIO sampling when possible. It can be a
separate patch on top added by me if we get agreement on this.
> To be able to do so, we need to export the two symbols from
> kernel/events/core.c to register and unregister a PMU device.
>
> v2: Use a common timer for the ring sampling.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/Makefile | 1 +
> drivers/gpu/drm/i915/i915_drv.c | 2 +
> drivers/gpu/drm/i915/i915_drv.h | 23 ++
> drivers/gpu/drm/i915/i915_pmu.c | 449 ++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +
> include/uapi/drm/i915_drm.h | 40 +++
> kernel/events/core.c | 1 +
> 7 files changed, 518 insertions(+)
> create mode 100644 drivers/gpu/drm/i915/i915_pmu.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 7b05fb802f4c..ca88e6e67910 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -26,6 +26,7 @@ i915-y := i915_drv.o \
>
> i915-$(CONFIG_COMPAT) += i915_ioc32.o
> i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
> +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>
> # GEM code
> i915-y += i915_cmd_parser.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 2d2fb4327f97..e3c6d052d1c9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1144,6 +1144,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
> struct drm_device *dev = &dev_priv->drm;
>
> i915_gem_shrinker_init(dev_priv);
> + i915_pmu_register(dev_priv);
>
> /*
> * Notify a valid surface after modesetting,
> @@ -1197,6 +1198,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
> intel_opregion_unregister(dev_priv);
>
> i915_perf_unregister(dev_priv);
> + i915_pmu_unregister(dev_priv);
>
> i915_teardown_sysfs(dev_priv);
> i915_guc_log_unregister(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1fa1e7d48f02..10beae1a13c8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -40,6 +40,7 @@
> #include <linux/hash.h>
> #include <linux/intel-iommu.h>
> #include <linux/kref.h>
> +#include <linux/perf_event.h>
> #include <linux/pm_qos.h>
> #include <linux/reservation.h>
> #include <linux/shmem_fs.h>
> @@ -2075,6 +2076,12 @@ struct intel_cdclk_state {
> unsigned int cdclk, vco, ref;
> };
>
> +enum {
> + __I915_SAMPLE_FREQ_ACT = 0,
> + __I915_SAMPLE_FREQ_REQ,
> + __I915_NUM_PMU_SAMPLERS
> +};
> +
> struct drm_i915_private {
> struct drm_device drm;
>
> @@ -2564,6 +2571,13 @@ struct drm_i915_private {
> int irq;
> } lpe_audio;
>
> + struct {
> + struct pmu base;
> + struct hrtimer timer;
> + u64 enable;
> + u64 sample[__I915_NUM_PMU_SAMPLERS];
> + } pmu;
> +
> /*
> * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
> * will be rejected. Instead look for a better place.
> @@ -3681,6 +3695,15 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv);
> extern void i915_perf_register(struct drm_i915_private *dev_priv);
> extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
>
> +/* i915_pmu.c */
> +#ifdef CONFIG_PERF_EVENTS
> +extern void i915_pmu_register(struct drm_i915_private *i915);
> +extern void i915_pmu_unregister(struct drm_i915_private *i915);
> +#else
> +static inline void i915_pmu_register(struct drm_i915_private *i915) {}
> +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
> +#endif
> +
> /* i915_suspend.c */
> extern int i915_save_state(struct drm_i915_private *dev_priv);
> extern int i915_restore_state(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> new file mode 100644
> index 000000000000..80e1c07841ac
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -0,0 +1,449 @@
> +#include <linux/perf_event.h>
> +#include <linux/pm_runtime.h>
> +
> +#include "i915_drv.h"
> +#include "intel_ringbuffer.h"
> +
> +#define FREQUENCY 200
> +#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
> +
> +#define RING_MASK 0xffffffff
> +#define RING_MAX 32
> +
> +static void engines_sample(struct drm_i915_private *dev_priv)
> +{
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + bool fw = false;
> +
> + if ((dev_priv->pmu.enable & RING_MASK) == 0)
> + return;
> +
> + if (!dev_priv->gt.awake)
> + return;
> +
> + if (!intel_runtime_pm_get_if_in_use(dev_priv))
> + return;
> +
> + for_each_engine(engine, dev_priv, id) {
> + u32 val;
> +
> + if ((dev_priv->pmu.enable & (0x7 << (4*id))) == 0)
This would need some nicely named macros. PMU_ENGINE_MASK(id) ?
> + continue;
> +
Would help me at least:
/* Skip idle engines. */
> + if (i915_seqno_passed(intel_engine_get_seqno(engine),
> + intel_engine_last_submit(engine)))
> + continue;
> +
> + if (!fw) {
> + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> + fw = true;
> + }
> +
> + val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
> + if (!(val & MODE_IDLE))
> + engine->pmu_sample[I915_SAMPLE_BUSY] += PERIOD;
> +
> + val = I915_READ_FW(RING_CTL(engine->mmio_base));
> + if (val & RING_WAIT)
> + engine->pmu_sample[I915_SAMPLE_WAIT] += PERIOD;
> + if (val & RING_WAIT_SEMAPHORE)
> + engine->pmu_sample[I915_SAMPLE_SEMA] += PERIOD;
> + }
> +
> + if (fw)
> + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> + intel_runtime_pm_put(dev_priv);
> +}
> +
> +static void frequency_sample(struct drm_i915_private *dev_priv)
> +{
> + if (dev_priv->pmu.enable & BIT_ULL(I915_PMU_ACTUAL_FREQUENCY)) {
> + u64 val;
> +
> + val = dev_priv->rps.cur_freq;
> + if (dev_priv->gt.awake &&
> + intel_runtime_pm_get_if_in_use(dev_priv)) {
> + val = I915_READ_NOTRACE(GEN6_RPSTAT1);
> + if (INTEL_GEN(dev_priv) >= 9)
> + val = (val & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
> + else if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
> + val = (val & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
> + else
> + val = (val & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
> + intel_runtime_pm_put(dev_priv);
> + }
> + val = intel_gpu_freq(dev_priv, val);
> + dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT] += val * PERIOD;
> + }
> +
> + if (dev_priv->pmu.enable & BIT_ULL(I915_PMU_REQUESTED_FREQUENCY)) {
> + u64 val = intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq);
> + dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ] += val * PERIOD;
> + }
> +}
I think a very nice thing would be to gate the timer on gt.awake |
I915_PMU_REQUESTED_FREQUENCY from the callers. Actually not gate, but to
manage it via those triggers.
Maybe have something like event_needs_timer(event) at the timer enable
time and export i915_pmu_gt_idle/active to be called from the relevant
parts elsewhere. Those functions would then manage the timer enablement
depending on the enabled events mask.
This way we would have no timer running during GT idle periods which
would be very nice. (Unless I915_PMU_REQUESTED_FREQUENCY is enabled.)
Then on top, as mentioned before, we could have event_needs_timer() give
different results for engine stats if we followed up with a gen8+ mode
driven by the request in/out hooks instead of having to run a timer and
do mmio.
> +
> +static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
> +{
> + struct drm_i915_private *i915 =
> + container_of(hrtimer, struct drm_i915_private, pmu.timer);
> +
> + if (i915->pmu.enable == 0)
> + return HRTIMER_NORESTART;
> +
> + engines_sample(i915);
> + frequency_sample(i915);
> +
> + hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
> + return HRTIMER_RESTART;
> +}
> +
> +static void i915_pmu_event_destroy(struct perf_event *event)
> +{
> + WARN_ON(event->parent);
> +}
> +
> +static int engine_event_init(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> + int engine = event->attr.config >> 2;
> + int sample = event->attr.config & 3;
Some macros for these two.
> +
> + switch (sample) {
> + case I915_SAMPLE_BUSY:
> + case I915_SAMPLE_WAIT:
> + break;
> + case I915_SAMPLE_SEMA:
> + if (INTEL_GEN(i915) < 6)
> + return -ENODEV;
> + break;
> + default:
> + return -ENOENT;
> + }
> +
> + if (engine >= I915_NUM_ENGINES)
> + return -ENOENT;
Why ENOENT and not ENODEV?
> +
> + if (!i915->engine[engine])
> + return -ENODEV;
> +
> + return 0;
> +}
> +
> +static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
> +{
> + struct perf_sample_data data;
> + struct perf_event *event;
> + u64 period;
> +
> + event = container_of(hrtimer, struct perf_event, hw.hrtimer);
> + if (event->state != PERF_EVENT_STATE_ACTIVE)
> + return HRTIMER_NORESTART;
> +
> + event->pmu->read(event);
> +
> + perf_sample_data_init(&data, 0, event->hw.last_period);
> + perf_event_overflow(event, &data, NULL);
> +
> + period = max_t(u64, 10000, event->hw.sample_period);
> + hrtimer_forward_now(hrtimer, ns_to_ktime(period));
> + return HRTIMER_RESTART;
> +}
> +
> +static void init_hrtimer(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> +
> + if (!is_sampling_event(event))
> + return;
> +
> + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + hwc->hrtimer.function = hrtimer_sample;
> +
> + if (event->attr.freq) {
> + long freq = event->attr.sample_freq;
> +
> + event->attr.sample_period = NSEC_PER_SEC / freq;
> + hwc->sample_period = event->attr.sample_period;
> + local64_set(&hwc->period_left, hwc->sample_period);
> + hwc->last_period = hwc->sample_period;
> + event->attr.freq = 0;
> + }
> +}
> +
> +static int i915_pmu_event_init(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> + int ret;
> +
> + /* XXX ideally only want pid == -1 && cpu == -1 */
What is this about?
> +
> + if (event->attr.type != event->pmu->type)
> + return -ENOENT;
> +
> + if (has_branch_stack(event))
> + return -EOPNOTSUPP;
> +
> + ret = 0;
> + if (event->attr.config < RING_MAX) {
> + ret = engine_event_init(event);
> + } else switch (event->attr.config) {
> + case I915_PMU_ACTUAL_FREQUENCY:
> + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
> + ret = -ENODEV; /* requires a mutex for sampling! */
> + case I915_PMU_REQUESTED_FREQUENCY:
> + case I915_PMU_ENERGY:
> + case I915_PMU_RC6_RESIDENCY:
> + case I915_PMU_RC6p_RESIDENCY:
> + case I915_PMU_RC6pp_RESIDENCY:
> + if (INTEL_GEN(i915) < 6)
> + ret = -ENODEV;
> + break;
> + }
> + if (ret)
> + return ret;
> +
> + if (!event->parent)
> + event->destroy = i915_pmu_event_destroy;
> +
> + init_hrtimer(event);
> +
> + return 0;
> +}
> +
> +static void i915_pmu_timer_start(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + s64 period;
> +
> + if (!is_sampling_event(event))
> + return;
> +
> + period = local64_read(&hwc->period_left);
> + if (period) {
> + if (period < 0)
> + period = 10000;
> +
> + local64_set(&hwc->period_left, 0);
> + } else {
> + period = max_t(u64, 10000, hwc->sample_period);
> + }
> +
> + hrtimer_start_range_ns(&hwc->hrtimer,
> + ns_to_ktime(period), 0,
> + HRTIMER_MODE_REL_PINNED);
> +}
> +
> +static void i915_pmu_timer_cancel(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> +
> + if (!is_sampling_event(event))
> + return;
> +
> + local64_set(&hwc->period_left,
> + ktime_to_ns(hrtimer_get_remaining(&hwc->hrtimer)));
> + hrtimer_cancel(&hwc->hrtimer);
> +}
> +
> +static void i915_pmu_enable(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> +
> + if (i915->pmu.enable == 0)
> + hrtimer_start_range_ns(&i915->pmu.timer,
> + ns_to_ktime(PERIOD), 0,
> + HRTIMER_MODE_REL_PINNED);
> +
> + i915->pmu.enable |= BIT_ULL(event->attr.config);
> +
> + i915_pmu_timer_start(event);
> +}
> +
> +static void i915_pmu_disable(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> +
> + i915->pmu.enable &= ~BIT_ULL(event->attr.config);
> + i915_pmu_timer_cancel(event);
> +}
> +
> +static int i915_pmu_event_add(struct perf_event *event, int flags)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> +
> + if (flags & PERF_EF_START)
> + i915_pmu_enable(event);
> +
> + hwc->state = !(flags & PERF_EF_START);
> +
> + return 0;
> +}
> +
> +static void i915_pmu_event_del(struct perf_event *event, int flags)
> +{
> + i915_pmu_disable(event);
> +}
> +
> +static void i915_pmu_event_start(struct perf_event *event, int flags)
> +{
> + i915_pmu_enable(event);
> +}
> +
> +static void i915_pmu_event_stop(struct perf_event *event, int flags)
> +{
> + i915_pmu_disable(event);
> +}
> +
> +static u64 read_energy_uJ(struct drm_i915_private *dev_priv)
> +{
> + u64 power;
> +
> + GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
> +
> + intel_runtime_pm_get(dev_priv);
> +
> + rdmsrl(MSR_RAPL_POWER_UNIT, power);
> + power = (power & 0x1f00) >> 8;
> + power = 1000000 >> power; /* convert to uJ */
> + power *= I915_READ_NOTRACE(MCH_SECP_NRG_STTS);
> +
> + intel_runtime_pm_put(dev_priv);
> +
> + return power;
> +}
> +
> +static inline u64 calc_residency(struct drm_i915_private *dev_priv,
> + const i915_reg_t reg)
> +{
> + u64 val, units = 128, div = 100000;
> +
> + GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
> +
> + intel_runtime_pm_get(dev_priv);
> + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
> + div = dev_priv->czclk_freq;
> + units = 1;
> + if (I915_READ_NOTRACE(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> + units <<= 8;
> + } else if (IS_GEN9_LP(dev_priv)) {
> + div = 1200;
> + units = 1;
> + }
> + val = I915_READ_NOTRACE(reg);
> + intel_runtime_pm_put(dev_priv);
> +
> + val *= units;
> + return DIV_ROUND_UP_ULL(val, div);
> +}
> +
> +static u64 count_interrupts(struct drm_i915_private *i915)
> +{
> + /* open-coded kstat_irqs() */
> + struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
> + u64 sum = 0;
> + int cpu;
> +
> + if (!desc || !desc->kstat_irqs)
> + return 0;
> +
> + for_each_possible_cpu(cpu)
> + sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
> +
> + return sum;
> +}
> +
> +static void i915_pmu_event_read(struct perf_event *event)
> +{
> + struct drm_i915_private *i915 =
> + container_of(event->pmu, typeof(*i915), pmu.base);
> + u64 val = 0;
> +
> + if (event->attr.config < 32) {
32 could use a name as well.
> + int engine = event->attr.config >> 2;
> + int sample = event->attr.config & 3;
> + val = i915->engine[engine]->pmu_sample[sample];
> + } else switch (event->attr.config) {
> + case I915_PMU_ACTUAL_FREQUENCY:
> + val = i915->pmu.sample[__I915_SAMPLE_FREQ_ACT];
> + break;
> + case I915_PMU_REQUESTED_FREQUENCY:
> + val = i915->pmu.sample[__I915_SAMPLE_FREQ_REQ];
> + break;
> + case I915_PMU_ENERGY:
> + val = read_energy_uJ(i915);
> + break;
> + case I915_PMU_INTERRUPTS:
> + val = count_interrupts(i915);
> + break;
> +
> + case I915_PMU_RC6_RESIDENCY:
> + if (!i915->gt.awake)
> + return;
Don't need to set val to something in this case? Like 100%, or what is
the equivalent. Maybe not, I need to study how the PMU API works..
> +
> + val = calc_residency(i915, IS_VALLEYVIEW(i915) ? VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6);
> + break;
> +
> + case I915_PMU_RC6p_RESIDENCY:
> + if (!i915->gt.awake)
> + return;
> +
> + if (!IS_VALLEYVIEW(i915))
> + val = calc_residency(i915, GEN6_GT_GFX_RC6p);
> + break;
> +
> + case I915_PMU_RC6pp_RESIDENCY:
> + if (!i915->gt.awake)
> + return;
> +
> + if (!IS_VALLEYVIEW(i915))
> + val = calc_residency(i915, GEN6_GT_GFX_RC6pp);
> + break;
> + }
> +
> + local64_set(&event->count, val);
> +}
> +
> +static int i915_pmu_event_event_idx(struct perf_event *event)
> +{
> + return 0;
> +}
> +
> +void i915_pmu_register(struct drm_i915_private *i915)
> +{
> + if (INTEL_GEN(i915) <= 2)
> + return;
> +
> + i915->pmu.base.task_ctx_nr = perf_sw_context;
> + i915->pmu.base.event_init = i915_pmu_event_init;
> + i915->pmu.base.add = i915_pmu_event_add;
> + i915->pmu.base.del = i915_pmu_event_del;
> + i915->pmu.base.start = i915_pmu_event_start;
> + i915->pmu.base.stop = i915_pmu_event_stop;
> + i915->pmu.base.read = i915_pmu_event_read;
> + i915->pmu.base.event_idx = i915_pmu_event_event_idx;
> +
> + hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + i915->pmu.timer.function = i915_sample;
> + i915->pmu.enable = 0;
> +
> + if (perf_pmu_register(&i915->pmu.base, "i915", -1))
> + i915->pmu.base.event_init = NULL;
> +}
> +
> +void i915_pmu_unregister(struct drm_i915_private *i915)
> +{
> + if (!i915->pmu.base.event_init)
> + return;
> +
> + i915->pmu.enable = 0;
> +
> + perf_pmu_unregister(&i915->pmu.base);
> + i915->pmu.base.event_init = NULL;
> +
> + hrtimer_cancel(&i915->pmu.timer);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 6aa20ac8cde3..084fa7816256 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -244,6 +244,8 @@ struct intel_engine_cs {
> I915_SELFTEST_DECLARE(bool mock : 1);
> } breadcrumbs;
>
> + u64 pmu_sample[3];
Define for 3 needed.
> +
> /*
> * A pool of objects to use as shadow copies of client batch buffers
> * when the command parser is enabled. Prevents the client from
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 34ee011f08ac..e9375ff29371 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -86,6 +86,46 @@ enum i915_mocs_table_index {
> I915_MOCS_CACHED,
> };
>
> +/**
> + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
> + *
> + */
> +#define I915_SAMPLE_BUSY 0
> +#define I915_SAMPLE_WAIT 1
> +#define I915_SAMPLE_SEMA 2
> +
> +#define I915_SAMPLE_RCS 0
> +#define I915_SAMPLE_VCS 1
> +#define I915_SAMPLE_BCS 2
> +#define I915_SAMPLE_VECS 3
> +
> +#define __I915_PMU_COUNT(ring, id) ((ring) << 4 | (id))
> +
> +#define I915_PMU_COUNT_RCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY)
> +#define I915_PMU_COUNT_RCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT)
> +#define I915_PMU_COUNT_RCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA)
> +
> +#define I915_PMU_COUNT_VCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY)
> +#define I915_PMU_COUNT_VCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT)
> +#define I915_PMU_COUNT_VCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA)
VCS2 is missing.
> +
> +#define I915_PMU_COUNT_BCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY)
> +#define I915_PMU_COUNT_BCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT)
> +#define I915_PMU_COUNT_BCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_SEMA)
> +
> +#define I915_PMU_COUNT_VECS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY)
> +#define I915_PMU_COUNT_VECS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT)
> +#define I915_PMU_COUNT_VECS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA)
> +
> +#define I915_PMU_ACTUAL_FREQUENCY 32
> +#define I915_PMU_REQUESTED_FREQUENCY 33
> +#define I915_PMU_ENERGY 34
> +#define I915_PMU_INTERRUPTS 35
Do these numbers become ABI btw?
> +
> +#define I915_PMU_RC6_RESIDENCY 40
> +#define I915_PMU_RC6p_RESIDENCY 41
> +#define I915_PMU_RC6pp_RESIDENCY 42
> +
> /* Each region is a minimum of 16k, and there are at most 255 of them.
> */
> #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index f811dd20bbc1..6351ed8a2e56 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7365,6 +7365,7 @@ int perf_event_overflow(struct perf_event *event,
> {
> return __perf_event_overflow(event, 1, data, regs);
> }
> +EXPORT_SYMBOL_GPL(perf_event_overflow);
>
> /*
> * Generic software event infrastructure
>
First pass only for now. No insight into perf/PMU interaction and
haven't checked the residency&co calculations.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list